From 505bbdf166c857ac145ad3b48d6029f284f79b09 Mon Sep 17 00:00:00 2001 From: matt Date: Sun, 19 Oct 2025 08:26:20 -0700 Subject: [PATCH] fix: handle numpy arrays in card_similarity parse_theme_tags The similarity cache build was failing because parse_theme_tags() was checking isinstance(tags, list), but Parquet files return numpy.ndarray objects. This caused all cards to be flagged as having no theme tags, resulting in an empty cache. Changed to a hasattr(tags, '__len__') check instead (strings are excluded so they still reach the string-parsing branch), which works for both lists and numpy arrays. --- code/web/services/card_similarity.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/code/web/services/card_similarity.py b/code/web/services/card_similarity.py index 4c3e68a..589d86d 100644 --- a/code/web/services/card_similarity.py +++ b/code/web/services/card_similarity.py @@ -252,9 +252,10 @@ class CardSimilarity: if pd.isna(tags) if isinstance(tags, (str, float, int, type(None))) else False: return set() - if isinstance(tags, list): - # M4: Parquet format - already a list - return set(tags) if tags else set() + # M4: Handle numpy arrays from Parquet files + if hasattr(tags, '__len__') and not isinstance(tags, str): + # Parquet format - convert array-like to list + return set(list(tags)) if len(tags) > 0 else set() if isinstance(tags, str): # Handle string representation of list: "['tag1', 'tag2']"