From e92f2ccfb4948199cc7a0979c4f0920d6581b880 Mon Sep 17 00:00:00 2001 From: matt Date: Sat, 18 Oct 2025 21:50:12 -0700 Subject: [PATCH] fix: handle themeTags as list in similarity cache builder --- code/scripts/build_similarity_cache_parquet.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/code/scripts/build_similarity_cache_parquet.py b/code/scripts/build_similarity_cache_parquet.py index 99d784d..cc39f6d 100644 --- a/code/scripts/build_similarity_cache_parquet.py +++ b/code/scripts/build_similarity_cache_parquet.py @@ -202,7 +202,8 @@ def build_cache( df = similarity.cards_df df["is_land"] = df["type"].str.contains("Land", case=False, na=False) df["is_multifaced"] = df["layout"].str.lower().isin(["modal_dfc", "transform", "reversible_card", "double_faced_token"]) - df["tag_count"] = df["themeTags"].apply(lambda x: len(x.split("|")) if pd.notna(x) and x else 0) + # M4: themeTags is now a list (Parquet format), not a pipe-delimited string + df["tag_count"] = df["themeTags"].apply(lambda x: len(x) if isinstance(x, list) else 0) # Keep cards that are either: # 1. Not lands, OR