diff --git a/code/scripts/build_similarity_cache_parquet.py b/code/scripts/build_similarity_cache_parquet.py
index 99d784d..cc39f6d 100644
--- a/code/scripts/build_similarity_cache_parquet.py
+++ b/code/scripts/build_similarity_cache_parquet.py
@@ -202,7 +202,8 @@ def build_cache(
     df = similarity.cards_df
     df["is_land"] = df["type"].str.contains("Land", case=False, na=False)
     df["is_multifaced"] = df["layout"].str.lower().isin(["modal_dfc", "transform", "reversible_card", "double_faced_token"])
-    df["tag_count"] = df["themeTags"].apply(lambda x: len(x) if pd.notna(x) and x else 0)
+    # M4: themeTags is now a sequence (Parquet format), not a pipe-delimited string; count lists/tuples and 1-D arrays (Parquet list cells may load as numpy arrays), anything else (None/NaN/str) counts as 0
+    df["tag_count"] = df["themeTags"].apply(lambda x: len(x) if isinstance(x, (list, tuple)) or getattr(x, "ndim", 0) == 1 else 0)
 
     # Keep cards that are either:
     # 1. Not lands, OR