fix: handle themeTags as list in similarity cache builder

This commit is contained in:
matt 2025-10-18 21:50:12 -07:00
parent dec6e659b8
commit e92f2ccfb4

View file

@@ -202,7 +202,8 @@ def build_cache(
df = similarity.cards_df df = similarity.cards_df
df["is_land"] = df["type"].str.contains("Land", case=False, na=False) df["is_land"] = df["type"].str.contains("Land", case=False, na=False)
df["is_multifaced"] = df["layout"].str.lower().isin(["modal_dfc", "transform", "reversible_card", "double_faced_token"]) df["is_multifaced"] = df["layout"].str.lower().isin(["modal_dfc", "transform", "reversible_card", "double_faced_token"])
df["tag_count"] = df["themeTags"].apply(lambda x: len(x.split("|")) if pd.notna(x) and x else 0) # M4: themeTags is now a list (Parquet format), not a pipe-delimited string
df["tag_count"] = df["themeTags"].apply(lambda x: len(x) if isinstance(x, list) else 0)
# Keep cards that are either: # Keep cards that are either:
# 1. Not lands, OR # 1. Not lands, OR