fix: handle themeTags as list in similarity cache builder

2026-01-31 13:45:19 +01:00 · 2025-10-18 21:50:12 -07:00 · 2025-10-18 21:50:12 -07:00 · e92f2ccfb4
commit e92f2ccfb4
parent dec6e659b8
1 changed file with 2 additions and 1 deletion
--- a/code/scripts/build_similarity_cache_parquet.py
+++ b/code/scripts/build_similarity_cache_parquet.py
@@ -202,7 +202,8 @@ def build_cache(
    df = similarity.cards_df
    df["is_land"] = df["type"].str.contains("Land", case=False, na=False)
    df["is_multifaced"] = df["layout"].str.lower().isin(["modal_dfc", "transform", "reversible_card", "double_faced_token"])
-    df["tag_count"] = df["themeTags"].apply(lambda x: len(x.split("|")) if pd.notna(x) and x else 0)
+    # M4: themeTags is now a list (Parquet format), not a pipe-delimited string
+    df["tag_count"] = df["themeTags"].apply(lambda x: len(x) if isinstance(x, list) else 0)
    # Keep cards that are either:
    # 1. Not lands, OR