feat: migrate to unified Parquet format with instant GitHub setup and 4x faster tagging

This commit is contained in:
matt 2025-10-18 21:32:12 -07:00
parent e9e949aae3
commit 8435312c8f
58 changed files with 11921 additions and 3961 deletions

View file

@@ -247,11 +247,13 @@ class CardSimilarity:
Returns:
Set of theme tag strings
"""
if pd.isna(tags) or not tags:
# M4: Handle both scalar NA (CSV) and array values (Parquet)
if pd.isna(tags) if isinstance(tags, (str, float, int, type(None))) else False:
return set()
if isinstance(tags, list):
return set(tags)
# M4: Parquet format - already a list
return set(tags) if tags else set()
if isinstance(tags, str):
# Handle string representation of list: "['tag1', 'tag2']"