fix: use correct processed/ path for similarity cache building

This commit is contained in:
matt 2025-10-18 21:43:04 -07:00
parent 8435312c8f
commit b92918581e
2 changed files with 5 additions and 4 deletions

View file

@@ -155,7 +155,7 @@ def build_cache(
     """
     Build similarity cache for all cards.
-    NOTE: Assumes card data (cards.csv, all_cards.parquet) and tagged data already exist.
+    NOTE: Assumes card data (card_files/processed/all_cards.parquet) and tagged data already exist.
     Run setup and tagging separately before building cache.
     Args:

View file

@@ -31,12 +31,13 @@ class CardSimilarity:
         Initialize similarity calculator.
         Args:
-            cards_df: DataFrame with card data. If None, loads from all_cards.parquet
+            cards_df: DataFrame with card data. If None, loads from processed all_cards.parquet
             cache: SimilarityCache instance. If None, uses global singleton
         """
         if cards_df is None:
-            # Load from default location
-            parquet_path = Path(__file__).parents[3] / "card_files" / "all_cards.parquet"
+            # Load from processed directory (M4 Parquet migration)
+            from path_util import get_processed_cards_path
+            parquet_path = get_processed_cards_path()
             logger.info(f"Loading cards from {parquet_path}")
             self.cards_df = pd.read_parquet(parquet_path)
         else: