Mirror of https://github.com/mwisnowski/mtg_python_deckbuilder.git, synced 2025-12-16 23:50:12 +01:00
feat: consolidate card data into optimized format for faster queries and reduced file sizes
This commit is contained in:
parent 5753bb19f8
commit f70ffca23e
24 changed files with 2903 additions and 135 deletions
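The new query path is exercised by the tests below. As rough orientation, here is a minimal usage sketch (not part of this diff) of querying the consolidated Parquet file through the new classes; the csv_files/all_cards.parquet location is an assumption, while the class and method names are taken from the tests that follow.

from code.services.all_cards_loader import AllCardsLoader
from code.services.card_query_builder import CardQueryBuilder

# Path is an assumption; the loader's real default location may differ.
loader = AllCardsLoader(file_path="csv_files/all_cards.parquet", cache_ttl=300)
loader.load()  # read the Parquet file once; later calls hit the in-memory cache

sol_ring = loader.get_by_name("Sol Ring")            # single-card lookup
red_cards = loader.filter_by_color_identity(["R"])   # color-identity filter

# Fluent query builder over the same loader
token_creatures = (
    CardQueryBuilder(loader=loader)
    .types("Creature")
    .themes(["tokens"], mode="any")
    .execute()
)
print(loader.get_stats())

The design replaces many per-color CSV reads with a single cached DataFrame, which is what the performance tests below benchmark.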
code/tests/test_all_cards_loader.py (new file, 408 additions)
@@ -0,0 +1,408 @@
"""
Tests for AllCardsLoader and CardQueryBuilder

Tests cover:
- Loading and caching behavior
- Single and batch card lookups
- Color, theme, and type filtering
- Text search
- Query builder fluent API
- Performance benchmarks
"""

from __future__ import annotations

import os
import tempfile
import time

import pandas as pd
import pytest

from code.services.all_cards_loader import AllCardsLoader
from code.services.card_query_builder import CardQueryBuilder


@pytest.fixture
def sample_cards_df():
    """Create a sample DataFrame for testing."""
    return pd.DataFrame(
        {
            "name": [
                "Sol Ring",
                "Lightning Bolt",
                "Counterspell",
                "Giant Growth",
                "Goblin Token Maker",
                "Dark Ritual",
                "Swords to Plowshares",
                "Birds of Paradise",
            ],
            "colorIdentity": ["Colorless", "R", "U", "G", "R", "B", "W", "G"],
            "type": [
                "Artifact",
                "Instant",
                "Instant",
                "Instant",
                "Creature — Goblin",
                "Instant",
                "Instant",
                "Creature — Bird",
            ],
            "text": [
                "Add two mana",
                "Deal 3 damage",
                "Counter target spell",
                "Target creature gets +3/+3",
                "When this enters, create two 1/1 red Goblin creature tokens",
                "Add three black mana",
                "Exile target creature",
                "Flying, Add one mana of any color",
            ],
            "themeTags": [
                "",
                "burn,damage",
                "control,counterspells",
                "combat,pump",
                "tokens,goblins",
                "ritual,fast-mana",
                "removal,exile",
                "ramp,mana-dork",
            ],
        }
    )


@pytest.fixture
def sample_parquet_file(sample_cards_df):
    """Create a temporary Parquet file for testing."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".parquet") as tmp:
        sample_cards_df.to_parquet(tmp.name, engine="pyarrow")
        yield tmp.name
    os.unlink(tmp.name)


def test_loader_initialization(sample_parquet_file):
    """Test AllCardsLoader initialization."""
    loader = AllCardsLoader(file_path=sample_parquet_file, cache_ttl=60)
    assert loader.file_path == sample_parquet_file
    assert loader.cache_ttl == 60
    assert loader._df is None


def test_loader_load(sample_parquet_file):
    """Test loading Parquet file."""
    loader = AllCardsLoader(file_path=sample_parquet_file)
    df = loader.load()
    assert len(df) == 8
    assert "name" in df.columns
    assert "colorIdentity" in df.columns


def test_loader_caching(sample_parquet_file):
    """Test that caching works and doesn't reload unnecessarily."""
    loader = AllCardsLoader(file_path=sample_parquet_file, cache_ttl=300)

    # First load
    start_time = time.time()
    df1 = loader.load()
    first_load_time = time.time() - start_time

    # Second load (should use cache)
    start_time = time.time()
    df2 = loader.load()
    cached_load_time = time.time() - start_time

    # Cache should be much faster
    assert cached_load_time < first_load_time / 2
    assert df1 is df2  # Same object


def test_loader_force_reload(sample_parquet_file):
    """Test force_reload flag."""
    loader = AllCardsLoader(file_path=sample_parquet_file)

    df1 = loader.load()
    df2 = loader.load(force_reload=True)

    assert df1 is not df2  # Different objects
    assert len(df1) == len(df2)  # Same data


def test_loader_cache_expiration(sample_parquet_file):
    """Test cache expiration after TTL."""
    loader = AllCardsLoader(file_path=sample_parquet_file, cache_ttl=1)

    df1 = loader.load()
    time.sleep(1.1)  # Wait for TTL to expire
    df2 = loader.load()

    assert df1 is not df2  # Should have reloaded


def test_get_by_name(sample_parquet_file):
    """Test single card lookup by name."""
    loader = AllCardsLoader(file_path=sample_parquet_file)

    card = loader.get_by_name("Sol Ring")
    assert card is not None
    assert card["name"] == "Sol Ring"
    assert card["colorIdentity"] == "Colorless"

    # Non-existent card
    card = loader.get_by_name("Nonexistent Card")
    assert card is None


def test_get_by_names(sample_parquet_file):
    """Test batch card lookup by names."""
    loader = AllCardsLoader(file_path=sample_parquet_file)

    cards = loader.get_by_names(["Sol Ring", "Lightning Bolt", "Counterspell"])
    assert len(cards) == 3
    assert "Sol Ring" in cards["name"].values
    assert "Lightning Bolt" in cards["name"].values

    # Empty list
    cards = loader.get_by_names([])
    assert len(cards) == 0

    # Non-existent cards
    cards = loader.get_by_names(["Nonexistent1", "Nonexistent2"])
    assert len(cards) == 0


def test_filter_by_color_identity(sample_parquet_file):
    """Test color identity filtering."""
    loader = AllCardsLoader(file_path=sample_parquet_file)

    # Single color
    red_cards = loader.filter_by_color_identity(["R"])
    assert len(red_cards) == 2
    assert "Lightning Bolt" in red_cards["name"].values
    assert "Goblin Token Maker" in red_cards["name"].values

    # Colorless
    colorless = loader.filter_by_color_identity(["Colorless"])
    assert len(colorless) == 1
    assert colorless["name"].values[0] == "Sol Ring"


def test_filter_by_themes(sample_parquet_file):
    """Test theme filtering."""
    loader = AllCardsLoader(file_path=sample_parquet_file)

    # Single theme
    token_cards = loader.filter_by_themes(["tokens"], mode="any")
    assert len(token_cards) == 1
    assert token_cards["name"].values[0] == "Goblin Token Maker"

    # Multiple themes (any)
    cards = loader.filter_by_themes(["burn", "removal"], mode="any")
    assert len(cards) == 2  # Lightning Bolt and Swords to Plowshares

    # Multiple themes (all)
    cards = loader.filter_by_themes(["tokens", "goblins"], mode="all")
    assert len(cards) == 1
    assert cards["name"].values[0] == "Goblin Token Maker"


def test_filter_by_type(sample_parquet_file):
    """Test type filtering."""
    loader = AllCardsLoader(file_path=sample_parquet_file)

    creatures = loader.filter_by_type("Creature")
    assert len(creatures) == 2
    assert "Goblin Token Maker" in creatures["name"].values
    assert "Birds of Paradise" in creatures["name"].values

    instants = loader.filter_by_type("Instant")
    assert len(instants) == 5


def test_search(sample_parquet_file):
    """Test text search."""
    loader = AllCardsLoader(file_path=sample_parquet_file)

    # Search in text
    results = loader.search("token")
    assert len(results) >= 1
    assert "Goblin Token Maker" in results["name"].values

    # Search in name
    results = loader.search("Sol")
    assert len(results) == 1
    assert results["name"].values[0] == "Sol Ring"

    # Limit results
    results = loader.search("mana", limit=1)
    assert len(results) == 1


def test_get_stats(sample_parquet_file):
    """Test stats retrieval."""
    loader = AllCardsLoader(file_path=sample_parquet_file)
    loader.load()

    stats = loader.get_stats()
    assert stats["total_cards"] == 8
    assert stats["cached"] is True
    assert stats["file_size_mb"] >= 0  # Small test file may round to 0
    assert "cache_age_seconds" in stats


def test_clear_cache(sample_parquet_file):
    """Test cache clearing."""
    loader = AllCardsLoader(file_path=sample_parquet_file)
    loader.load()

    assert loader._df is not None
    loader.clear_cache()
    assert loader._df is None


def test_query_builder_basic(sample_parquet_file):
    """Test basic query builder usage."""
    loader = AllCardsLoader(file_path=sample_parquet_file)
    builder = CardQueryBuilder(loader=loader)

    # Execute without filters
    results = builder.execute()
    assert len(results) == 8

    # Single filter
    results = builder.reset().colors(["R"]).execute()
    assert len(results) == 2


def test_query_builder_chaining(sample_parquet_file):
    """Test query builder method chaining."""
    loader = AllCardsLoader(file_path=sample_parquet_file)

    results = (
        CardQueryBuilder(loader=loader)
        .types("Creature")
        .themes(["tokens"], mode="any")
        .execute()
    )
    assert len(results) == 1
    assert results["name"].values[0] == "Goblin Token Maker"


def test_query_builder_names(sample_parquet_file):
    """Test query builder with specific names."""
    loader = AllCardsLoader(file_path=sample_parquet_file)

    results = (
        CardQueryBuilder(loader=loader)
        .names(["Sol Ring", "Lightning Bolt"])
        .execute()
    )
    assert len(results) == 2


def test_query_builder_limit(sample_parquet_file):
    """Test query builder limit."""
    loader = AllCardsLoader(file_path=sample_parquet_file)

    results = CardQueryBuilder(loader=loader).limit(3).execute()
    assert len(results) == 3


def test_query_builder_count(sample_parquet_file):
    """Test query builder count method."""
    loader = AllCardsLoader(file_path=sample_parquet_file)

    count = CardQueryBuilder(loader=loader).types("Instant").count()
    assert count == 5


def test_query_builder_first(sample_parquet_file):
    """Test query builder first method."""
    loader = AllCardsLoader(file_path=sample_parquet_file)

    card = CardQueryBuilder(loader=loader).colors(["R"]).first()
    assert card is not None
    assert card["colorIdentity"] == "R"

    # No results
    card = CardQueryBuilder(loader=loader).colors(["X"]).first()
    assert card is None


def test_query_builder_complex(sample_parquet_file):
    """Test complex query with multiple filters."""
    loader = AllCardsLoader(file_path=sample_parquet_file)

    results = (
        CardQueryBuilder(loader=loader)
        .types("Instant")
        .colors(["R"])
        .search("damage")
        .limit(5)
        .execute()
    )
    assert len(results) == 1
    assert results["name"].values[0] == "Lightning Bolt"


def test_performance_single_lookup(sample_parquet_file):
    """Benchmark single card lookup performance."""
    loader = AllCardsLoader(file_path=sample_parquet_file)
    loader.load()  # Warm up cache

    start = time.time()
    for _ in range(100):
        loader.get_by_name("Sol Ring")
    elapsed = time.time() - start

    avg_time_ms = (elapsed / 100) * 1000
    print(f"\nSingle lookup avg: {avg_time_ms:.3f}ms")
    assert avg_time_ms < 10  # Should be <10ms per lookup


def test_performance_batch_lookup(sample_parquet_file):
    """Benchmark batch card lookup performance."""
    loader = AllCardsLoader(file_path=sample_parquet_file)
    loader.load()  # Warm up cache

    names = ["Sol Ring", "Lightning Bolt", "Counterspell"]

    start = time.time()
    for _ in range(100):
        loader.get_by_names(names)
    elapsed = time.time() - start

    avg_time_ms = (elapsed / 100) * 1000
    print(f"\nBatch lookup (3 cards) avg: {avg_time_ms:.3f}ms")
    assert avg_time_ms < 15  # Should be <15ms per batch


def test_performance_filter_by_color(sample_parquet_file):
    """Benchmark color filtering performance."""
    loader = AllCardsLoader(file_path=sample_parquet_file)
    loader.load()  # Warm up cache

    start = time.time()
    for _ in range(100):
        loader.filter_by_color_identity(["R"])
    elapsed = time.time() - start

    avg_time_ms = (elapsed / 100) * 1000
    print(f"\nColor filter avg: {avg_time_ms:.3f}ms")
    assert avg_time_ms < 20  # Should be <20ms per filter


def test_performance_search(sample_parquet_file):
    """Benchmark text search performance."""
    loader = AllCardsLoader(file_path=sample_parquet_file)
    loader.load()  # Warm up cache

    start = time.time()
    for _ in range(100):
        loader.search("token", limit=100)
    elapsed = time.time() - start

    avg_time_ms = (elapsed / 100) * 1000
    print(f"\nText search avg: {avg_time_ms:.3f}ms")
    assert avg_time_ms < 50  # Should be <50ms per search
code/tests/test_card_aggregator.py (new file, 340 additions)
@@ -0,0 +1,340 @@
"""
Tests for Card Aggregator

Tests the CardAggregator class functionality including:
- Full aggregation of multiple CSV files
- Deduplication (keeping most recent)
- Exclusion of master files (cards.csv, commander_cards.csv)
- Validation of output
- Version rotation
"""

from __future__ import annotations

import json
import os
import tempfile
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd
import pytest

from code.file_setup.card_aggregator import CardAggregator


@pytest.fixture
def temp_dirs():
    """Create temporary directories for testing."""
    with tempfile.TemporaryDirectory() as source_dir, tempfile.TemporaryDirectory() as output_dir:
        yield source_dir, output_dir


@pytest.fixture
def sample_card_data():
    """Sample card data for testing."""
    return {
        "name": ["Sol Ring", "Lightning Bolt", "Counterspell"],
        "faceName": ["Sol Ring", "Lightning Bolt", "Counterspell"],
        "colorIdentity": ["Colorless", "R", "U"],
        "manaCost": ["{1}", "{R}", "{U}{U}"],
        "manaValue": [1, 1, 2],
        "type": ["Artifact", "Instant", "Instant"],
        "text": [
            "Add two colorless mana",
            "Deal 3 damage",
            "Counter target spell",
        ],
    }


def test_ensure_output_dir(temp_dirs):
    """Test that output directory is created."""
    _, output_dir = temp_dirs
    aggregator = CardAggregator(output_dir=output_dir)

    assert os.path.exists(output_dir)
    assert aggregator.output_dir == output_dir


def test_get_card_csvs_excludes_master_files(temp_dirs):
    """Test that cards.csv and commander_cards.csv are excluded."""
    source_dir, _ = temp_dirs

    # Create test files
    Path(source_dir, "cards.csv").touch()
    Path(source_dir, "commander_cards.csv").touch()
    Path(source_dir, "blue_cards.csv").touch()
    Path(source_dir, "red_cards.csv").touch()
    Path(source_dir, ".temp_cards.csv").touch()
    Path(source_dir, "_temp_cards.csv").touch()

    aggregator = CardAggregator()
    csv_files = aggregator.get_card_csvs(source_dir)

    # Should only include blue_cards.csv and red_cards.csv
    basenames = [os.path.basename(f) for f in csv_files]
    assert "blue_cards.csv" in basenames
    assert "red_cards.csv" in basenames
    assert "cards.csv" not in basenames
    assert "commander_cards.csv" not in basenames
    assert ".temp_cards.csv" not in basenames
    assert "_temp_cards.csv" not in basenames
    assert len(csv_files) == 2


def test_deduplicate_cards(sample_card_data):
    """Test that duplicate cards are removed, keeping the last occurrence."""
    # Create DataFrame with duplicates
    df = pd.DataFrame(sample_card_data)

    # Add duplicate Sol Ring with different text
    duplicate_data = {
        "name": ["Sol Ring"],
        "faceName": ["Sol Ring"],
        "colorIdentity": ["Colorless"],
        "manaCost": ["{1}"],
        "manaValue": [1],
        "type": ["Artifact"],
        "text": ["Add two colorless mana (updated)"],
    }
    df_duplicate = pd.DataFrame(duplicate_data)
    df_combined = pd.concat([df, df_duplicate], ignore_index=True)

    # Should have 4 rows before deduplication
    assert len(df_combined) == 4

    aggregator = CardAggregator()
    df_deduped = aggregator.deduplicate_cards(df_combined)

    # Should have 3 rows after deduplication
    assert len(df_deduped) == 3

    # Should keep the last Sol Ring (updated text)
    sol_ring = df_deduped[df_deduped["name"] == "Sol Ring"].iloc[0]
    assert "updated" in sol_ring["text"]


def test_aggregate_all(temp_dirs, sample_card_data):
    """Test full aggregation of multiple CSV files."""
    source_dir, output_dir = temp_dirs

    # Create test CSV files
    df1 = pd.DataFrame(
        {
            "name": ["Sol Ring", "Lightning Bolt"],
            "faceName": ["Sol Ring", "Lightning Bolt"],
            "colorIdentity": ["Colorless", "R"],
            "manaCost": ["{1}", "{R}"],
            "manaValue": [1, 1],
            "type": ["Artifact", "Instant"],
            "text": ["Add two colorless mana", "Deal 3 damage"],
        }
    )

    df2 = pd.DataFrame(
        {
            "name": ["Counterspell", "Path to Exile"],
            "faceName": ["Counterspell", "Path to Exile"],
            "colorIdentity": ["U", "W"],
            "manaCost": ["{U}{U}", "{W}"],
            "manaValue": [2, 1],
            "type": ["Instant", "Instant"],
            "text": ["Counter target spell", "Exile target creature"],
        }
    )

    df1.to_csv(os.path.join(source_dir, "blue_cards.csv"), index=False)
    df2.to_csv(os.path.join(source_dir, "white_cards.csv"), index=False)

    # Create excluded files (should be ignored)
    df1.to_csv(os.path.join(source_dir, "cards.csv"), index=False)
    df1.to_csv(os.path.join(source_dir, "commander_cards.csv"), index=False)

    # Aggregate
    aggregator = CardAggregator(output_dir=output_dir)
    output_path = os.path.join(output_dir, "all_cards.parquet")
    stats = aggregator.aggregate_all(source_dir, output_path)

    # Verify stats
    assert stats["files_processed"] == 2  # Only 2 files (excluded 2)
    assert stats["total_cards"] == 4  # 2 + 2 cards
    assert stats["duplicates_removed"] == 0
    assert os.path.exists(output_path)

    # Verify output
    df_result = pd.read_parquet(output_path)
    assert len(df_result) == 4
    assert "Sol Ring" in df_result["name"].values
    assert "Counterspell" in df_result["name"].values


def test_aggregate_with_duplicates(temp_dirs):
    """Test aggregation with duplicate cards across files."""
    source_dir, output_dir = temp_dirs

    # Create two files with the same card
    df1 = pd.DataFrame(
        {
            "name": ["Sol Ring"],
            "faceName": ["Sol Ring"],
            "colorIdentity": ["Colorless"],
            "manaCost": ["{1}"],
            "manaValue": [1],
            "type": ["Artifact"],
            "text": ["Version 1"],
        }
    )

    df2 = pd.DataFrame(
        {
            "name": ["Sol Ring"],
            "faceName": ["Sol Ring"],
            "colorIdentity": ["Colorless"],
            "manaCost": ["{1}"],
            "manaValue": [1],
            "type": ["Artifact"],
            "text": ["Version 2 (newer)"],
        }
    )

    # Write file1 first, then file2 (file2 is newer)
    file1 = os.path.join(source_dir, "file1.csv")
    file2 = os.path.join(source_dir, "file2.csv")
    df1.to_csv(file1, index=False)
    df2.to_csv(file2, index=False)

    # Make file2 newer by touching it
    os.utime(file2, (datetime.now().timestamp() + 1, datetime.now().timestamp() + 1))

    # Aggregate
    aggregator = CardAggregator(output_dir=output_dir)
    output_path = os.path.join(output_dir, "all_cards.parquet")
    stats = aggregator.aggregate_all(source_dir, output_path)

    # Should have removed 1 duplicate
    assert stats["duplicates_removed"] == 1
    assert stats["total_cards"] == 1

    # Should keep the newer version (file2)
    df_result = pd.read_parquet(output_path)
    assert "Version 2 (newer)" in df_result["text"].iloc[0]


def test_validate_output(temp_dirs, sample_card_data):
    """Test output validation."""
    source_dir, output_dir = temp_dirs

    # Create and aggregate test data
    df = pd.DataFrame(sample_card_data)
    df.to_csv(os.path.join(source_dir, "test_cards.csv"), index=False)

    aggregator = CardAggregator(output_dir=output_dir)
    output_path = os.path.join(output_dir, "all_cards.parquet")
    aggregator.aggregate_all(source_dir, output_path)

    # Validate
    is_valid, errors = aggregator.validate_output(output_path, source_dir)

    assert is_valid
    assert len(errors) == 0


def test_validate_missing_file(temp_dirs):
    """Test validation with missing output file."""
    source_dir, output_dir = temp_dirs

    aggregator = CardAggregator(output_dir=output_dir)
    output_path = os.path.join(output_dir, "nonexistent.parquet")

    is_valid, errors = aggregator.validate_output(output_path, source_dir)

    assert not is_valid
    assert len(errors) > 0
    assert "not found" in errors[0].lower()


def test_rotate_versions(temp_dirs, sample_card_data):
    """Test version rotation."""
    _, output_dir = temp_dirs

    # Create initial file
    df = pd.DataFrame(sample_card_data)
    output_path = os.path.join(output_dir, "all_cards.parquet")
    df.to_parquet(output_path)

    aggregator = CardAggregator(output_dir=output_dir)

    # Rotate versions
    aggregator.rotate_versions(output_path, keep_versions=3)

    # Should have created v1
    v1_path = os.path.join(output_dir, "all_cards_v1.parquet")
    assert os.path.exists(v1_path)
    assert not os.path.exists(output_path)  # Original moved to v1

    # Create new file and rotate again
    df.to_parquet(output_path)
    aggregator.rotate_versions(output_path, keep_versions=3)

    # Should have v1 and v2
    v2_path = os.path.join(output_dir, "all_cards_v2.parquet")
    assert os.path.exists(v1_path)
    assert os.path.exists(v2_path)


def test_detect_changes(temp_dirs):
    """Test change detection for incremental updates."""
    source_dir, output_dir = temp_dirs

    # Create metadata file
    metadata_path = os.path.join(output_dir, ".aggregate_metadata.json")
    past_time = (datetime.now() - timedelta(hours=1)).isoformat()
    metadata = {"timestamp": past_time}
    with open(metadata_path, "w") as f:
        json.dump(metadata, f)

    # Create CSV files (one old, one new)
    old_file = os.path.join(source_dir, "old_cards.csv")
    new_file = os.path.join(source_dir, "new_cards.csv")

    df = pd.DataFrame({"name": ["Test Card"]})
    df.to_csv(old_file, index=False)
    df.to_csv(new_file, index=False)

    # Make old_file older than metadata
    old_time = (datetime.now() - timedelta(hours=2)).timestamp()
    os.utime(old_file, (old_time, old_time))

    aggregator = CardAggregator(output_dir=output_dir)
    changed_files = aggregator.detect_changes(source_dir, metadata_path)

    # Should only detect new_file as changed
    assert len(changed_files) == 1
    assert os.path.basename(changed_files[0]) == "new_cards.csv"


def test_aggregate_all_no_files(temp_dirs):
    """Test aggregation with no CSV files."""
    source_dir, output_dir = temp_dirs

    aggregator = CardAggregator(output_dir=output_dir)
    output_path = os.path.join(output_dir, "all_cards.parquet")

    with pytest.raises(ValueError, match="No CSV files found"):
        aggregator.aggregate_all(source_dir, output_path)


def test_aggregate_all_empty_files(temp_dirs):
    """Test aggregation with empty CSV files."""
    source_dir, output_dir = temp_dirs

    # Create empty CSV file
    empty_file = os.path.join(source_dir, "empty.csv")
    pd.DataFrame().to_csv(empty_file, index=False)

    aggregator = CardAggregator(output_dir=output_dir)
    output_path = os.path.join(output_dir, "all_cards.parquet")

    with pytest.raises(ValueError, match="No valid CSV files"):
        aggregator.aggregate_all(source_dir, output_path)
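For context, here is a sketch (not part of this diff) of how the aggregator exercised above might be run end to end. The csv_files source and output locations are assumptions; the class and method names come from these tests.

import os

from code.file_setup.card_aggregator import CardAggregator

source_dir = "csv_files"   # assumed location of the per-color CSV files
output_dir = "csv_files"   # assumed output location
output_path = os.path.join(output_dir, "all_cards.parquet")

aggregator = CardAggregator(output_dir=output_dir)
if os.path.exists(output_path):
    aggregator.rotate_versions(output_path, keep_versions=3)  # keep prior builds
stats = aggregator.aggregate_all(source_dir, output_path)     # merge + deduplicate
is_valid, errors = aggregator.validate_output(output_path, source_dir)
print(stats, is_valid, errors)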
code/tests/test_migration_compatibility.py (new file, 280 additions)
@@ -0,0 +1,280 @@
"""
Migration Compatibility Tests

Ensures backward compatibility during migration from individual CSV files
to consolidated all_cards.parquet. Tests verify that legacy adapter functions
produce identical results to direct AllCardsLoader calls.
"""

from __future__ import annotations

import os
import tempfile

import pandas as pd
import pytest

from code.services.all_cards_loader import AllCardsLoader
from code.services.legacy_loader_adapter import (
    load_all_cards,
    load_cards_by_color_identity,
    load_cards_by_name,
    load_cards_by_names,
    load_cards_by_type,
    load_cards_with_tag,
    load_cards_with_tags,
    search_cards,
)


@pytest.fixture
def sample_cards_df():
    """Create a sample DataFrame for testing."""
    return pd.DataFrame(
        {
            "name": [
                "Sol Ring",
                "Lightning Bolt",
                "Counterspell",
                "Giant Growth",
                "Goblin Token Maker",
            ],
            "colorIdentity": ["Colorless", "R", "U", "G", "R"],
            "type": ["Artifact", "Instant", "Instant", "Instant", "Creature — Goblin"],
            "text": [
                "Add two mana",
                "Deal 3 damage",
                "Counter target spell",
                "Target creature gets +3/+3",
                "When this enters, create two 1/1 red Goblin creature tokens",
            ],
            "themeTags": ["", "burn,damage", "control,counterspells", "combat,pump", "tokens,goblins"],
        }
    )


@pytest.fixture
def temp_parquet_file(sample_cards_df):
    """Create a temporary Parquet file for testing."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".parquet") as tmp:
        sample_cards_df.to_parquet(tmp.name, engine="pyarrow")
        yield tmp.name
    os.unlink(tmp.name)


def test_load_all_cards_adapter(temp_parquet_file):
    """Test load_all_cards() legacy function."""
    # Direct loader call
    loader = AllCardsLoader(file_path=temp_parquet_file)
    direct_result = loader.load()

    # Legacy adapter call
    # Note: We need to temporarily override the loader's file path
    from code.services import legacy_loader_adapter
    legacy_loader_adapter._shared_loader = AllCardsLoader(file_path=temp_parquet_file)

    with pytest.warns(DeprecationWarning):
        adapter_result = load_all_cards()

    # Results should be identical
    pd.testing.assert_frame_equal(direct_result, adapter_result)


def test_load_cards_by_name_adapter(temp_parquet_file):
    """Test load_cards_by_name() legacy function."""
    loader = AllCardsLoader(file_path=temp_parquet_file)
    direct_result = loader.get_by_name("Sol Ring")

    # Setup adapter with test file
    from code.services import legacy_loader_adapter
    legacy_loader_adapter._shared_loader = AllCardsLoader(file_path=temp_parquet_file)

    with pytest.warns(DeprecationWarning):
        adapter_result = load_cards_by_name("Sol Ring")

    # Results should be identical
    assert adapter_result is not None
    pd.testing.assert_series_equal(direct_result, adapter_result)


def test_load_cards_by_names_adapter(temp_parquet_file):
    """Test load_cards_by_names() legacy function."""
    loader = AllCardsLoader(file_path=temp_parquet_file)
    names = ["Sol Ring", "Lightning Bolt"]
    direct_result = loader.get_by_names(names)

    from code.services import legacy_loader_adapter
    legacy_loader_adapter._shared_loader = AllCardsLoader(file_path=temp_parquet_file)

    with pytest.warns(DeprecationWarning):
        adapter_result = load_cards_by_names(names)

    pd.testing.assert_frame_equal(direct_result, adapter_result)


def test_load_cards_by_type_adapter(temp_parquet_file):
    """Test load_cards_by_type() legacy function."""
    loader = AllCardsLoader(file_path=temp_parquet_file)
    direct_result = loader.filter_by_type("Instant")

    from code.services import legacy_loader_adapter
    legacy_loader_adapter._shared_loader = AllCardsLoader(file_path=temp_parquet_file)

    with pytest.warns(DeprecationWarning):
        adapter_result = load_cards_by_type("Instant")

    pd.testing.assert_frame_equal(direct_result, adapter_result)


def test_load_cards_with_tag_adapter(temp_parquet_file):
    """Test load_cards_with_tag() legacy function."""
    loader = AllCardsLoader(file_path=temp_parquet_file)
    direct_result = loader.filter_by_themes(["tokens"], mode="any")

    from code.services import legacy_loader_adapter
    legacy_loader_adapter._shared_loader = AllCardsLoader(file_path=temp_parquet_file)

    with pytest.warns(DeprecationWarning):
        adapter_result = load_cards_with_tag("tokens")

    pd.testing.assert_frame_equal(direct_result, adapter_result)


def test_load_cards_with_tags_any_mode(temp_parquet_file):
    """Test load_cards_with_tags() with mode='any'."""
    loader = AllCardsLoader(file_path=temp_parquet_file)
    direct_result = loader.filter_by_themes(["burn", "tokens"], mode="any")

    from code.services import legacy_loader_adapter
    legacy_loader_adapter._shared_loader = AllCardsLoader(file_path=temp_parquet_file)

    with pytest.warns(DeprecationWarning):
        adapter_result = load_cards_with_tags(["burn", "tokens"], require_all=False)

    pd.testing.assert_frame_equal(direct_result, adapter_result)


def test_load_cards_with_tags_all_mode(temp_parquet_file):
    """Test load_cards_with_tags() with mode='all'."""
    loader = AllCardsLoader(file_path=temp_parquet_file)
    direct_result = loader.filter_by_themes(["tokens", "goblins"], mode="all")

    from code.services import legacy_loader_adapter
    legacy_loader_adapter._shared_loader = AllCardsLoader(file_path=temp_parquet_file)

    with pytest.warns(DeprecationWarning):
        adapter_result = load_cards_with_tags(["tokens", "goblins"], require_all=True)

    pd.testing.assert_frame_equal(direct_result, adapter_result)


def test_load_cards_by_color_identity_adapter(temp_parquet_file):
    """Test load_cards_by_color_identity() legacy function."""
    loader = AllCardsLoader(file_path=temp_parquet_file)
    direct_result = loader.filter_by_color_identity(["R"])

    from code.services import legacy_loader_adapter
    legacy_loader_adapter._shared_loader = AllCardsLoader(file_path=temp_parquet_file)

    with pytest.warns(DeprecationWarning):
        adapter_result = load_cards_by_color_identity(["R"])

    pd.testing.assert_frame_equal(direct_result, adapter_result)


def test_search_cards_adapter(temp_parquet_file):
    """Test search_cards() legacy function."""
    loader = AllCardsLoader(file_path=temp_parquet_file)
    direct_result = loader.search("token", limit=100)

    from code.services import legacy_loader_adapter
    legacy_loader_adapter._shared_loader = AllCardsLoader(file_path=temp_parquet_file)

    with pytest.warns(DeprecationWarning):
        adapter_result = search_cards("token", limit=100)

    pd.testing.assert_frame_equal(direct_result, adapter_result)


def test_deprecation_warnings_logged(temp_parquet_file, caplog):
    """Test that deprecation warnings are properly logged."""
    from code.services import legacy_loader_adapter
    legacy_loader_adapter._shared_loader = AllCardsLoader(file_path=temp_parquet_file)

    with pytest.warns(DeprecationWarning):
        load_cards_by_name("Sol Ring")

    # Check that warning was logged
    assert any("DEPRECATION" in record.message for record in caplog.records)


def test_feature_flag_disabled(temp_parquet_file, monkeypatch):
    """Test behavior when USE_ALL_CARDS_FILE is disabled."""
    # Disable feature flag
    monkeypatch.setattr("code.settings.USE_ALL_CARDS_FILE", False)

    # Reimport to pick up new setting
    import importlib
    from code.services import legacy_loader_adapter
    importlib.reload(legacy_loader_adapter)

    legacy_loader_adapter._shared_loader = AllCardsLoader(file_path=temp_parquet_file)

    with pytest.warns(DeprecationWarning):
        result = load_all_cards()

    # Should return empty DataFrame when disabled
    assert result.empty


def test_adapter_uses_shared_loader(temp_parquet_file):
    """Test that adapter reuses shared loader instance for performance."""
    from code.services import legacy_loader_adapter

    # Clear any existing loader
    legacy_loader_adapter._shared_loader = None
    legacy_loader_adapter._shared_loader = AllCardsLoader(file_path=temp_parquet_file)

    with pytest.warns(DeprecationWarning):
        load_all_cards()

    loader1 = legacy_loader_adapter._shared_loader

    with pytest.warns(DeprecationWarning):
        load_cards_by_name("Sol Ring")

    loader2 = legacy_loader_adapter._shared_loader

    # Should be the same instance
    assert loader1 is loader2


def test_multiple_calls_use_cache(temp_parquet_file, monkeypatch):
    """Test that multiple adapter calls benefit from caching."""
    import time
    from code.services import legacy_loader_adapter

    # Ensure feature flag is enabled
    monkeypatch.setattr("code.settings.USE_ALL_CARDS_FILE", True)

    # Reimport to pick up setting
    import importlib
    importlib.reload(legacy_loader_adapter)

    legacy_loader_adapter._shared_loader = AllCardsLoader(file_path=temp_parquet_file)

    # First call (loads from disk)
    start = time.time()
    with pytest.warns(DeprecationWarning):
        load_all_cards()
    first_time = time.time() - start

    # Second call (should use cache)
    start = time.time()
    with pytest.warns(DeprecationWarning):
        load_all_cards()
    second_time = time.time() - start

    # Cache should make second call faster (or at least not slower)
    # Use a more lenient check since file is very small
    assert second_time <= first_time * 2  # Allow some variance
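For reference, a hypothetical shape of one legacy adapter wrapper, inferred from the behavior these tests assert (a DeprecationWarning, a logged "DEPRECATION" message, and a shared loader instance). The real code/services/legacy_loader_adapter.py may differ in details such as logging and loader construction.

from __future__ import annotations

import logging
import warnings

import pandas as pd

from code.services.all_cards_loader import AllCardsLoader

logger = logging.getLogger(__name__)
_shared_loader: AllCardsLoader | None = None


def load_cards_by_type(card_type: str) -> pd.DataFrame:
    """Deprecated shim: delegates to AllCardsLoader.filter_by_type()."""
    warnings.warn(
        "load_cards_by_type is deprecated; use AllCardsLoader.filter_by_type",
        DeprecationWarning,
        stacklevel=2,
    )
    logger.warning("DEPRECATION: load_cards_by_type called")
    global _shared_loader
    if _shared_loader is None:  # reuse one loader so all shims share its cache
        _shared_loader = AllCardsLoader()  # assumes a default file_path exists
    return _shared_loader.filter_by_type(card_type)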