mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-12-16 23:50:12 +01:00
feat: consolidate card data into optimized format for faster queries and reduced file sizes
This commit is contained in:
parent
5753bb19f8
commit
f70ffca23e
24 changed files with 2903 additions and 135 deletions
|
|
@ -108,6 +108,53 @@ async def setup_start_get(request: Request):
|
|||
return JSONResponse({"ok": False}, status_code=500)
|
||||
|
||||
|
||||
@router.post("/rebuild-cards")
async def rebuild_cards():
    """Manually trigger card aggregation (all_cards.parquet, commander_cards.parquet, background_cards.parquet).

    The work runs on a daemon thread so the request returns immediately with
    HTTP 202; progress and failures are only reported via ``print``.
    """

    def runner():
        try:
            print("Starting manual card aggregation...")
            # Imports are done inside the worker, as in the rest of this handler.
            from file_setup.card_aggregator import CardAggregator  # type: ignore
            import pandas as pd  # type: ignore
            import os

            aggregator = CardAggregator()

            # Step 1: consolidate every card CSV into all_cards.parquet.
            stats = aggregator.aggregate_all('csv_files', 'card_files/all_cards.parquet')
            print(f"Aggregated {stats['total_cards']} cards into all_cards.parquet ({stats['file_size_mb']} MB)")

            # Step 2: mirror the standalone CSVs as Parquet, in this order.
            # Each entry: (source CSV, target Parquet, name shown in the log,
            # plural noun shown in the log).
            conversions = (
                (
                    'csv_files/commander_cards.csv',
                    'card_files/commander_cards.parquet',
                    'commander_cards.csv',
                    'commanders',
                ),
                (
                    'csv_files/background_cards.csv',
                    'card_files/background_cards.parquet',
                    'background_cards.csv',
                    'backgrounds',
                ),
            )
            for csv_path, parquet_path, csv_name, noun in conversions:
                if not os.path.exists(csv_path):
                    # A missing source CSV is skipped silently.
                    continue
                frame = pd.read_csv(csv_path, comment='#', low_memory=False)
                # These columns are forced to strings before writing Parquet.
                for column in ("power", "toughness", "keywords"):
                    if column in frame.columns:
                        frame[column] = frame[column].astype(str)
                frame.to_parquet(parquet_path, engine="pyarrow", compression="snappy", index=False)
                print(f"Converted {csv_name} to Parquet ({len(frame)} {noun})")

            print("Card aggregation complete!")
        except Exception as e:
            # Best-effort background job: report the failure and let the thread end.
            print(f"Card aggregation failed: {e}")

    worker = threading.Thread(target=runner, daemon=True)
    worker.start()
    return JSONResponse({"ok": True, "message": "Card aggregation started"}, status_code=202)
|
||||
|
||||
|
||||
@router.get("/", response_class=HTMLResponse)
async def setup_index(request: Request) -> HTMLResponse:
    """Render the ``setup/index.html`` template for the setup landing page."""
    # The template only receives the current request in its context.
    context = {"request": request}
    return templates.TemplateResponse("setup/index.html", context)
|
||||
|
|
|
|||
|
|
@ -1330,6 +1330,51 @@ def _ensure_setup_ready(out, force: bool = False) -> None:
|
|||
os.makedirs('csv_files', exist_ok=True)
|
||||
with open(flag_path, 'w', encoding='utf-8') as _fh:
|
||||
json.dump({'tagged_at': _dt.now().isoformat(timespec='seconds')}, _fh)
|
||||
|
||||
# Aggregate card files into Parquet AFTER tagging completes
|
||||
try:
|
||||
_write_status({"running": True, "phase": "aggregating", "message": "Consolidating card data...", "percent": 90})
|
||||
out("Aggregating card CSVs into Parquet files...")
|
||||
from file_setup.card_aggregator import CardAggregator # type: ignore
|
||||
aggregator = CardAggregator()
|
||||
|
||||
# Aggregate all_cards.parquet
|
||||
stats = aggregator.aggregate_all('csv_files', 'card_files/all_cards.parquet')
|
||||
out(f"Aggregated {stats['total_cards']} cards into all_cards.parquet ({stats['file_size_mb']} MB)")
|
||||
|
||||
# Convert commander_cards.csv and background_cards.csv to Parquet
|
||||
import pandas as pd # type: ignore
|
||||
|
||||
# Convert commander_cards.csv
|
||||
commander_csv = 'csv_files/commander_cards.csv'
|
||||
commander_parquet = 'card_files/commander_cards.parquet'
|
||||
if os.path.exists(commander_csv):
|
||||
df_cmd = pd.read_csv(commander_csv, comment='#', low_memory=False)
|
||||
# Convert mixed-type columns to strings for Parquet compatibility
|
||||
for col in ["power", "toughness", "keywords"]:
|
||||
if col in df_cmd.columns:
|
||||
df_cmd[col] = df_cmd[col].astype(str)
|
||||
df_cmd.to_parquet(commander_parquet, engine="pyarrow", compression="snappy", index=False)
|
||||
out(f"Converted commander_cards.csv to Parquet ({len(df_cmd)} commanders)")
|
||||
|
||||
# Convert background_cards.csv
|
||||
background_csv = 'csv_files/background_cards.csv'
|
||||
background_parquet = 'card_files/background_cards.parquet'
|
||||
if os.path.exists(background_csv):
|
||||
df_bg = pd.read_csv(background_csv, comment='#', low_memory=False)
|
||||
# Convert mixed-type columns to strings for Parquet compatibility
|
||||
for col in ["power", "toughness", "keywords"]:
|
||||
if col in df_bg.columns:
|
||||
df_bg[col] = df_bg[col].astype(str)
|
||||
df_bg.to_parquet(background_parquet, engine="pyarrow", compression="snappy", index=False)
|
||||
out(f"Converted background_cards.csv to Parquet ({len(df_bg)} backgrounds)")
|
||||
|
||||
_write_status({"running": True, "phase": "aggregating", "message": "Card aggregation complete", "percent": 95})
|
||||
except Exception as e:
|
||||
# Non-fatal: aggregation failure shouldn't block the rest of setup
|
||||
out(f"Warning: Card aggregation failed: {e}")
|
||||
_write_status({"running": True, "phase": "aggregating", "message": f"Aggregation failed (non-fatal): {e}", "percent": 95})
|
||||
|
||||
# Final status with percent 100 and timing info
|
||||
finished_dt = _dt.now()
|
||||
finished = finished_dt.isoformat(timespec='seconds')
|
||||
|
|
|
|||
|
|
@ -43,8 +43,9 @@
|
|||
<div class="muted" id="themes-stale-line" style="margin-top:.25rem; display:none; color:#f87171;"></div>
|
||||
</div>
|
||||
</details>
|
||||
<div style="margin-top:.75rem;">
|
||||
<div style="margin-top:.75rem; display:flex; gap:.5rem; flex-wrap:wrap;">
|
||||
<button type="button" id="btn-refresh-themes" class="action-btn" onclick="refreshThemes()">Refresh Themes Only</button>
|
||||
<button type="button" id="btn-rebuild-cards" class="action-btn" onclick="rebuildCards()">Rebuild Card Files</button>
|
||||
</div>
|
||||
</section>
|
||||
<script>
|
||||
|
|
@ -214,6 +215,30 @@
|
|||
})
|
||||
.finally(function(){ if (btn) btn.disabled = false; });
|
||||
};
|
||||
// Ask the server to rebuild the consolidated card files.
//
// POST /setup/rebuild-cards replies as soon as the background job has been
// started, so a successful response means "started", not "finished". The
// button therefore reports that the rebuild was started rather than claiming
// it is complete.
window.rebuildCards = function(){
  var btn = document.getElementById('btn-rebuild-cards');
  var idleLabel = 'Rebuild Card Files';

  // Update the button caption; a no-op when the button is not on the page.
  function setLabel(text){
    if (btn) btn.textContent = text;
  }

  // Restore the idle caption and re-enable the button after a short delay,
  // so the transient status message stays readable.
  function resetSoon(){
    setTimeout(function(){
      setLabel(idleLabel);
      if (btn) btn.disabled = false;
    }, 2000);
  }

  if (btn) btn.disabled = true;
  setLabel('Rebuilding...');

  fetch('/setup/rebuild-cards', { method: 'POST', headers: { 'Content-Type': 'application/json' } })
    .then(function(r){
      if (!r.ok) throw new Error('Rebuild failed');
      return r.json();
    })
    .then(function(){
      setLabel('Rebuild Started');
    })
    .catch(function(err){
      // Keep the failure visible to developers instead of dropping it.
      if (window.console && console.error) console.error('Rebuild request failed:', err);
      setLabel('Rebuild Failed');
    })
    .finally(resetSoon);
};
|
||||
setInterval(poll, 3000);
|
||||
poll();
|
||||
pollThemes();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue