From fc911b818e52cc523c0fca022a2b92eda21e1ea9 Mon Sep 17 00:00:00 2001
From: matt
Date: Fri, 17 Oct 2025 16:41:44 -0700
Subject: [PATCH 1/3] fix: correct module path for all_cards.parquet generation in CI

Changed from non-existent code.web.services.card_loader to correct
code.file_setup.card_aggregator.CardAggregator module.

Fixes ModuleNotFoundError in build-similarity-cache workflow.
---
 .github/workflows/build-similarity-cache.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build-similarity-cache.yml b/.github/workflows/build-similarity-cache.yml
index f66cd8c..aac38bb 100644
--- a/.github/workflows/build-similarity-cache.yml
+++ b/.github/workflows/build-similarity-cache.yml
@@ -86,7 +86,7 @@ jobs:
       - name: Build all_cards.parquet (needed for similarity cache, but not committed)
         if: steps.check_cache.outputs.needs_build == 'true'
         run: |
-          python -c "from code.web.services.card_loader import CardCatalogLoader; loader = CardCatalogLoader(); df = loader.load(); print(f'Created all_cards.parquet with {len(df):,} cards')"
+          python -c "from code.file_setup.card_aggregator import CardAggregator; agg = CardAggregator(); stats = agg.aggregate_all('csv_files', 'card_files/all_cards.parquet'); print(f'Created all_cards.parquet with {stats[\"total_cards\"]:,} cards')"
 
       - name: Build similarity cache (Parquet)
         if: steps.check_cache.outputs.needs_build == 'true'

From b26057f68d6c4052c22703c713acaf5f0711ec9d Mon Sep 17 00:00:00 2001
From: matt
Date: Fri, 17 Oct 2025 16:54:51 -0700
Subject: [PATCH 2/3] fix: allow similarity cache files in card_files directory

Added .gitignore exceptions for:
- card_files/similarity_cache.parquet
- card_files/similarity_cache_metadata.json

These files need to be committed to the similarity-cache-data branch
for distribution with Docker builds.
---
 .gitignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.gitignore b/.gitignore
index f8e1a3c..dae3847 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,6 +31,8 @@ csv_files/*
 !csv_files/testdata/
 !csv_files/testdata/**/*
 card_files/*
+!card_files/similarity_cache.parquet
+!card_files/similarity_cache_metadata.json
 
 deck_files/
 dist/

From 86752b351b8bdbedccd0c1d9a3b95251e17c5c76 Mon Sep 17 00:00:00 2001
From: matt
Date: Fri, 17 Oct 2025 17:11:04 -0700
Subject: [PATCH 3/3] feat: optimize cache workflow with orphan branch and age check

- Create/use orphan branch 'similarity-cache-data' for cache distribution
- Add age check to dockerhub-publish: only rebuild if cache >7 days old
- Use git add -f to force-add cache files (keeps .gitignore clean)
- Stash the freshly built cache files before switching branches: they are
  gitignored, so `git checkout` would otherwise silently overwrite them
  with the copies already committed on the cache branch
- Weekly scheduled builds will keep cache fresh automatically

This avoids rebuilding cache on every Docker publish while ensuring
cache is always reasonably fresh (<7 days old).
---
Review note (ignored by git am): the build-similarity-cache.yml hunk was
edited by hand to add the stash/restore steps, so the post-image blob hash
on its "index" line is stale. Regenerate with git format-patch after
applying if exact hashes matter.

 .github/workflows/build-similarity-cache.yml | 38 ++++++++++++++---
 .github/workflows/dockerhub-publish.yml      | 40 +++++++++++++++++++-
 .gitignore                                   |  2 -
 3 files changed, 72 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/build-similarity-cache.yml b/.github/workflows/build-similarity-cache.yml
index aac38bb..dedd2f4 100644
--- a/.github/workflows/build-similarity-cache.yml
+++ b/.github/workflows/build-similarity-cache.yml
@@ -145,12 +145,40 @@ jobs:
           git config --local user.email "github-actions[bot]@users.noreply.github.com"
           git config --local user.name "github-actions[bot]"
 
-          # Switch to or create dedicated cache branch
-          git checkout -b similarity-cache-data || git checkout similarity-cache-data
+          # Stash the freshly built cache outside the work tree. The files are
+          # gitignored, so switching branches would silently overwrite them
+          # with the copies already committed on similarity-cache-data.
+          CACHE_TMP="$(mktemp -d)"
+          cp card_files/similarity_cache.parquet "$CACHE_TMP/"
+          cp card_files/similarity_cache_metadata.json "$CACHE_TMP/"
+
+          # Fetch all branches
+          git fetch origin
 
-          # Add only the similarity cache files (not all_cards.parquet)
-          git add card_files/similarity_cache.parquet
-          git add card_files/similarity_cache_metadata.json
+          # Try to checkout existing branch, or create new orphan branch
+          if git ls-remote --heads origin similarity-cache-data | grep similarity-cache-data; then
+            echo "Checking out existing similarity-cache-data branch..."
+            git checkout similarity-cache-data
+          else
+            echo "Creating new orphan branch similarity-cache-data..."
+            git checkout --orphan similarity-cache-data
+            git rm -rf . || true
+            # Create minimal README for the branch
+            echo "# Similarity Cache Data" > README.md
+            echo "This branch contains pre-built similarity cache files for the MTG Deckbuilder." >> README.md
+            echo "Updated automatically by GitHub Actions." >> README.md
+          fi
+
+          # Ensure card_files directory exists, then restore the freshly built
+          # cache over whatever the checkout left in the work tree
+          mkdir -p card_files
+          cp "$CACHE_TMP"/similarity_cache.parquet card_files/
+          cp "$CACHE_TMP"/similarity_cache_metadata.json card_files/
+
+          # Add only the similarity cache files (use -f to override .gitignore)
+          git add -f card_files/similarity_cache.parquet
+          git add -f card_files/similarity_cache_metadata.json
+          git add README.md 2>/dev/null || true
 
           # Check if there are changes to commit
           if git diff --staged --quiet; then
diff --git a/.github/workflows/dockerhub-publish.yml b/.github/workflows/dockerhub-publish.yml
index 4efa2e9..54b10fa 100644
--- a/.github/workflows/dockerhub-publish.yml
+++ b/.github/workflows/dockerhub-publish.yml
@@ -7,15 +7,53 @@ on:
   workflow_dispatch:
 
 jobs:
+  check-cache-age:
+    name: Check similarity cache age
+    runs-on: ubuntu-latest
+    outputs:
+      needs_rebuild: ${{ steps.check.outputs.needs_rebuild }}
+    steps:
+      - name: Check cache age
+        id: check
+        run: |
+          # Check if cache is older than 7 days
+          CACHE_URL="https://raw.githubusercontent.com/${{ github.repository }}/similarity-cache-data/card_files/similarity_cache_metadata.json"
+
+          if wget -q --spider "$CACHE_URL"; then
+            wget -q "$CACHE_URL" -O metadata.json
+            BUILD_DATE=$(jq -r '.build_date' metadata.json)
+
+            # Calculate age in seconds
+            BUILD_EPOCH=$(date -d "$BUILD_DATE" +%s 2>/dev/null || echo 0)
+            NOW_EPOCH=$(date +%s)
+            AGE_DAYS=$(( ($NOW_EPOCH - $BUILD_EPOCH) / 86400 ))
+
+            echo "Cache age: $AGE_DAYS days"
+
+            if [ $AGE_DAYS -gt 7 ]; then
+              echo "needs_rebuild=true" >> $GITHUB_OUTPUT
+              echo "Cache is stale (>7 days), will rebuild"
+            else
+              echo "needs_rebuild=false" >> $GITHUB_OUTPUT
+              echo "Cache is fresh (<7 days), skipping rebuild"
+            fi
+          else
+            echo "needs_rebuild=true" >> $GITHUB_OUTPUT
+            echo "Cache not found, will build"
+          fi
+
   build-cache:
     name: Build similarity cache
+    needs: check-cache-age
+    if: needs.check-cache-age.outputs.needs_rebuild == 'true'
     uses: ./.github/workflows/build-similarity-cache.yml
     secrets: inherit
 
   prepare:
     name: Prepare metadata
     runs-on: ubuntu-latest
-    needs: build-cache
+    needs: [check-cache-age, build-cache]
+    if: always() && (needs.build-cache.result == 'success' || needs.build-cache.result == 'skipped')
     permissions:
       contents: read
     outputs:
diff --git a/.gitignore b/.gitignore
index dae3847..f8e1a3c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,8 +31,6 @@ csv_files/*
 !csv_files/testdata/
 !csv_files/testdata/**/*
 card_files/*
-!card_files/similarity_cache.parquet
-!card_files/similarity_cache_metadata.json
 
 deck_files/
 dist/