feat: theme catalog optimization with tag search and faster enrichment

2025-12-16 23:50:12 +01:00 · 2025-10-15 17:17:46 -07:00 · 2025-10-15 17:17:46 -07:00 · 9e6c68f559
commit 9e6c68f559
parent 952b151162
26 changed files with 5906 additions and 5688 deletions
--- a/code/web/app.py
+++ b/code/web/app.py
@ -2205,6 +2205,7 @@ from .routes import themes as themes_routes  # noqa: E402
 from .routes import commanders as commanders_routes  # noqa: E402
 from .routes import partner_suggestions as partner_suggestions_routes  # noqa: E402
 from .routes import telemetry as telemetry_routes  # noqa: E402
+from .routes import cards as cards_routes  # noqa: E402
 app.include_router(build_routes.router)
 app.include_router(config_routes.router)
 app.include_router(decks_routes.router)
@ -2214,6 +2215,7 @@ app.include_router(themes_routes.router)
 app.include_router(commanders_routes.router)
 app.include_router(partner_suggestions_routes.router)
 app.include_router(telemetry_routes.router)
+app.include_router(cards_routes.router)

 # Warm validation cache early to reduce first-call latency in tests and dev
 try:
--- a/code/web/routes/cards.py
+++ b/code/web/routes/cards.py
@ -0,0 +1,186 @@
+"""Card browsing and tag search API endpoints."""
+from __future__ import annotations
+
+from typing import Optional
+from fastapi import APIRouter, Query
+from fastapi.responses import JSONResponse
+
+# Import tag index from M3
+try:
+    from code.tagging.tag_index import get_tag_index
+except ImportError:
+    from tagging.tag_index import get_tag_index
+
+# Import all cards loader
+try:
+    from code.services.all_cards_loader import AllCardsLoader
+except ImportError:
+    from services.all_cards_loader import AllCardsLoader
+
+router = APIRouter(prefix="/api/cards", tags=["cards"])
+
+# Cache for all_cards loader
+_all_cards_loader: Optional[AllCardsLoader] = None
+
+
+def _get_all_cards_loader() -> AllCardsLoader:
+    """Get cached AllCardsLoader instance."""
+    global _all_cards_loader
+    if _all_cards_loader is None:
+        _all_cards_loader = AllCardsLoader()
+    return _all_cards_loader
+
+
+@router.get("/by-tags")
+async def search_by_tags(
+    tags: str = Query(..., description="Comma-separated list of theme tags"),
+    logic: str = Query("AND", description="Search logic: AND (intersection) or OR (union)"),
+    limit: int = Query(100, ge=1, le=1000, description="Maximum number of results"),
+) -> JSONResponse:
+    """Search for cards by theme tags.
+    
+    Examples:
+        /api/cards/by-tags?tags=tokens&logic=AND
+        /api/cards/by-tags?tags=tokens,sacrifice&logic=AND
+        /api/cards/by-tags?tags=lifegain,lifelink&logic=OR
+    
+    Args:
+        tags: Comma-separated theme tags to search for
+        logic: "AND" for cards with all tags, "OR" for cards with any tag
+        limit: Maximum results to return
+        
+    Returns:
+        JSON with matching cards and metadata
+    """
+    try:
+        # Parse tags
+        tag_list = [t.strip() for t in tags.split(",") if t.strip()]
+        if not tag_list:
+            return JSONResponse(
+                status_code=400,
+                content={"error": "No valid tags provided"}
+            )
+        
+        # Get tag index and find matching cards
+        tag_index = get_tag_index()
+        
+        if logic.upper() == "AND":
+            card_names = tag_index.get_cards_with_all_tags(tag_list)
+        elif logic.upper() == "OR":
+            card_names = tag_index.get_cards_with_any_tags(tag_list)
+        else:
+            return JSONResponse(
+                status_code=400,
+                content={"error": f"Invalid logic: {logic}. Use AND or OR."}
+            )
+        
+        # Load full card data
+        all_cards = _get_all_cards_loader().load()
+        matching_cards = all_cards[all_cards["name"].isin(card_names)]
+        
+        # Limit results
+        matching_cards = matching_cards.head(limit)
+        
+        # Convert to dict
+        results = matching_cards.to_dict("records")
+        
+        return JSONResponse(content={
+            "tags": tag_list,
+            "logic": logic.upper(),
+            "total_matches": len(card_names),
+            "returned": len(results),
+            "limit": limit,
+            "cards": results
+        })
+        
+    except Exception as e:
+        return JSONResponse(
+            status_code=500,
+            content={"error": f"Search failed: {str(e)}"}
+        )
+
+
+@router.get("/tags/search")
+async def search_tags(
+    q: str = Query(..., min_length=2, description="Tag prefix to search for"),
+    limit: int = Query(10, ge=1, le=50, description="Maximum number of suggestions"),
+) -> JSONResponse:
+    """Autocomplete search for theme tags.
+    
+    Examples:
+        /api/cards/tags/search?q=life
+        /api/cards/tags/search?q=token&limit=5
+    
+    Args:
+        q: Tag prefix (minimum 2 characters)
+        limit: Maximum suggestions to return
+        
+    Returns:
+        JSON with matching tags sorted by popularity
+    """
+    try:
+        tag_index = get_tag_index()
+        
+        # Get all tags with counts - get_popular_tags returns all tags when given a high limit
+        all_tags_with_counts = tag_index.get_popular_tags(limit=10000)
+        
+        # Filter by prefix (case-insensitive)
+        prefix_lower = q.lower()
+        matches = [
+            (tag, count)
+            for tag, count in all_tags_with_counts
+            if tag.lower().startswith(prefix_lower)
+        ]
+        
+        # Already sorted by popularity from get_popular_tags
+        # Limit results
+        matches = matches[:limit]
+        
+        return JSONResponse(content={
+            "query": q,
+            "matches": [
+                {"tag": tag, "card_count": count}
+                for tag, count in matches
+            ]
+        })
+        
+    except Exception as e:
+        return JSONResponse(
+            status_code=500,
+            content={"error": f"Tag search failed: {str(e)}"}
+        )
+
+
+@router.get("/tags/popular")
+async def get_popular_tags(
+    limit: int = Query(50, ge=1, le=200, description="Number of popular tags to return"),
+) -> JSONResponse:
+    """Get the most popular theme tags by card count.
+    
+    Examples:
+        /api/cards/tags/popular
+        /api/cards/tags/popular?limit=20
+    
+    Args:
+        limit: Maximum tags to return
+        
+    Returns:
+        JSON with popular tags sorted by card count
+    """
+    try:
+        tag_index = get_tag_index()
+        popular = tag_index.get_popular_tags(limit=limit)
+        
+        return JSONResponse(content={
+            "count": len(popular),
+            "tags": [
+                {"tag": tag, "card_count": count}
+                for tag, count in popular
+            ]
+        })
+        
+    except Exception as e:
+        return JSONResponse(
+            status_code=500,
+            content={"error": f"Failed to get popular tags: {str(e)}"}
+        )
--- a/code/web/routes/commanders.py
+++ b/code/web/routes/commanders.py
@ -526,6 +526,52 @@ def _build_theme_info(records: Sequence[CommanderRecord]) -> dict[str, Commander
    return info


+@router.get("/theme-autocomplete", response_class=HTMLResponse)
+async def theme_autocomplete(
+    request: Request,
+    theme: str = Query(..., min_length=2, description="Theme prefix to search for"),
+    limit: int = Query(20, ge=1, le=50),
+) -> HTMLResponse:
+    """HTMX endpoint for theme tag autocomplete."""
+    try:
+        # Import tag_index
+        try:
+            from code.tagging.tag_index import get_tag_index
+        except ImportError:
+            from tagging.tag_index import get_tag_index
+        
+        tag_index = get_tag_index()
+        
+        # Get all tags with counts - get_popular_tags returns all tags when given a high limit
+        all_tags_with_counts = tag_index.get_popular_tags(limit=10000)
+        
+        # Filter by prefix (case-insensitive)
+        prefix_lower = theme.lower()
+        matches = [
+            (tag, count)
+            for tag, count in all_tags_with_counts
+            if tag.lower().startswith(prefix_lower)
+        ]
+        
+        # Already sorted by popularity from get_popular_tags
+        matches = matches[:limit]
+        
+        # Generate HTML suggestions with ARIA attributes
+        html_parts = []
+        for tag, count in matches:
+            html_parts.append(
+                f'<div class="autocomplete-item" data-value="{tag}" role="option">'
+                f'{tag} <span class="tag-count">({count})</span></div>'
+            )
+        
+        html = "\n".join(html_parts) if html_parts else '<div class="autocomplete-empty">No matching themes</div>'
+        
+        return HTMLResponse(content=html)
+        
+    except Exception as e:
+        return HTMLResponse(content=f'<div class="autocomplete-error">Error: {str(e)}</div>')
+
+
@router.get("/", response_class=HTMLResponse)
 async def commanders_index(
    request: Request,
--- a/code/web/services/orchestrator.py
+++ b/code/web/services/orchestrator.py
@ -153,40 +153,44 @@ def _display_tags_from_entry(entry: Dict[str, Any]) -> List[str]:
 def _run_theme_metadata_enrichment(out_func=None) -> None:
    """Run full metadata enrichment sequence after theme catalog/YAML generation.

-    Idempotent: each script is safe to re-run; errors are swallowed (logged) to avoid
+    Uses consolidated ThemeEnrichmentPipeline for 5-10x faster processing.
+    Idempotent: safe to re-run; errors are swallowed (logged) to avoid
    impacting primary setup/tagging pipeline. Designed to centralize logic so both
    manual refresh (routes/themes.py) and automatic setup flows invoke identical steps.
    """
    try:
        import os
-        import sys
-        import subprocess
-        root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..'))
-        scripts_dir = os.path.join(root, 'code', 'scripts')
-        py = sys.executable
-        steps: List[List[str]] = [
-            [py, os.path.join(scripts_dir, 'autofill_min_examples.py')],
-            [py, os.path.join(scripts_dir, 'pad_min_examples.py'), '--min', os.environ.get('EDITORIAL_MIN_EXAMPLES', '5')],
-            [py, os.path.join(scripts_dir, 'cleanup_placeholder_examples.py'), '--apply'],
-            [py, os.path.join(scripts_dir, 'purge_anchor_placeholders.py'), '--apply'],
-            # Augment YAML with description / popularity buckets from the freshly built catalog
-            [py, os.path.join(scripts_dir, 'augment_theme_yaml_from_catalog.py')],
-            [py, os.path.join(scripts_dir, 'generate_theme_editorial_suggestions.py'), '--apply', '--limit-yaml', '0'],
-            [py, os.path.join(scripts_dir, 'lint_theme_editorial.py')],  # non-strict lint pass
-        ]
+        from pathlib import Path
+        from code.tagging.theme_enrichment import run_enrichment_pipeline
+        
+        # parents[3] is three levels above this file's directory, the same place the
+        # removed os.path.join(dirname, '..', '..', '..') pointed at (resolve() also
+        # follows symlinks).
+        root = Path(__file__).resolve().parents[3]
+        # A non-integer EDITORIAL_MIN_EXAMPLES raises here and is reported by the
+        # outer except below.
+        min_examples = int(os.environ.get('EDITORIAL_MIN_EXAMPLES', '5'))
+        
+        # Progress sink: forwards to out_func when one was given and never raises.
        def _emit(msg: str):
            try:
                if out_func:
                    out_func(msg)
            except Exception:
                pass
-        for cmd in steps:
+        
+        # Run consolidated pipeline instead of 7 separate subprocess scripts
+        stats = run_enrichment_pipeline(
+            root=root,
+            min_examples=min_examples,
+            write=True,
+            enforce_min=False,  # Non-strict lint pass
+            strict=False,
+            progress_callback=_emit,
+        )
+        
+        _emit(f"Theme enrichment complete: {stats.total_themes} themes processed")
+        
+    except Exception as e:
+        if out_func:
+            # _emit is defined inside the try block and may not exist yet if an
+            # import above failed, so out_func is called directly here.
            try:
-                subprocess.run(cmd, check=True)
-            except Exception as e:
-                _emit(f"[metadata_enrich] step failed ({os.path.basename(cmd[1]) if len(cmd)>1 else cmd}): {e}")
-                continue
-    except Exception:
+                out_func(f"[metadata_enrich] pipeline failed: {e}")
+            except Exception:
+                pass
        return


@ -1144,6 +1148,13 @@ def _ensure_setup_ready(out, force: bool = False) -> None:
            # Run metadata enrichment (best-effort) after export sequence.
            try:
                _run_theme_metadata_enrichment(out_func)
+                # Rebuild theme_list.json to pick up newly generated example_cards/commanders
+                # from the enrichment pipeline (which populates them from CSV data)
+                if use_merge and os.path.exists(build_script):
+                    args = [_sys.executable, build_script]
+                    if force:
+                        args.append('--force')
+                    _run(args, check=True)
            except Exception:
                pass
            try:
--- a/code/web/templates/commanders/index.html
+++ b/code/web/templates/commanders/index.html
@ -23,15 +23,23 @@
      <span class="filter-label">Commander name</span>
      <input type="search" id="commander-search" name="q" value="{{ query }}" placeholder="Search commander names..." autocomplete="off" />
    </label>
-    <label>
-      <span class="filter-label">Theme</span>
-      <input type="search" id="commander-theme" name="theme" value="{{ theme_query }}" placeholder="Search themes..." list="theme-suggestions" autocomplete="off" />
-    </label>
-    <datalist id="theme-suggestions">
-      {% for name in theme_options[:200] %}
-        <option value="{{ name }}"></option>
-      {% endfor %}
-    </datalist>
+                    <div class="filter-field">
+                    <label for="commander-theme" class="filter-label">Theme:</label>
+                    <div class="autocomplete-container">
+                        <input type="search" id="commander-theme" name="theme" value="{{ theme_query }}" 
+                               placeholder="Search themes..." autocomplete="off" 
+                               role="combobox" 
+                               aria-autocomplete="list" 
+                               aria-controls="theme-suggestions" 
+                               aria-expanded="false"
+                               hx-get="/commanders/theme-autocomplete" 
+                               hx-trigger="keyup changed delay:300ms" 
+                               hx-target="#theme-suggestions" 
+                               hx-include="[name='theme']"
+                               hx-swap="innerHTML" />
+                        <div id="theme-suggestions" class="autocomplete-dropdown" role="listbox" aria-label="Theme suggestions"></div>
+                    </div>
+                </div>
    <label>
      <span class="filter-label">Color identity</span>
      <select id="commander-color" name="color">
@ -185,6 +193,18 @@
    .commander-thumb img { width:100%; }
    .skeleton-thumb { width:min(70vw, 220px); height:calc(min(70vw, 220px) * 1.4); }
  }
+
+  /* Autocomplete dropdown styles */
+  .autocomplete-container { position:relative; width:100%; }
+  .autocomplete-dropdown { position:absolute; top:100%; left:0; right:0; z-index:1000; background:var(--panel); border:1px solid var(--border); border-radius:8px; margin-top:4px; max-height:280px; overflow-y:auto; box-shadow:0 4px 12px rgba(0,0,0,.25); display:none; }
+  .autocomplete-dropdown:not(:empty) { display:block; }
+  .autocomplete-item { padding:.5rem .75rem; cursor:pointer; border-bottom:1px solid var(--border); transition:background .15s ease; }
+  .autocomplete-item:last-child { border-bottom:none; }
+  .autocomplete-item:hover, .autocomplete-item:focus, .autocomplete-item.selected { background:rgba(148,163,184,.15); }
+  .autocomplete-item.selected { background:rgba(148,163,184,.25); border-left:3px solid var(--ring); padding-left:calc(.75rem - 3px); }
+  .autocomplete-item .tag-count { color:var(--muted); font-size:.85rem; float:right; }
+  .autocomplete-empty { padding:.75rem; text-align:center; color:var(--muted); font-size:.85rem; }
+  .autocomplete-error { padding:.75rem; text-align:center; color:#f87171; font-size:.85rem; }
 </style>
 <script>
  (function(){
@ -215,6 +235,107 @@
        resetPage();
        setLastTrigger('theme');
      });
+      
+      // Autocomplete dropdown handling
+      const autocompleteDropdown = document.getElementById('theme-suggestions');
+      if (autocompleteDropdown) {
+        let selectedIndex = -1;
+        
+        // Helper to get all autocomplete items
+        const getItems = () => Array.from(autocompleteDropdown.querySelectorAll('.autocomplete-item'));
+        
+        // Helper to select an item by index
+        const selectItem = (index) => {
+          const items = getItems();
+          items.forEach((item, i) => {
+            if (i === index) {
+              item.classList.add('selected');
+              item.scrollIntoView({ block: 'nearest', behavior: 'smooth' });
+            } else {
+              item.classList.remove('selected');
+            }
+          });
+          selectedIndex = index;
+        };
+        
+        // Helper to apply selected item
+        const applySelectedItem = () => {
+          const items = getItems();
+          const item = items[selectedIndex];
+          if (item && item.dataset.value) {
+            themeField.value = item.dataset.value;
+            autocompleteDropdown.innerHTML = '';
+            selectedIndex = -1;
+            themeField.dispatchEvent(new Event('input', { bubbles: true }));
+            form.dispatchEvent(new Event('submit', { bubbles: true }));
+          }
+        };
+        
+        // Reset selection when dropdown content changes
+        const observer = new MutationObserver(() => {
+          selectedIndex = -1;
+          getItems().forEach(item => item.classList.remove('selected'));
+          // Update aria-expanded based on dropdown content
+          const hasContent = autocompleteDropdown.children.length > 0;
+          themeField.setAttribute('aria-expanded', hasContent ? 'true' : 'false');
+        });
+        observer.observe(autocompleteDropdown, { childList: true });
+        
+        // Click handler for autocomplete items
+        document.body.addEventListener('click', (e) => {
+          const item = e.target.closest('.autocomplete-item');
+          if (item && item.dataset.value) {
+            themeField.value = item.dataset.value;
+            autocompleteDropdown.innerHTML = '';
+            selectedIndex = -1;
+            themeField.dispatchEvent(new Event('input', { bubbles: true }));
+            form.dispatchEvent(new Event('submit', { bubbles: true }));
+          }
+        });
+        
+        // Close dropdown when clicking outside
+        document.addEventListener('click', (e) => {
+          if (!e.target.closest('.autocomplete-container')) {
+            autocompleteDropdown.innerHTML = '';
+            selectedIndex = -1;
+          }
+        });
+        
+        // Keyboard navigation
+        themeField.addEventListener('keydown', (e) => {
+          const items = getItems();
+          const hasItems = items.length > 0;
+          
+          if (e.key === 'Escape') {
+            autocompleteDropdown.innerHTML = '';
+            selectedIndex = -1;
+            e.preventDefault();
+          } else if (e.key === 'ArrowDown' && hasItems) {
+            e.preventDefault();
+            const newIndex = selectedIndex < items.length - 1 ? selectedIndex + 1 : 0;
+            selectItem(newIndex);
+          } else if (e.key === 'ArrowUp' && hasItems) {
+            e.preventDefault();
+            const newIndex = selectedIndex > 0 ? selectedIndex - 1 : items.length - 1;
+            selectItem(newIndex);
+          } else if (e.key === 'Enter' && selectedIndex >= 0 && hasItems) {
+            e.preventDefault();
+            applySelectedItem();
+          }
+        });
+        
+        // Mouse hover to highlight items
+        autocompleteDropdown.addEventListener('mouseover', (e) => {
+          const item = e.target.closest('.autocomplete-item');
+          if (item) {
+            const items = getItems();
+            const index = items.indexOf(item);
+            if (index >= 0) {
+              selectItem(index);
+            }
+          }
+        });
+      }
    }
    form.addEventListener('submit', () => {
      if (!form.dataset.lastTrigger) {