feat(web,docs): visual summaries (curve, pips/sources incl. 'C', non‑land sources), tooltip copy, favicon; diagnostics (/healthz, request‑id, global handlers); fetches excluded, basics CSV fallback, list highlight polish; README/DOCKER/release-notes/CHANGELOG updated

2026-03-17 18:56:30 +01:00 · 2025-08-26 20:00:07 -07:00 · 2025-08-26 20:00:07 -07:00 · 8d1f6a8ac4
commit 8d1f6a8ac4
parent 625f6abb13
27 changed files with 1704 additions and 154 deletions
--- a/code/web/services/orchestrator.py
+++ b/code/web/services/orchestrator.py
@ -548,55 +548,92 @@ def _ensure_setup_ready(out, force: bool = False) -> None:
                out(f"Initial setup failed: {e}")
                _write_status({"running": False, "phase": "error", "message": f"Initial setup failed: {e}"})
                return
-            # Tagging with granular color progress
+            # Tagging with progress; support parallel workers for speed
            try:
                from tagging import tagger as _tagger  # type: ignore
                from settings import COLORS as _COLORS  # type: ignore
                colors = list(_COLORS)
                total = len(colors)
+                use_parallel = str(os.getenv('WEB_TAG_PARALLEL', '1')).strip().lower() in {"1","true","yes","on"}
+                max_workers_env = os.getenv('WEB_TAG_WORKERS')
+                try:
+                    max_workers = int(max_workers_env) if max_workers_env else None
+                except Exception:
+                    max_workers = None
                _write_status({
                    "running": True,
                    "phase": "tagging",
-                    "message": "Tagging cards (this may take a while)...",
+                    "message": "Tagging cards (this may take a while)..." if not use_parallel else "Tagging cards in parallel...",
                    "color": None,
                    "percent": 0,
                    "color_idx": 0,
                    "color_total": total,
                    "tagging_started_at": _dt.now().isoformat(timespec='seconds')
                })
-                for idx, _color in enumerate(colors, start=1):
+
+                if use_parallel:
                    try:
-                        pct = int((idx - 1) * 100 / max(1, total))
-                        # Estimate ETA based on average time per completed color
-                        eta_s = None
-                        try:
-                            from datetime import datetime as __dt
-                            ts = __dt.fromisoformat(json.load(open(os.path.join('csv_files', '.setup_status.json'), 'r', encoding='utf-8')).get('tagging_started_at'))  # type: ignore
-                            elapsed = max(0.0, (_dt.now() - ts).total_seconds())
-                            completed = max(0, idx - 1)
-                            if completed > 0:
-                                avg = elapsed / completed
-                                remaining = max(0, total - completed)
-                                eta_s = int(avg * remaining)
-                        except Exception:
-                            eta_s = None
-                        payload = {
-                            "running": True,
-                            "phase": "tagging",
-                            "message": f"Tagging {_color}...",
-                            "color": _color,
-                            "percent": pct,
-                            "color_idx": idx,
-                            "color_total": total,
-                        }
-                        if eta_s is not None:
-                            payload["eta_seconds"] = eta_s
-                        _write_status(payload)
-                        _tagger.load_dataframe(_color)
+                        import concurrent.futures as _f
+                        completed = 0
+                        with _f.ProcessPoolExecutor(max_workers=max_workers) as ex:
+                            fut_map = {ex.submit(_tagger.load_dataframe, c): c for c in colors}
+                            for fut in _f.as_completed(fut_map):
+                                c = fut_map[fut]
+                                try:
+                                    fut.result()
+                                    completed += 1
+                                    pct = int(completed * 100 / max(1, total))
+                                    _write_status({
+                                        "running": True,
+                                        "phase": "tagging",
+                                        "message": f"Tagged {c}",
+                                        "color": c,
+                                        "percent": pct,
+                                        "color_idx": completed,
+                                        "color_total": total,
+                                    })
+                                except Exception as e:
+                                    out(f"Parallel tagging failed for {c}: {e}")
+                                    _write_status({"running": False, "phase": "error", "message": f"Tagging {c} failed: {e}", "color": c})
+                                    return
                    except Exception as e:
-                        out(f"Tagging {_color} failed: {e}")
-                        _write_status({"running": False, "phase": "error", "message": f"Tagging {_color} failed: {e}", "color": _color})
-                        return
+                        out(f"Parallel tagging init failed: {e}; falling back to sequential")
+                        use_parallel = False
+
+                if not use_parallel:
+                    for idx, _color in enumerate(colors, start=1):
+                        try:
+                            pct = int((idx - 1) * 100 / max(1, total))
+                            # Estimate ETA based on average time per completed color
+                            eta_s = None
+                            try:
+                                from datetime import datetime as __dt
+                                ts = __dt.fromisoformat(json.load(open(os.path.join('csv_files', '.setup_status.json'), 'r', encoding='utf-8')).get('tagging_started_at'))  # type: ignore
+                                elapsed = max(0.0, (_dt.now() - ts).total_seconds())
+                                completed = max(0, idx - 1)
+                                if completed > 0:
+                                    avg = elapsed / completed
+                                    remaining = max(0, total - completed)
+                                    eta_s = int(avg * remaining)
+                            except Exception:
+                                eta_s = None
+                            payload = {
+                                "running": True,
+                                "phase": "tagging",
+                                "message": f"Tagging {_color}...",
+                                "color": _color,
+                                "percent": pct,
+                                "color_idx": idx,
+                                "color_total": total,
+                            }
+                            if eta_s is not None:
+                                payload["eta_seconds"] = eta_s
+                            _write_status(payload)
+                            _tagger.load_dataframe(_color)
+                        except Exception as e:
+                            out(f"Tagging {_color} failed: {e}")
+                            _write_status({"running": False, "phase": "error", "message": f"Tagging {_color} failed: {e}", "color": _color})
+                            return
            except Exception as e:
                out(f"Tagging failed to start: {e}")
                _write_status({"running": False, "phase": "error", "message": f"Tagging failed to start: {e}"})
@ -1117,6 +1154,21 @@ def run_stage(ctx: Dict[str, Any], rerun: bool = False, show_skipped: bool = Fal

        # If this stage added cards, present it and advance idx
        if added_cards:
+            # Progress counts
+            try:
+                total_cards = 0
+                for _n, _e in getattr(b, 'card_library', {}).items():
+                    try:
+                        total_cards += int(_e.get('Count', 1))
+                    except Exception:
+                        total_cards += 1
+            except Exception:
+                total_cards = None
+            added_total = 0
+            try:
+                added_total = sum(int(c.get('count', 0) or 0) for c in added_cards)
+            except Exception:
+                added_total = 0
            ctx["snapshot"] = snap_before  # snapshot for rerun
            ctx["idx"] = i + 1
            ctx["last_visible_idx"] = i + 1
@ -1127,10 +1179,22 @@ def run_stage(ctx: Dict[str, Any], rerun: bool = False, show_skipped: bool = Fal
                "added_cards": added_cards,
                "idx": i + 1,
                "total": len(stages),
+                "total_cards": total_cards,
+                "added_total": added_total,
            }

        # No cards added: either skip or surface as a 'skipped' stage
        if show_skipped:
+            # Progress counts even when skipped
+            try:
+                total_cards = 0
+                for _n, _e in getattr(b, 'card_library', {}).items():
+                    try:
+                        total_cards += int(_e.get('Count', 1))
+                    except Exception:
+                        total_cards += 1
+            except Exception:
+                total_cards = None
            ctx["snapshot"] = snap_before
            ctx["idx"] = i + 1
            ctx["last_visible_idx"] = i + 1
@ -1142,6 +1206,8 @@ def run_stage(ctx: Dict[str, Any], rerun: bool = False, show_skipped: bool = Fal
                "skipped": True,
                "idx": i + 1,
                "total": len(stages),
+                "total_cards": total_cards,
+                "added_total": 0,
            }

        # No cards added and not showing skipped: advance to next
@ -1194,6 +1260,16 @@ def run_stage(ctx: Dict[str, Any], rerun: bool = False, show_skipped: bool = Fal
                _json.dump(payload, f, ensure_ascii=False, indent=2)
    except Exception:
        pass
+    # Final progress
+    try:
+        total_cards = 0
+        for _n, _e in getattr(b, 'card_library', {}).items():
+            try:
+                total_cards += int(_e.get('Count', 1))
+            except Exception:
+                total_cards += 1
+    except Exception:
+        total_cards = None
    return {
        "done": True,
        "label": "Complete",
@ -1203,4 +1279,6 @@ def run_stage(ctx: Dict[str, Any], rerun: bool = False, show_skipped: bool = Fal
        "csv_path": ctx.get("csv_path"),
        "txt_path": ctx.get("txt_path"),
        "summary": summary,
+        "total_cards": total_cards,
+        "added_total": 0,
    }
--- a/code/web/services/owned_store.py
+++ b/code/web/services/owned_store.py
@ -4,6 +4,7 @@ from pathlib import Path
 from typing import Iterable, List, Tuple, Dict
 import json
 import os
+import time


 def _owned_dir() -> Path:
@ -108,6 +109,16 @@ def add_names(names: Iterable[str]) -> Tuple[int, int]:
    data["names"] = cur
    if "meta" not in data or not isinstance(data.get("meta"), dict):
        data["meta"] = {}
+    meta = data["meta"]
+    now = int(time.time())
+    # Ensure newly added names have an added_at
+    for s in cur:
+        info = meta.get(s)
+        if not info:
+            meta[s] = {"added_at": now}
+        else:
+            if "added_at" not in info:
+                info["added_at"] = now
    _save_raw(data)
    return added, len(cur)

@ -263,10 +274,16 @@ def add_and_enrich(names: Iterable[str]) -> Tuple[int, int]:
            continue
    # Enrich
    meta = data.get("meta") or {}
+    now = int(time.time())
    if new_names:
        enriched = _enrich_from_csvs(new_names)
        for nm, info in enriched.items():
            meta[nm] = info
+        # Stamp added_at for new names if missing
+        for nm in new_names:
+            entry = meta.setdefault(nm, {})
+            if "added_at" not in entry:
+                entry["added_at"] = now
    data["names"] = current_names
    data["meta"] = meta
    _save_raw(data)
@ -285,7 +302,15 @@ def get_enriched() -> Tuple[List[str], Dict[str, List[str]], Dict[str, str], Dic
    colors_by_name: Dict[str, List[str]] = {}
    for n in names:
        info = meta.get(n) or {}
-        tags = info.get('tags') or []
+        tags = list(info.get('tags') or [])  # copy: never mutate the list stored in meta
+        user_tags = (info.get('user_tags') or [])
+        if user_tags:
+            # merge user tags (unique, case-insensitive); works even when tags starts empty
+            seen = {str(t).lower() for t in tags}
+            for ut in user_tags:
+                if str(ut).lower() not in seen:
+                    tags.append(str(ut))
+                    seen.add(str(ut).lower())
        typ = info.get('type') or None
        cols = info.get('colors') or []
        if tags:
@ -297,6 +322,114 @@ def get_enriched() -> Tuple[List[str], Dict[str, List[str]], Dict[str, str], Dic
    return names, tags_by_name, type_by_name, colors_by_name


+def add_user_tag(names: Iterable[str], tag: str) -> int:
+    """Attach user tag ``tag`` to each owned name listed in ``names`` (case-insensitive); return how many entries changed."""
+    label = str(tag or '').strip()
+    if not label:
+        return 0
+    wanted = label.lower()
+    data = _load_raw()
+    meta = data.get('meta') or {}
+    targets = {str(n).strip().lower() for n in (names or []) if str(n).strip()}
+    changed = 0
+    for owned in (str(x).strip() for x in (data.get('names') or [])):
+        if not owned or owned.lower() not in targets:
+            continue
+        entry = meta.setdefault(owned, {})
+        existing = entry.get('user_tags') or []
+        if all(str(x).strip().lower() != wanted for x in existing):
+            existing.append(label)
+            entry['user_tags'] = existing
+            changed += 1
+    data['meta'] = meta
+    _save_raw(data)
+    return changed
+
+
+def remove_user_tag(names: Iterable[str], tag: str) -> int:
+    """Strip user tag ``tag`` from each owned name listed in ``names`` (case-insensitive); return how many entries changed."""
+    label = str(tag or '').strip()
+    if not label:
+        return 0
+    wanted = label.lower()
+    data = _load_raw()
+    meta = data.get('meta') or {}
+    targets = {str(n).strip().lower() for n in (names or []) if str(n).strip()}
+    changed = 0
+    for owned in (str(x).strip() for x in (data.get('names') or [])):
+        if not owned or owned.lower() not in targets:
+            continue
+        entry = meta.get(owned) or {}
+        nonblank = [x for x in (entry.get('user_tags') or []) if str(x)]
+        kept = [x for x in nonblank if str(x).strip().lower() != wanted]
+        if len(kept) == len(nonblank):
+            continue
+        entry['user_tags'] = kept
+        meta[owned] = entry
+        changed += 1
+    data['meta'] = meta
+    _save_raw(data)
+    return changed
+
+
+def get_added_at_map() -> Dict[str, int]:
+    """Map each name in the stored meta to its ``added_at`` unix timestamp; entries without a numeric stamp are omitted."""
+    stamps: Dict[str, int] = {}
+    entries: Dict[str, Dict[str, object]] = _load_raw().get("meta") or {}
+    for name, info in entries.items():
+        try:
+            added = info.get("added_at")
+            if not isinstance(added, (int, float)):
+                continue
+            stamps[name] = int(added)
+        except Exception:
+            continue
+    return stamps
+
+
+def remove_names(names: Iterable[str]) -> Tuple[int, int]:
+    """Remove every owned name matching ``names`` (case-insensitive, whitespace-stripped).
+
+    Meta entries whose key lower-cases to a removed name are dropped as well. When
+    ``names`` holds no usable value nothing is saved. Returns ``(removed_count, total_after)``.
+    """
+    doomed = {str(n).strip().lower() for n in (names or []) if str(n).strip()}
+    if not doomed:
+        return 0, len(get_names())
+    data = _load_raw()
+    stripped = [str(x).strip() for x in (data.get("names") or [])]
+    owned = [s for s in stripped if s]
+    survivors: List[str] = [s for s in owned if s.lower() not in doomed]
+    data["names"] = survivors
+    meta = data.get("meta") or {}
+    # Keys that cannot be lower-cased (non-strings) are left untouched
+    for key in list(meta.keys()):
+        try:
+            matched = key.lower() in doomed
+        except Exception:
+            continue
+        if matched:
+            meta.pop(key, None)
+    data["meta"] = meta
+    _save_raw(data)
+    return len(owned) - len(survivors), len(survivors)
+
+
+def get_user_tags_map() -> Dict[str, list[str]]:
+    """Map each name in the stored meta to its non-empty user tags (as strings); names without any are omitted."""
+    tag_map: Dict[str, list[str]] = {}
+    entries: Dict[str, Dict[str, object]] = _load_raw().get("meta") or {}
+    for name, info in entries.items():
+        try:
+            raw_tags = info.get("user_tags") or []
+            labels = [str(x) for x in raw_tags if str(x)]
+        except Exception:
+            continue
+        if labels:
+            tag_map[name] = labels
+    return tag_map
+
+
 def parse_txt_bytes(content: bytes) -> List[str]:
    out: List[str] = []
    try: