📦 chore: Bump NPM Audit Packages (#12227)

* 🔧 chore: Update file-type dependency to version 21.3.2 in package-lock.json and package.json - Upgraded the "file-type" package from version 18.7.0 to 21.3.2 to ensure compatibility with the latest features and security updates. - Added new dependencies related to the updated "file-type" package, enhancing functionality and performance. * 🔧 chore: Upgrade undici dependency to version 7.24.1 in package-lock.json and package.json - Updated the "undici" package from version 7.18.2 to 7.24.1 across multiple package files to ensure compatibility with the latest features and security updates. * 🔧 chore: Upgrade yauzl dependency to version 3.2.1 in package-lock.json - Updated the "yauzl" package from version 3.2.0 to 3.2.1 to incorporate the latest features and security updates. * 🔧 chore: Upgrade hono dependency to version 4.12.7 in package-lock.json - Updated the "hono" package from version 4.12.5 to 4.12.7 to incorporate the latest features and security updates.
🧹 fix: Sanitize Artifact Filenames in Code Execution Output (#12222)
2026-03-15 12:16:33 +01:00 · 2026-03-14 03:36:03 -04:00 · 2026-03-14 03:09:26 -04:00 · 2026-03-14 03:06:29 -04:00 · 2026-03-14 02:57:56 -04:00 · 2026-03-14 01:51:31 -04:00
780 changed files with 86426 additions and 18230 deletions
--- a/.env.example
+++ b/.env.example
@ -47,6 +47,10 @@ TRUST_PROXY=1
 # password policies.
 # MIN_PASSWORD_LENGTH=8

+# When enabled, the app will continue running after encountering uncaught exceptions
+# instead of exiting the process. Not recommended for production unless necessary.
+# CONTINUE_ON_UNCAUGHT_EXCEPTION=false
+
 #===============#
 # JSON Logging  #
 #===============#
@ -61,6 +65,9 @@ CONSOLE_JSON=false
 DEBUG_LOGGING=true
 DEBUG_CONSOLE=false

+# Enable memory diagnostics (logs heap/RSS snapshots every 60s, auto-enabled with --inspect)
+# MEM_DIAG=true
+
 #=============#
 # Permissions #
 #=============#
@ -87,6 +94,16 @@ NODE_MAX_OLD_SPACE_SIZE=6144

 # CONFIG_PATH="/alternative/path/to/librechat.yaml"

+#==================#
+# Langfuse Tracing #
+#==================#
+
+# Get Langfuse API keys for your project from the project settings page: https://cloud.langfuse.com
+
+# LANGFUSE_PUBLIC_KEY=
+# LANGFUSE_SECRET_KEY=
+# LANGFUSE_BASE_URL=
+
 #===================================================#
 #                     Endpoints                     #
 #===================================================#
@ -121,7 +138,7 @@ PROXY=
 #============#

 ANTHROPIC_API_KEY=user_provided
-# ANTHROPIC_MODELS=claude-opus-4-20250514,claude-sonnet-4-20250514,claude-3-7-sonnet-20250219,claude-3-5-sonnet-20241022,claude-3-5-haiku-20241022,claude-3-opus-20240229,claude-3-sonnet-20240229,claude-3-haiku-20240307
+# ANTHROPIC_MODELS=claude-sonnet-4-6,claude-opus-4-6,claude-opus-4-20250514,claude-sonnet-4-20250514,claude-3-7-sonnet-20250219,claude-3-5-sonnet-20241022,claude-3-5-haiku-20241022,claude-3-opus-20240229,claude-3-sonnet-20240229,claude-3-haiku-20240307
 # ANTHROPIC_REVERSE_PROXY=

 # Set to true to use Anthropic models through Google Vertex AI instead of direct API
@ -156,7 +173,8 @@ ANTHROPIC_API_KEY=user_provided
 # BEDROCK_AWS_SESSION_TOKEN=someSessionToken

 # Note: This example list is not meant to be exhaustive. If omitted, all known, supported model IDs will be included for you.
-# BEDROCK_AWS_MODELS=anthropic.claude-3-5-sonnet-20240620-v1:0,meta.llama3-1-8b-instruct-v1:0
+# BEDROCK_AWS_MODELS=anthropic.claude-sonnet-4-6,anthropic.claude-opus-4-6-v1,anthropic.claude-3-5-sonnet-20240620-v1:0,meta.llama3-1-8b-instruct-v1:0
+# Cross-region inference model IDs: us.anthropic.claude-sonnet-4-6,us.anthropic.claude-opus-4-6-v1,global.anthropic.claude-opus-4-6-v1

 # See all Bedrock model IDs here: https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns

@ -178,10 +196,10 @@ GOOGLE_KEY=user_provided
 # GOOGLE_AUTH_HEADER=true

 # Gemini API (AI Studio)
-# GOOGLE_MODELS=gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash,gemini-2.0-flash-lite
+# GOOGLE_MODELS=gemini-3.1-pro-preview,gemini-3.1-pro-preview-customtools,gemini-3.1-flash-lite-preview,gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash,gemini-2.0-flash-lite

 # Vertex AI
-# GOOGLE_MODELS=gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash-001,gemini-2.0-flash-lite-001
+# GOOGLE_MODELS=gemini-3.1-pro-preview,gemini-3.1-pro-preview-customtools,gemini-3.1-flash-lite-preview,gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash-001,gemini-2.0-flash-lite-001

 # GOOGLE_TITLE_MODEL=gemini-2.0-flash-lite-001

@ -228,10 +246,6 @@ GOOGLE_KEY=user_provided
 # Option A: Use dedicated Gemini API key for image generation
 # GEMINI_API_KEY=your-gemini-api-key

-# Option B: Use Vertex AI (no API key needed, uses service account)
-# Set this to enable Vertex AI and allow tool without requiring API keys
-# GEMINI_VERTEX_ENABLED=true
-
 # Vertex AI model for image generation (defaults to gemini-2.5-flash-image)
 # GEMINI_IMAGE_MODEL=gemini-2.5-flash-image

@ -331,10 +345,6 @@ FLUX_API_BASE_URL=https://api.us1.bfl.ai
 GOOGLE_SEARCH_API_KEY=
 GOOGLE_CSE_ID=

-# YOUTUBE
-#-----------------
-YOUTUBE_API_KEY=
-
 # Stable Diffusion
 #-----------------
 SD_WEBUI_URL=http://host.docker.internal:7860
@ -503,6 +513,9 @@ OPENID_ADMIN_ROLE_TOKEN_KIND=
 OPENID_USERNAME_CLAIM=
 # Set to determine which user info property returned from OpenID Provider to store as the User's name
 OPENID_NAME_CLAIM=
+# Set to determine which user info claim to use as the email/identifier for user matching (e.g., "upn" for Entra ID)
+# When not set, defaults to: email -> preferred_username -> upn
+OPENID_EMAIL_CLAIM=
 # Optional audience parameter for OpenID authorization requests
 OPENID_AUDIENCE=

@ -647,6 +660,9 @@ AWS_ACCESS_KEY_ID=
 AWS_SECRET_ACCESS_KEY=
 AWS_REGION=
 AWS_BUCKET_NAME=
+# Required for path-style S3-compatible providers (MinIO, Hetzner, Backblaze B2, etc.)
+# that don't support virtual-hosted-style URLs (bucket.endpoint). Not needed for AWS S3.
+# AWS_FORCE_PATH_STYLE=false

 #========================#
 # Azure Blob Storage     #
@ -661,7 +677,8 @@ AZURE_CONTAINER_NAME=files
 #========================#

 ALLOW_SHARED_LINKS=true
-ALLOW_SHARED_LINKS_PUBLIC=true
+# Allows unauthenticated access to shared links. Defaults to false (auth required) if not set.
+ALLOW_SHARED_LINKS_PUBLIC=false

 #==============================#
 # Static File Cache Control    #
@ -741,8 +758,10 @@ HELP_AND_FAQ_URL=https://librechat.ai
 # REDIS_PING_INTERVAL=300

 # Force specific cache namespaces to use in-memory storage even when Redis is enabled
-# Comma-separated list of CacheKeys (e.g., ROLES,MESSAGES)
-# FORCED_IN_MEMORY_CACHE_NAMESPACES=ROLES,MESSAGES
+# Comma-separated list of CacheKeys
+# Defaults to CONFIG_STORE,APP_CONFIG so YAML-derived config stays per-container (safe for blue/green deployments)
+# Set to empty string to force all namespaces through Redis: FORCED_IN_MEMORY_CACHE_NAMESPACES=
+# FORCED_IN_MEMORY_CACHE_NAMESPACES=CONFIG_STORE,APP_CONFIG

 # Leader Election Configuration (for multi-instance deployments with Redis)
 # Duration in seconds that the leader lease is valid before it expires (default: 25)
@ -831,3 +850,24 @@ OPENWEATHER_API_KEY=
 # Skip code challenge method validation (e.g., for AWS Cognito that supports S256 but doesn't advertise it)
 # When set to true, forces S256 code challenge even if not advertised in .well-known/openid-configuration
 # MCP_SKIP_CODE_CHALLENGE_CHECK=false
+
+# Circuit breaker: max connect/disconnect cycles before tripping (per server)
+# MCP_CB_MAX_CYCLES=7
+
+# Circuit breaker: sliding window (ms) for counting cycles
+# MCP_CB_CYCLE_WINDOW_MS=45000
+
+# Circuit breaker: cooldown (ms) after the cycle breaker trips
+# MCP_CB_CYCLE_COOLDOWN_MS=15000
+
+# Circuit breaker: max consecutive failed connection rounds before backoff
+# MCP_CB_MAX_FAILED_ROUNDS=3
+
+# Circuit breaker: sliding window (ms) for counting failed rounds
+# MCP_CB_FAILED_WINDOW_MS=120000
+
+# Circuit breaker: base backoff (ms) after failed round threshold is reached
+# MCP_CB_BASE_BACKOFF_MS=30000
+
+# Circuit breaker: max backoff cap (ms) for exponential backoff
+# MCP_CB_MAX_BACKOFF_MS=300000
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@ -26,18 +26,14 @@ Project maintainers have the right and responsibility to remove, edit, or reject

 ## 1. Development Setup

-1. Use Node.JS 20.x.
-2. Install typescript globally: `npm i -g typescript`.
-3. Run `npm ci` to install dependencies.
-4. Build the data provider: `npm run build:data-provider`.
-5. Build data schemas: `npm run build:data-schemas`.
-6. Build API methods: `npm run build:api`.
-7. Setup and run unit tests:
+1. Use Node.js v20.19.0+ or ^22.12.0 or >= 23.0.0.
+2. Run `npm run smart-reinstall` to install dependencies (uses Turborepo). Use `npm run reinstall` for a clean install, or `npm ci` for a fresh lockfile-based install.
+3. Build all compiled code: `npm run build`.
+4. Setup and run unit tests:
    - Copy `.env.test`: `cp api/test/.env.test.example api/test/.env.test`.
    - Run backend unit tests: `npm run test:api`.
    - Run frontend unit tests: `npm run test:client`.
-8. Setup and run integration tests:
-    - Build client: `cd client && npm run build`.
+5. Setup and run integration tests:
    - Create `.env`: `cp .env.example .env`.
    - Install [MongoDB Community Edition](https://www.mongodb.com/docs/manual/administration/install-community/), ensure that `mongosh` connects to your local instance.
    - Run: `npm install playwright`, then `npx playwright install`.
@ -48,11 +44,11 @@ Project maintainers have the right and responsibility to remove, edit, or reject
 ## 2. Development Notes

 1. Before starting work, make sure your main branch has the latest commits with `npm run update`.
-3. Run linting command to find errors: `npm run lint`. Alternatively, ensure husky pre-commit checks are functioning.
+2. Run linting command to find errors: `npm run lint`. Alternatively, ensure husky pre-commit checks are functioning.
 3. After your changes, reinstall packages in your current branch using `npm run reinstall` and ensure everything still works. 
    - Restart the ESLint server ("ESLint: Restart ESLint Server" in VS Code command bar) and your IDE after reinstalling or updating.
 4. Clear web app localStorage and cookies before and after changes.
-5. For frontend changes, compile typescript before and after changes to check for introduced errors: `cd client && npm run build`.
+5. To check for introduced errors, build all compiled code: `npm run build`.
 6. Run backend unit tests: `npm run test:api`.
 7. Run frontend unit tests: `npm run test:client`.
 8. Run integration tests: `npm run e2e`.
@ -118,50 +114,45 @@ Apply the following naming conventions to branches, labels, and other Git-relate
 - **JS/TS:** Directories and file names: Descriptive and camelCase. First letter uppercased for React files (e.g., `helperFunction.ts`, `ReactComponent.tsx`).
 - **Docs:** Directories and file names: Descriptive and snake_case (e.g., `config_files.md`).

-## 7. TypeScript Conversion
+## 7. Coding Standards
+
+For detailed coding conventions, workspace boundaries, and architecture guidance, refer to the [`AGENTS.md`](../AGENTS.md) file at the project root. It covers code style, type safety, import ordering, iteration/performance expectations, frontend rules, testing, and development commands.
+
+## 8. TypeScript Conversion

 1. **Original State**: The project was initially developed entirely in JavaScript (JS).

-2. **Frontend Transition**:
-   - We are in the process of transitioning the frontend from JS to TypeScript (TS).
-   - The transition is nearing completion.
-   - This conversion is feasible due to React's capability to intermix JS and TS prior to code compilation. It's standard practice to compile/bundle the code in such scenarios.
+2. **Frontend**: Fully transitioned to TypeScript.

-3. **Backend Considerations**:
-   - Transitioning the backend to TypeScript would be a more intricate process, especially for an established Express.js server.
-   
-   - **Options for Transition**:
-      - **Single Phase Overhaul**: This involves converting the entire backend to TypeScript in one go. It's the most straightforward approach but can be disruptive, especially for larger codebases.
-      
-      - **Incremental Transition**: Convert parts of the backend progressively. This can be done by:
-         - Maintaining a separate directory for TypeScript files.
-         - Gradually migrating and testing individual modules or routes.
-         - Using a build tool like `tsc` to compile TypeScript files independently until the entire transition is complete.
-         
-   - **Compilation Considerations**: 
-      - Introducing a compilation step for the server is an option. This would involve using tools like `ts-node` for development and `tsc` for production builds.
-      - However, this is not a conventional approach for Express.js servers and could introduce added complexity, especially in terms of build and deployment processes.
-      
-   - **Current Stance**: At present, this backend transition is of lower priority and might not be pursued.
+3. **Backend**:
+   - The legacy Express.js server remains in `/api` as JavaScript.
+   - All new backend code is written in TypeScript under `/packages/api`, which is compiled and consumed by `/api`.
+   - Shared database logic lives in `/packages/data-schemas` (TypeScript).
+   - Shared frontend/backend API types and services live in `/packages/data-provider` (TypeScript).
+   - Minimize direct changes to `/api`; prefer adding TypeScript code to `/packages/api` and importing it.

-## 8. Module Import Conventions
+## 9. Module Import Conventions

- `npm` packages first, 
-     - from longest line (top) to shortest (bottom)
+Imports are organized into three sections (in order):

- Followed by typescript types (pertains to data-provider and client workspaces)
-     - longest line (top) to shortest (bottom)
-     - types from package come first
+1. **Package imports** — sorted from shortest to longest line length.
+   - `react` is always the first import.
+   - Multi-line (stacked) imports count their total character length across all lines for sorting.

- Lastly, local imports
-     - longest line (top) to shortest (bottom)
-     - imports with alias `~` treated the same as relative import with respect to line length
+2. **`import type` imports** — sorted from longest to shortest line length.
+   - Package type imports come first, then local type imports.
+   - Line length sorting resets between the package and local sub-groups.
+
+3. **Local/project imports** — sorted from longest to shortest line length.
+   - Multi-line (stacked) imports count their total character length across all lines for sorting.
+   - Imports with alias `~` are treated the same as relative imports with respect to line length.
+
+- Consolidate value imports from the same module as much as possible.
+- Always use standalone `import type { ... }` for type imports; never use inline `type` keyword inside value imports (e.g., `import { Foo, type Bar }` is wrong).

 **Note:** ESLint will automatically enforce these import conventions when you run `npm run lint -- --fix` or through pre-commit hooks.

---
-
-Please ensure that you adapt this summary to fit the specific context and nuances of your project.
+For the full set of coding standards, see [`AGENTS.md`](../AGENTS.md).

 ---

--- a/.github/workflows/backend-review.yml
+++ b/.github/workflows/backend-review.yml
@ -9,48 +9,145 @@ on:
    paths:
      - 'api/**'
      - 'packages/**'
+
+env:
+  NODE_ENV: CI
+  NODE_OPTIONS: '--max-old-space-size=${{ secrets.NODE_MAX_OLD_SPACE_SIZE || 6144 }}'
+
 jobs:
-  tests_Backend:
-    name: Run Backend unit tests
-    timeout-minutes: 60
+  build:
+    name: Build packages
    runs-on: ubuntu-latest
-    env:
-      MONGO_URI: ${{ secrets.MONGO_URI }}
-      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-      JWT_SECRET: ${{ secrets.JWT_SECRET }}
-      CREDS_KEY: ${{ secrets.CREDS_KEY }}
-      CREDS_IV: ${{ secrets.CREDS_IV }}
-      BAN_VIOLATIONS: ${{ secrets.BAN_VIOLATIONS }}
-      BAN_DURATION: ${{ secrets.BAN_DURATION }}
-      BAN_INTERVAL: ${{ secrets.BAN_INTERVAL }}
-      NODE_ENV: CI
-      NODE_OPTIONS: '--max-old-space-size=${{ secrets.NODE_MAX_OLD_SPACE_SIZE || 6144 }}'
+    timeout-minutes: 15
    steps:
      - uses: actions/checkout@v4
-      - name: Use Node.js 20.x
+
+      - name: Use Node.js 20.19
        uses: actions/setup-node@v4
        with:
-          node-version: 20
-          cache: 'npm'
+          node-version: '20.19'
+
+      - name: Restore node_modules cache
+        id: cache-node-modules
+        uses: actions/cache@v4
+        with:
+          path: |
+            node_modules
+            api/node_modules
+            packages/api/node_modules
+            packages/data-provider/node_modules
+            packages/data-schemas/node_modules
+          key: node-modules-backend-${{ runner.os }}-20.19-${{ hashFiles('package-lock.json') }}

      - name: Install dependencies
+        if: steps.cache-node-modules.outputs.cache-hit != 'true'
        run: npm ci

-      - name: Install Data Provider Package
+      - name: Restore data-provider build cache
+        id: cache-data-provider
+        uses: actions/cache@v4
+        with:
+          path: packages/data-provider/dist
+          key: build-data-provider-${{ runner.os }}-${{ hashFiles('packages/data-provider/src/**', 'packages/data-provider/tsconfig*.json', 'packages/data-provider/rollup.config.js', 'packages/data-provider/package.json') }}
+
+      - name: Build data-provider
+        if: steps.cache-data-provider.outputs.cache-hit != 'true'
        run: npm run build:data-provider

-      - name: Install Data Schemas Package
+      - name: Restore data-schemas build cache
+        id: cache-data-schemas
+        uses: actions/cache@v4
+        with:
+          path: packages/data-schemas/dist
+          key: build-data-schemas-${{ runner.os }}-${{ hashFiles('packages/data-schemas/src/**', 'packages/data-schemas/tsconfig*.json', 'packages/data-schemas/rollup.config.js', 'packages/data-schemas/package.json', 'packages/data-provider/src/**', 'packages/data-provider/tsconfig*.json', 'packages/data-provider/rollup.config.js', 'packages/data-provider/package.json') }}
+
+      - name: Build data-schemas
+        if: steps.cache-data-schemas.outputs.cache-hit != 'true'
        run: npm run build:data-schemas

-      - name: Install API Package
+      - name: Restore api build cache
+        id: cache-api
+        uses: actions/cache@v4
+        with:
+          path: packages/api/dist
+          key: build-api-${{ runner.os }}-${{ hashFiles('packages/api/src/**', 'packages/api/tsconfig*.json', 'packages/api/server-rollup.config.js', 'packages/api/package.json', 'packages/data-provider/src/**', 'packages/data-provider/tsconfig*.json', 'packages/data-provider/rollup.config.js', 'packages/data-provider/package.json', 'packages/data-schemas/src/**', 'packages/data-schemas/tsconfig*.json', 'packages/data-schemas/rollup.config.js', 'packages/data-schemas/package.json') }}
+
+      - name: Build api
+        if: steps.cache-api.outputs.cache-hit != 'true'
        run: npm run build:api

-      - name: Create empty auth.json file
-        run: |
-          mkdir -p api/data
-          echo '{}' > api/data/auth.json
+      - name: Upload data-provider build
+        uses: actions/upload-artifact@v4
+        with:
+          name: build-data-provider
+          path: packages/data-provider/dist
+          retention-days: 2

-      - name: Check for Circular dependency in rollup
+      - name: Upload data-schemas build
+        uses: actions/upload-artifact@v4
+        with:
+          name: build-data-schemas
+          path: packages/data-schemas/dist
+          retention-days: 2
+
+      - name: Upload api build
+        uses: actions/upload-artifact@v4
+        with:
+          name: build-api
+          path: packages/api/dist
+          retention-days: 2
+
+  circular-deps:
+    name: Circular dependency checks
+    needs: build
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Use Node.js 20.19
+        uses: actions/setup-node@v4
+        with:
+          node-version: '20.19'
+
+      - name: Restore node_modules cache
+        id: cache-node-modules
+        uses: actions/cache@v4
+        with:
+          path: |
+            node_modules
+            api/node_modules
+            packages/api/node_modules
+            packages/data-provider/node_modules
+            packages/data-schemas/node_modules
+          key: node-modules-backend-${{ runner.os }}-20.19-${{ hashFiles('package-lock.json') }}
+
+      - name: Install dependencies
+        if: steps.cache-node-modules.outputs.cache-hit != 'true'
+        run: npm ci
+
+      - name: Download data-provider build
+        uses: actions/download-artifact@v4
+        with:
+          name: build-data-provider
+          path: packages/data-provider/dist
+
+      - name: Download data-schemas build
+        uses: actions/download-artifact@v4
+        with:
+          name: build-data-schemas
+          path: packages/data-schemas/dist
+
+      - name: Rebuild @librechat/api and check for circular dependencies
+        run: |
+          output=$(npm run build:api 2>&1)
+          echo "$output"
+          if echo "$output" | grep -q "Circular depend"; then
+            echo "Error: Circular dependency detected in @librechat/api!"
+            exit 1
+          fi
+
+      - name: Detect circular dependencies in rollup
        working-directory: ./packages/data-provider
        run: |
          output=$(npm run rollup:api)
@ -60,17 +157,201 @@ jobs:
            exit 1
          fi

+  test-api:
+    name: 'Tests: api'
+    needs: build
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    env:
+      MONGO_URI: ${{ secrets.MONGO_URI }}
+      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      JWT_SECRET: ${{ secrets.JWT_SECRET }}
+      CREDS_KEY: ${{ secrets.CREDS_KEY }}
+      CREDS_IV: ${{ secrets.CREDS_IV }}
+      BAN_VIOLATIONS: ${{ secrets.BAN_VIOLATIONS }}
+      BAN_DURATION: ${{ secrets.BAN_DURATION }}
+      BAN_INTERVAL: ${{ secrets.BAN_INTERVAL }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Use Node.js 20.19
+        uses: actions/setup-node@v4
+        with:
+          node-version: '20.19'
+
+      - name: Restore node_modules cache
+        id: cache-node-modules
+        uses: actions/cache@v4
+        with:
+          path: |
+            node_modules
+            api/node_modules
+            packages/api/node_modules
+            packages/data-provider/node_modules
+            packages/data-schemas/node_modules
+          key: node-modules-backend-${{ runner.os }}-20.19-${{ hashFiles('package-lock.json') }}
+
+      - name: Install dependencies
+        if: steps.cache-node-modules.outputs.cache-hit != 'true'
+        run: npm ci
+
+      - name: Download data-provider build
+        uses: actions/download-artifact@v4
+        with:
+          name: build-data-provider
+          path: packages/data-provider/dist
+
+      - name: Download data-schemas build
+        uses: actions/download-artifact@v4
+        with:
+          name: build-data-schemas
+          path: packages/data-schemas/dist
+
+      - name: Download api build
+        uses: actions/download-artifact@v4
+        with:
+          name: build-api
+          path: packages/api/dist
+
+      - name: Create empty auth.json file
+        run: |
+          mkdir -p api/data
+          echo '{}' > api/data/auth.json
+
      - name: Prepare .env.test file
        run: cp api/test/.env.test.example api/test/.env.test

      - name: Run unit tests
        run: cd api && npm run test:ci

-      - name: Run librechat-data-provider unit tests
+  test-data-provider:
+    name: 'Tests: data-provider'
+    needs: build
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Use Node.js 20.19
+        uses: actions/setup-node@v4
+        with:
+          node-version: '20.19'
+
+      - name: Restore node_modules cache
+        id: cache-node-modules
+        uses: actions/cache@v4
+        with:
+          path: |
+            node_modules
+            api/node_modules
+            packages/api/node_modules
+            packages/data-provider/node_modules
+            packages/data-schemas/node_modules
+          key: node-modules-backend-${{ runner.os }}-20.19-${{ hashFiles('package-lock.json') }}
+
+      - name: Install dependencies
+        if: steps.cache-node-modules.outputs.cache-hit != 'true'
+        run: npm ci
+
+      - name: Download data-provider build
+        uses: actions/download-artifact@v4
+        with:
+          name: build-data-provider
+          path: packages/data-provider/dist
+
+      - name: Run unit tests
        run: cd packages/data-provider && npm run test:ci

-      - name: Run @librechat/data-schemas unit tests
+  test-data-schemas:
+    name: 'Tests: data-schemas'
+    needs: build
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Use Node.js 20.19
+        uses: actions/setup-node@v4
+        with:
+          node-version: '20.19'
+
+      - name: Restore node_modules cache
+        id: cache-node-modules
+        uses: actions/cache@v4
+        with:
+          path: |
+            node_modules
+            api/node_modules
+            packages/api/node_modules
+            packages/data-provider/node_modules
+            packages/data-schemas/node_modules
+          key: node-modules-backend-${{ runner.os }}-20.19-${{ hashFiles('package-lock.json') }}
+
+      - name: Install dependencies
+        if: steps.cache-node-modules.outputs.cache-hit != 'true'
+        run: npm ci
+
+      - name: Download data-provider build
+        uses: actions/download-artifact@v4
+        with:
+          name: build-data-provider
+          path: packages/data-provider/dist
+
+      - name: Download data-schemas build
+        uses: actions/download-artifact@v4
+        with:
+          name: build-data-schemas
+          path: packages/data-schemas/dist
+
+      - name: Run unit tests
        run: cd packages/data-schemas && npm run test:ci

-      - name: Run @librechat/api unit tests
+  test-packages-api:
+    name: 'Tests: @librechat/api'
+    needs: build
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Use Node.js 20.19
+        uses: actions/setup-node@v4
+        with:
+          node-version: '20.19'
+
+      - name: Restore node_modules cache
+        id: cache-node-modules
+        uses: actions/cache@v4
+        with:
+          path: |
+            node_modules
+            api/node_modules
+            packages/api/node_modules
+            packages/data-provider/node_modules
+            packages/data-schemas/node_modules
+          key: node-modules-backend-${{ runner.os }}-20.19-${{ hashFiles('package-lock.json') }}
+
+      - name: Install dependencies
+        if: steps.cache-node-modules.outputs.cache-hit != 'true'
+        run: npm ci
+
+      - name: Download data-provider build
+        uses: actions/download-artifact@v4
+        with:
+          name: build-data-provider
+          path: packages/data-provider/dist
+
+      - name: Download data-schemas build
+        uses: actions/download-artifact@v4
+        with:
+          name: build-data-schemas
+          path: packages/data-schemas/dist
+
+      - name: Download api build
+        uses: actions/download-artifact@v4
+        with:
+          name: build-api
+          path: packages/api/dist
+
+      - name: Run unit tests
        run: cd packages/api && npm run test:ci
--- a/.github/workflows/frontend-review.yml
+++ b/.github/workflows/frontend-review.yml
@ -2,7 +2,7 @@ name: Frontend Unit Tests

 on:
  pull_request:
-    branches: 
+    branches:
      - main
      - dev
      - dev-staging
@ -11,51 +11,200 @@ on:
      - 'client/**'
      - 'packages/data-provider/**'

+env:
+  NODE_OPTIONS: '--max-old-space-size=${{ secrets.NODE_MAX_OLD_SPACE_SIZE || 6144 }}'
+
 jobs:
-  tests_frontend_ubuntu:
-    name: Run frontend unit tests on Ubuntu
-    timeout-minutes: 60
+  build:
+    name: Build packages
    runs-on: ubuntu-latest
-    env:
-      NODE_OPTIONS: '--max-old-space-size=${{ secrets.NODE_MAX_OLD_SPACE_SIZE || 6144 }}'
+    timeout-minutes: 15
    steps:
      - uses: actions/checkout@v4
-      - name: Use Node.js 20.x
+
+      - name: Use Node.js 20.19
        uses: actions/setup-node@v4
        with:
-          node-version: 20
-          cache: 'npm'
+          node-version: '20.19'
+
+      - name: Restore node_modules cache
+        id: cache-node-modules
+        uses: actions/cache@v4
+        with:
+          path: |
+            node_modules
+            client/node_modules
+            packages/client/node_modules
+            packages/data-provider/node_modules
+          key: node-modules-frontend-${{ runner.os }}-20.19-${{ hashFiles('package-lock.json') }}

      - name: Install dependencies
+        if: steps.cache-node-modules.outputs.cache-hit != 'true'
        run: npm ci

-      - name: Build Client
-        run: npm run frontend:ci
+      - name: Restore data-provider build cache
+        id: cache-data-provider
+        uses: actions/cache@v4
+        with:
+          path: packages/data-provider/dist
+          key: build-data-provider-${{ runner.os }}-${{ hashFiles('packages/data-provider/src/**', 'packages/data-provider/tsconfig*.json', 'packages/data-provider/rollup.config.js', 'packages/data-provider/package.json') }}
+
+      - name: Build data-provider
+        if: steps.cache-data-provider.outputs.cache-hit != 'true'
+        run: npm run build:data-provider
+
+      - name: Restore client-package build cache
+        id: cache-client-package
+        uses: actions/cache@v4
+        with:
+          path: packages/client/dist
+          key: build-client-package-${{ runner.os }}-${{ hashFiles('packages/client/src/**', 'packages/client/tsconfig*.json', 'packages/client/rollup.config.js', 'packages/client/package.json', 'packages/data-provider/src/**', 'packages/data-provider/tsconfig*.json', 'packages/data-provider/rollup.config.js', 'packages/data-provider/package.json') }}
+
+      - name: Build client-package
+        if: steps.cache-client-package.outputs.cache-hit != 'true'
+        run: npm run build:client-package
+
+      - name: Upload data-provider build
+        uses: actions/upload-artifact@v4
+        with:
+          name: build-data-provider
+          path: packages/data-provider/dist
+          retention-days: 2
+
+      - name: Upload client-package build
+        uses: actions/upload-artifact@v4
+        with:
+          name: build-client-package
+          path: packages/client/dist
+          retention-days: 2
+
+  test-ubuntu:
+    name: 'Tests: Ubuntu'
+    needs: build
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Use Node.js 20.19
+        uses: actions/setup-node@v4
+        with:
+          node-version: '20.19'
+
+      - name: Restore node_modules cache
+        id: cache-node-modules
+        uses: actions/cache@v4
+        with:
+          path: |
+            node_modules
+            client/node_modules
+            packages/client/node_modules
+            packages/data-provider/node_modules
+          key: node-modules-frontend-${{ runner.os }}-20.19-${{ hashFiles('package-lock.json') }}
+
+      - name: Install dependencies
+        if: steps.cache-node-modules.outputs.cache-hit != 'true'
+        run: npm ci
+
+      - name: Download data-provider build
+        uses: actions/download-artifact@v4
+        with:
+          name: build-data-provider
+          path: packages/data-provider/dist
+
+      - name: Download client-package build
+        uses: actions/download-artifact@v4
+        with:
+          name: build-client-package
+          path: packages/client/dist

      - name: Run unit tests
        run: npm run test:ci --verbose
        working-directory: client

-  tests_frontend_windows:
-    name: Run frontend unit tests on Windows
-    timeout-minutes: 60
+  test-windows:
+    name: 'Tests: Windows'
+    needs: build
    runs-on: windows-latest
-    env:
-      NODE_OPTIONS: '--max-old-space-size=${{ secrets.NODE_MAX_OLD_SPACE_SIZE || 6144 }}'
+    timeout-minutes: 20
    steps:
      - uses: actions/checkout@v4
-      - name: Use Node.js 20.x
+
+      - name: Use Node.js 20.19
        uses: actions/setup-node@v4
        with:
-          node-version: 20
-          cache: 'npm'
+          node-version: '20.19'
+
+      - name: Restore node_modules cache
+        id: cache-node-modules
+        uses: actions/cache@v4
+        with:
+          path: |
+            node_modules
+            client/node_modules
+            packages/client/node_modules
+            packages/data-provider/node_modules
+          key: node-modules-frontend-${{ runner.os }}-20.19-${{ hashFiles('package-lock.json') }}

      - name: Install dependencies
+        if: steps.cache-node-modules.outputs.cache-hit != 'true'
        run: npm ci

-      - name: Build Client
-        run: npm run frontend:ci
+      - name: Download data-provider build
+        uses: actions/download-artifact@v4
+        with:
+          name: build-data-provider
+          path: packages/data-provider/dist
+
+      - name: Download client-package build
+        uses: actions/download-artifact@v4
+        with:
+          name: build-client-package
+          path: packages/client/dist

      - name: Run unit tests
        run: npm run test:ci --verbose
-        working-directory: client
+        working-directory: client
+
+  build-verify:
+    name: Vite build verification
+    needs: build
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Use Node.js 20.19
+        uses: actions/setup-node@v4
+        with:
+          node-version: '20.19'
+
+      - name: Restore node_modules cache
+        id: cache-node-modules
+        uses: actions/cache@v4
+        with:
+          path: |
+            node_modules
+            client/node_modules
+            packages/client/node_modules
+            packages/data-provider/node_modules
+          key: node-modules-frontend-${{ runner.os }}-20.19-${{ hashFiles('package-lock.json') }}
+
+      - name: Install dependencies
+        if: steps.cache-node-modules.outputs.cache-hit != 'true'
+        run: npm ci
+
+      - name: Download data-provider build
+        uses: actions/download-artifact@v4
+        with:
+          name: build-data-provider
+          path: packages/data-provider/dist
+
+      - name: Download client-package build
+        uses: actions/download-artifact@v4
+        with:
+          name: build-client-package
+          path: packages/client/dist
+
+      - name: Build client
+        run: cd client && npm run build:ci
--- a/.gitignore
+++ b/.gitignore
@ -15,6 +15,7 @@ pids

 # CI/CD data
 test-image*
+dump.rdb

 # Directory for instrumented libs generated by jscoverage/JSCover
 lib-cov
@ -29,6 +30,9 @@ coverage
 config/translations/stores/*
 client/src/localization/languages/*_missing_keys.json

+# Turborepo
+.turbo
+
 # Compiled Dirs (http://nodejs.org/api/addons.html)
 build/
 dist/
--- a/AGENTS.md
+++ b/AGENTS.md
@ -0,0 +1,166 @@
+# LibreChat
+
+## Project Overview
+
+LibreChat is a monorepo with the following key workspaces:
+
+| Workspace | Language | Side | Dependency | Purpose |
+|---|---|---|---|---|
+| `/api` | JS (legacy) | Backend | `packages/api`, `packages/data-schemas`, `packages/data-provider`, `@librechat/agents` | Express server — minimize changes here |
+| `/packages/api` | **TypeScript** | Backend | `packages/data-schemas`, `packages/data-provider` | New backend code lives here (TS only, consumed by `/api`) |
+| `/packages/data-schemas` | TypeScript | Backend | `packages/data-provider` | Database models/schemas, shareable across backend projects |
+| `/packages/data-provider` | TypeScript | Shared | — | Shared API types, endpoints, data-service — used by both frontend and backend |
+| `/client` | TypeScript/React | Frontend | `packages/data-provider`, `packages/client` | Frontend SPA |
+| `/packages/client` | TypeScript | Frontend | `packages/data-provider` | Shared frontend utilities |
+
+The source code for `@librechat/agents` (major backend dependency, same team) is at `/home/danny/agentus`.
+
+---
+
+## Workspace Boundaries
+
+- **All new backend code must be TypeScript** in `/packages/api`.
+- Keep `/api` changes to the absolute minimum (thin JS wrappers calling into `/packages/api`).
+- Database-specific shared logic goes in `/packages/data-schemas`.
+- Frontend/backend shared API logic (endpoints, types, data-service) goes in `/packages/data-provider`.
+- Build data-provider from project root: `npm run build:data-provider`.
+
+---
+
+## Code Style
+
+### Structure and Clarity
+
+- **Never-nesting**: early returns, flat code, minimal indentation. Break complex operations into well-named helpers.
+- **Functional first**: pure functions, immutable data, `map`/`filter`/`reduce` over imperative loops. Only reach for OOP when it clearly improves domain modeling or state encapsulation.
+- **No dynamic imports** unless absolutely necessary.
+
+### DRY
+
+- Extract repeated logic into utility functions.
+- Reusable hooks / higher-order components for UI patterns.
+- Parameterized helpers instead of near-duplicate functions.
+- Constants for repeated values; configuration objects over duplicated init code.
+- Shared validators, centralized error handling, single source of truth for business rules.
+- Shared typing system with interfaces/types extending common base definitions.
+- Abstraction layers for external API interactions.
+
+### Iteration and Performance
+
+- **Minimize looping** — especially over shared data structures like message arrays, which are iterated frequently throughout the codebase. Every additional pass adds up at scale.
+- Consolidate sequential O(n) operations into a single pass whenever possible; never loop over the same collection twice if the work can be combined.
+- Choose data structures that reduce the need to iterate (e.g., `Map`/`Set` for lookups instead of `Array.find`/`Array.includes`).
+- Avoid unnecessary object creation; consider space-time tradeoffs.
+- Prevent memory leaks: be careful with closures, dispose of resources and event listeners, and avoid circular references.
+
+### Type Safety
+
+- **Never use `any`**. Explicit types for all parameters, return values, and variables.
+- **Limit `unknown`** — avoid `unknown`, `Record<string, unknown>`, and `as unknown as T` assertions. A `Record<string, unknown>` almost always signals a missing explicit type definition.
+- **Don't duplicate types** — before defining a new type, check whether it already exists in the project (especially `packages/data-provider`). Reuse and extend existing types rather than creating redundant definitions.
+- Use union types, generics, and interfaces appropriately.
+- All TypeScript and ESLint warnings/errors must be addressed — do not leave unresolved diagnostics.
+
+### Comments and Documentation
+
+- Write self-documenting code; no inline comments narrating what code does.
+- JSDoc only for complex/non-obvious logic or intellisense on public APIs.
+- Single-line JSDoc for brief docs, multi-line for complex cases.
+- Avoid standalone `//` comments unless absolutely necessary.
+
+### Import Order
+
+Imports are organized into three sections:
+
+1. **Package imports** — sorted shortest to longest line length (`react` always first).
+2. **`import type` imports** — sorted longest to shortest (package types first, then local types; the longest-to-shortest ordering restarts within each sub-group).
+3. **Local/project imports** — sorted longest to shortest.
+
+Multi-line imports count total character length across all lines. Consolidate value imports from the same module. Always use standalone `import type { ... }` — never inline `type` inside value imports.
+
+### JS/TS Loop Preferences
+
+- **Limit looping as much as possible.** Prefer single-pass transformations and avoid re-iterating the same data.
+- `for (let i = 0; ...)` for performance-critical or index-dependent operations.
+- `for...of` for simple array iteration.
+- `for...in` only for object property enumeration.
+
+---
+
+## Frontend Rules (`client/src/**/*`)
+
+### Localization
+
+- All user-facing text must use `useLocalize()`.
+- Only update English keys in `client/src/locales/en/translation.json` (other languages are automated externally).
+- Semantic key prefixes: `com_ui_`, `com_assistants_`, etc.
+
+### Components
+
+- TypeScript for all React components with proper type imports.
+- Semantic HTML with ARIA labels (`role`, `aria-label`) for accessibility.
+- Group related components in feature directories (e.g., `SidePanel/Memories/`).
+- Use index files for clean exports.
+
+### Data Management
+
+- Feature hooks: `client/src/data-provider/[Feature]/queries.ts` → `[Feature]/index.ts` → `client/src/data-provider/index.ts`.
+- React Query (`@tanstack/react-query`) for all API interactions; proper query invalidation on mutations.
+- QueryKeys and MutationKeys in `packages/data-provider/src/keys.ts`.
+
+### Data-Provider Integration
+
+- Endpoints: `packages/data-provider/src/api-endpoints.ts`
+- Data service: `packages/data-provider/src/data-service.ts`
+- Types: `packages/data-provider/src/types/queries.ts`
+- Use `encodeURIComponent` for dynamic URL parameters.
+
+### Performance
+
+- Prioritize memory and speed efficiency at scale.
+- Cursor pagination for large datasets.
+- Proper dependency arrays to avoid unnecessary re-renders.
+- Leverage React Query caching and background refetching.
+
+---
+
+## Development Commands
+
+| Command | Purpose |
+|---|---|
+| `npm run smart-reinstall` | Install deps (if lockfile changed) + build via Turborepo |
+| `npm run reinstall` | Clean install — wipe `node_modules` and reinstall from scratch |
+| `npm run backend` | Start the backend server |
+| `npm run backend:dev` | Start backend with file watching (development) |
+| `npm run build` | Build all compiled code via Turborepo (parallel, cached) |
+| `npm run frontend` | Build all compiled code sequentially (legacy fallback) |
+| `npm run frontend:dev` | Start frontend dev server with HMR (port 3090, requires backend running) |
+| `npm run build:data-provider` | Rebuild `packages/data-provider` after changes |
+
+- Node.js: v20.19.0+ or ^22.12.0 or >= 23.0.0
+- Database: MongoDB
+- Backend runs on `http://localhost:3080/`; frontend dev server on `http://localhost:3090/`
+
+---
+
+## Testing
+
+- Framework: **Jest**, run per-workspace.
+- Run tests from their workspace directory: `cd api && npx jest <pattern>`, `cd packages/api && npx jest <pattern>`, etc.
+- Frontend tests: `__tests__` directories alongside components; use `test/layout-test-utils` for rendering.
+- Cover loading, success, and error states for UI/data flows.
+
+### Philosophy
+
+- **Real logic over mocks.** Exercise actual code paths with real dependencies. Mocking is a last resort.
+- **Spies over mocks.** Assert that real functions are called with expected arguments and frequency without replacing underlying logic.
+- **MongoDB**: use `mongodb-memory-server` for a real in-memory MongoDB instance. Test actual queries and schema validation, not mocked DB calls.
+- **MCP**: use real `@modelcontextprotocol/sdk` exports for servers, transports, and tool definitions. Mirror real scenarios, don't stub SDK internals.
+- Only mock what you cannot control: external HTTP APIs, rate-limited services, non-deterministic system calls.
+- Heavy mocking is a code smell, not a testing strategy.
+
+---
+
+## Formatting
+
+Fix all formatting lint errors (trailing spaces, tabs, newlines, indentation) using auto-fix when available. All TypeScript/ESLint warnings and errors **must** be resolved.
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,236 +0,0 @@
-# Changelog
-
-All notable changes to this project will be documented in this file.
-
-
-
-
-
-
-## [Unreleased]
-
-### ✨ New Features
-
- ✨ feat: implement search parameter updates by **@mawburn** in [#7151](https://github.com/danny-avila/LibreChat/pull/7151)
- 🎏 feat: Add MCP support for Streamable HTTP Transport by **@benverhees** in [#7353](https://github.com/danny-avila/LibreChat/pull/7353)
- 🔒 feat: Add Content Security Policy using Helmet middleware by **@rubentalstra** in [#7377](https://github.com/danny-avila/LibreChat/pull/7377)
- ✨ feat: Add Normalization for MCP Server Names by **@danny-avila** in [#7421](https://github.com/danny-avila/LibreChat/pull/7421)
- 📊 feat: Improve Helm Chart by **@hofq** in [#3638](https://github.com/danny-avila/LibreChat/pull/3638)
- 🦾 feat: Claude-4 Support by **@danny-avila** in [#7509](https://github.com/danny-avila/LibreChat/pull/7509)
- 🪨 feat: Bedrock Support for Claude-4 Reasoning by **@danny-avila** in [#7517](https://github.com/danny-avila/LibreChat/pull/7517)
-
-### 🌍 Internationalization
-
- 🌍 i18n: Add `Danish` and `Czech` and `Catalan` localization support by **@rubentalstra** in [#7373](https://github.com/danny-avila/LibreChat/pull/7373)
- 🌍 i18n: Update translation.json with latest translations by **@github-actions[bot]** in [#7375](https://github.com/danny-avila/LibreChat/pull/7375)
- 🌍 i18n: Update translation.json with latest translations by **@github-actions[bot]** in [#7468](https://github.com/danny-avila/LibreChat/pull/7468)
-
-### 🔧 Fixes
-
- 💬 fix: update aria-label for accessibility in ConvoLink component by **@berry-13** in [#7320](https://github.com/danny-avila/LibreChat/pull/7320)
- 🔑 fix: use `apiKey` instead of `openAIApiKey` in OpenAI-like Config by **@danny-avila** in [#7337](https://github.com/danny-avila/LibreChat/pull/7337)
- 🔄 fix: update navigation logic in `useFocusChatEffect` to ensure correct search parameters are used by **@mawburn** in [#7340](https://github.com/danny-avila/LibreChat/pull/7340)
- 🔄 fix: Improve MCP Connection Cleanup by **@danny-avila** in [#7400](https://github.com/danny-avila/LibreChat/pull/7400)
- 🛡️ fix: Preset and Validation Logic for URL Query Params by **@danny-avila** in [#7407](https://github.com/danny-avila/LibreChat/pull/7407)
- 🌘 fix: artifact of preview text is illegible in dark mode by **@nhtruong** in [#7405](https://github.com/danny-avila/LibreChat/pull/7405)
- 🛡️ fix: Temporarily Remove CSP until Configurable by **@danny-avila** in [#7419](https://github.com/danny-avila/LibreChat/pull/7419)
- 💽 fix: Exclude index page `/` from static cache settings by **@sbruel** in [#7382](https://github.com/danny-avila/LibreChat/pull/7382)
-
-### ⚙️ Other Changes
-
- 📜 docs: CHANGELOG for release v0.7.8 by **@github-actions[bot]** in [#7290](https://github.com/danny-avila/LibreChat/pull/7290)
- 📦 chore: Update API Package Dependencies by **@danny-avila** in [#7359](https://github.com/danny-avila/LibreChat/pull/7359)
- 📜 docs: Unreleased Changelog by **@github-actions[bot]** in [#7321](https://github.com/danny-avila/LibreChat/pull/7321)
- 📜 docs: Unreleased Changelog by **@github-actions[bot]** in [#7434](https://github.com/danny-avila/LibreChat/pull/7434)
- 🛡️ chore: `multer` v2.0.0 for CVE-2025-47935 and CVE-2025-47944 by **@danny-avila** in [#7454](https://github.com/danny-avila/LibreChat/pull/7454)
- 📂 refactor: Improve `FileAttachment` & File Form Deletion by **@danny-avila** in [#7471](https://github.com/danny-avila/LibreChat/pull/7471)
- 📊 chore: Remove Old Helm Chart by **@hofq** in [#7512](https://github.com/danny-avila/LibreChat/pull/7512)
- 🪖 chore: bump helm app version to v0.7.8 by **@austin-barrington** in [#7524](https://github.com/danny-avila/LibreChat/pull/7524)
-
-
-
---
-## [v0.7.8] - 
-
-Changes from v0.7.8-rc1 to v0.7.8.
-
-### ✨ New Features
-
- ✨ feat: Enhance form submission for touch screens by **@berry-13** in [#7198](https://github.com/danny-avila/LibreChat/pull/7198)
- 🔍 feat: Additional Tavily API Tool Parameters by **@glowforge-opensource** in [#7232](https://github.com/danny-avila/LibreChat/pull/7232)
- 🐋 feat: Add python to Dockerfile for increased MCP compatibility by **@technicalpickles** in [#7270](https://github.com/danny-avila/LibreChat/pull/7270)
-
-### 🔧 Fixes
-
- 🔧 fix: Google Gemma Support & OpenAI Reasoning Instructions by **@danny-avila** in [#7196](https://github.com/danny-avila/LibreChat/pull/7196)
- 🛠️ fix: Conversation Navigation State by **@danny-avila** in [#7210](https://github.com/danny-avila/LibreChat/pull/7210)
- 🔄 fix: o-Series Model Regex for System Messages by **@danny-avila** in [#7245](https://github.com/danny-avila/LibreChat/pull/7245)
- 🔖 fix: Custom Headers for Initial MCP SSE Connection by **@danny-avila** in [#7246](https://github.com/danny-avila/LibreChat/pull/7246)
- 🛡️ fix: Deep Clone `MCPOptions` for User MCP Connections by **@danny-avila** in [#7247](https://github.com/danny-avila/LibreChat/pull/7247)
- 🔄 fix: URL Param Race Condition and File Draft Persistence by **@danny-avila** in [#7257](https://github.com/danny-avila/LibreChat/pull/7257)
- 🔄 fix: Assistants Endpoint & Minor Issues by **@danny-avila** in [#7274](https://github.com/danny-avila/LibreChat/pull/7274)
- 🔄 fix: Ollama Think Tag Edge Case with Tools by **@danny-avila** in [#7275](https://github.com/danny-avila/LibreChat/pull/7275)
-
-### ⚙️ Other Changes
-
- 📜 docs: CHANGELOG for release v0.7.8-rc1 by **@github-actions[bot]** in [#7153](https://github.com/danny-avila/LibreChat/pull/7153)
- 🔄 refactor: Artifact Visibility Management by **@danny-avila** in [#7181](https://github.com/danny-avila/LibreChat/pull/7181)
- 📦 chore: Bump Package Security by **@danny-avila** in [#7183](https://github.com/danny-avila/LibreChat/pull/7183)
- 🌿 refactor: Unmount Fork Popover on Hide for Better Performance by **@danny-avila** in [#7189](https://github.com/danny-avila/LibreChat/pull/7189)
- 🧰 chore: ESLint configuration to enforce Prettier formatting rules by **@mawburn** in [#7186](https://github.com/danny-avila/LibreChat/pull/7186)
- 🎨 style: Improve KaTeX Rendering for LaTeX Equations by **@andresgit** in [#7223](https://github.com/danny-avila/LibreChat/pull/7223)
- 📝 docs: Update `.env.example` Google models by **@marlonka** in [#7254](https://github.com/danny-avila/LibreChat/pull/7254)
- 💬 refactor: MCP Chat Visibility Option, Google Rates, Remove OpenAPI Plugins by **@danny-avila** in [#7286](https://github.com/danny-avila/LibreChat/pull/7286)
- 📜 docs: Unreleased Changelog by **@github-actions[bot]** in [#7214](https://github.com/danny-avila/LibreChat/pull/7214)
-
-
-
-[See full release details][release-v0.7.8]
-
-[release-v0.7.8]: https://github.com/danny-avila/LibreChat/releases/tag/v0.7.8
-
---
-## [v0.7.8-rc1] - 
-
-Changes from v0.7.7 to v0.7.8-rc1.
-
-### ✨ New Features
-
- 🔍 feat: Mistral OCR API / Upload Files as Text by **@danny-avila** in [#6274](https://github.com/danny-avila/LibreChat/pull/6274)
- 🤖 feat: Support OpenAI Web Search models by **@danny-avila** in [#6313](https://github.com/danny-avila/LibreChat/pull/6313)
- 🔗 feat: Agent Chain (Mixture-of-Agents) by **@danny-avila** in [#6374](https://github.com/danny-avila/LibreChat/pull/6374)
- ⌛ feat: `initTimeout` for Slow Starting MCP Servers by **@perweij** in [#6383](https://github.com/danny-avila/LibreChat/pull/6383)
- 🚀 feat: `S3` Integration for File handling and Image uploads by **@rubentalstra** in [#6142](https://github.com/danny-avila/LibreChat/pull/6142)
- 🔒feat: Enable OpenID Auto-Redirect by **@leondape** in [#6066](https://github.com/danny-avila/LibreChat/pull/6066)
- 🚀 feat: Integrate `Azure Blob Storage` for file handling and image uploads by **@rubentalstra** in [#6153](https://github.com/danny-avila/LibreChat/pull/6153)
- 🚀 feat: Add support for custom `AWS` endpoint in `S3` by **@rubentalstra** in [#6431](https://github.com/danny-avila/LibreChat/pull/6431)
- 🚀 feat: Add support for LDAP STARTTLS in LDAP authentication by **@rubentalstra** in [#6438](https://github.com/danny-avila/LibreChat/pull/6438)
- 🚀 feat: Refactor schema exports and update package version to 0.0.4 by **@rubentalstra** in [#6455](https://github.com/danny-avila/LibreChat/pull/6455)
- 🔼 feat: Add Auto Submit For URL Query Params by **@mjaverto** in [#6440](https://github.com/danny-avila/LibreChat/pull/6440)
- 🛠 feat: Enhance Redis Integration, Rate Limiters & Log Headers by **@danny-avila** in [#6462](https://github.com/danny-avila/LibreChat/pull/6462)
- 💵 feat: Add Automatic Balance Refill by **@rubentalstra** in [#6452](https://github.com/danny-avila/LibreChat/pull/6452)
- 🗣️ feat: add support for gpt-4o-transcribe models by **@berry-13** in [#6483](https://github.com/danny-avila/LibreChat/pull/6483)
- 🎨 feat: UI Refresh for Enhanced UX by **@berry-13** in [#6346](https://github.com/danny-avila/LibreChat/pull/6346)
- 🌍 feat: Add support for Hungarian language localization by **@rubentalstra** in [#6508](https://github.com/danny-avila/LibreChat/pull/6508)
- 🚀 feat: Add Gemini 2.5 Token/Context Values, Increase Max Possible Output to 64k by **@danny-avila** in [#6563](https://github.com/danny-avila/LibreChat/pull/6563)
- 🚀 feat: Enhance MCP Connections For Multi-User Support by **@danny-avila** in [#6610](https://github.com/danny-avila/LibreChat/pull/6610)
- 🚀 feat: Enhance S3 URL Expiry with Refresh; fix: S3 File Deletion by **@danny-avila** in [#6647](https://github.com/danny-avila/LibreChat/pull/6647)
- 🚀 feat: enhance UI components and refactor settings by **@berry-13** in [#6625](https://github.com/danny-avila/LibreChat/pull/6625)
- 💬 feat: move TemporaryChat to the Header by **@berry-13** in [#6646](https://github.com/danny-avila/LibreChat/pull/6646)
- 🚀 feat: Use Model Specs + Specific Endpoints, Limit Providers for Agents by **@danny-avila** in [#6650](https://github.com/danny-avila/LibreChat/pull/6650)
- 🪙 feat: Sync Balance Config on Login by **@danny-avila** in [#6671](https://github.com/danny-avila/LibreChat/pull/6671)
- 🔦 feat: MCP Support for Non-Agent Endpoints by **@danny-avila** in [#6775](https://github.com/danny-avila/LibreChat/pull/6775)
- 🗃️ feat: Code Interpreter File Persistence between Sessions by **@danny-avila** in [#6790](https://github.com/danny-avila/LibreChat/pull/6790)
- 🖥️ feat: Code Interpreter API for Non-Agent Endpoints by **@danny-avila** in [#6803](https://github.com/danny-avila/LibreChat/pull/6803)
- ⚡ feat: Self-hosted Artifacts Static Bundler URL by **@danny-avila** in [#6827](https://github.com/danny-avila/LibreChat/pull/6827)
- 🐳 feat: Add Jemalloc and UV to Docker Builds by **@danny-avila** in [#6836](https://github.com/danny-avila/LibreChat/pull/6836)
- 🤖 feat: GPT-4.1 by **@danny-avila** in [#6880](https://github.com/danny-avila/LibreChat/pull/6880)
- 👋 feat: remove Edge TTS by **@berry-13** in [#6885](https://github.com/danny-avila/LibreChat/pull/6885)
- feat: nav optimization  by **@berry-13** in [#5785](https://github.com/danny-avila/LibreChat/pull/5785)
- 🗺️ feat: Add Parameter Location Mapping for OpenAPI actions by **@peeeteeer** in [#6858](https://github.com/danny-avila/LibreChat/pull/6858)
- 🤖 feat: Support `o4-mini` and `o3` Models by **@danny-avila** in [#6928](https://github.com/danny-avila/LibreChat/pull/6928)
- 🎨 feat: OpenAI Image Tools (GPT-Image-1) by **@danny-avila** in [#7079](https://github.com/danny-avila/LibreChat/pull/7079)
- 🗓️ feat: Add Special Variables for Prompts & Agents, Prompt UI Improvements by **@danny-avila** in [#7123](https://github.com/danny-avila/LibreChat/pull/7123)
-
-### 🌍 Internationalization
-
- 🌍 i18n: Add Thai Language Support and Update Translations by **@rubentalstra** in [#6219](https://github.com/danny-avila/LibreChat/pull/6219)
- 🌍 i18n: Update translation.json with latest translations by **@github-actions[bot]** in [#6220](https://github.com/danny-avila/LibreChat/pull/6220)
- 🌍 i18n: Update translation.json with latest translations by **@github-actions[bot]** in [#6240](https://github.com/danny-avila/LibreChat/pull/6240)
- 🌍 i18n: Update translation.json with latest translations by **@github-actions[bot]** in [#6241](https://github.com/danny-avila/LibreChat/pull/6241)
- 🌍 i18n: Update translation.json with latest translations by **@github-actions[bot]** in [#6277](https://github.com/danny-avila/LibreChat/pull/6277)
- 🌍 i18n: Update translation.json with latest translations by **@github-actions[bot]** in [#6414](https://github.com/danny-avila/LibreChat/pull/6414)
- 🌍 i18n: Update translation.json with latest translations by **@github-actions[bot]** in [#6505](https://github.com/danny-avila/LibreChat/pull/6505)
- 🌍 i18n: Update translation.json with latest translations by **@github-actions[bot]** in [#6530](https://github.com/danny-avila/LibreChat/pull/6530)
- 🌍 i18n: Add Persian Localization Support by **@rubentalstra** in [#6669](https://github.com/danny-avila/LibreChat/pull/6669)
- 🌍 i18n: Update translation.json with latest translations by **@github-actions[bot]** in [#6667](https://github.com/danny-avila/LibreChat/pull/6667)
- 🌍 i18n: Update translation.json with latest translations by **@github-actions[bot]** in [#7126](https://github.com/danny-avila/LibreChat/pull/7126)
- 🌍 i18n: Update translation.json with latest translations by **@github-actions[bot]** in [#7148](https://github.com/danny-avila/LibreChat/pull/7148)
-
-### 👐 Accessibility
-
- 🎨 a11y: Update Model Spec Description Text by **@berry-13** in [#6294](https://github.com/danny-avila/LibreChat/pull/6294)
- 🗑️ a11y: Add Accessible Name to Button for File Attachment Removal by **@kangabell** in [#6709](https://github.com/danny-avila/LibreChat/pull/6709)
- ⌨️ a11y: enhance accessibility & visual consistency by **@berry-13** in [#6866](https://github.com/danny-avila/LibreChat/pull/6866)
- 🙌 a11y: Searchbar/Conversations List Focus by **@danny-avila** in [#7096](https://github.com/danny-avila/LibreChat/pull/7096)
- 👐 a11y: Improve Fork and SplitText Accessibility by **@danny-avila** in [#7147](https://github.com/danny-avila/LibreChat/pull/7147)
-
-### 🔧 Fixes
-
- 🐛 fix: Avatar Type Definitions in Agent/Assistant Schemas by **@danny-avila** in [#6235](https://github.com/danny-avila/LibreChat/pull/6235)
- 🔧 fix: MeiliSearch Field Error and Patch Incorrect Import by #6210 by **@rubentalstra** in [#6245](https://github.com/danny-avila/LibreChat/pull/6245)
- 🔏 fix: Enhance Two-Factor Authentication by **@rubentalstra** in [#6247](https://github.com/danny-avila/LibreChat/pull/6247)
- 🐛 fix: Await saveMessage in abortMiddleware to ensure proper execution by **@sh4shii** in [#6248](https://github.com/danny-avila/LibreChat/pull/6248)
- 🔧 fix: Axios Proxy Usage And Bump `mongoose` by **@danny-avila** in [#6298](https://github.com/danny-avila/LibreChat/pull/6298)
- 🔧 fix: comment out MCP servers to resolve service run issues by **@KunalScriptz** in [#6316](https://github.com/danny-avila/LibreChat/pull/6316)
- 🔧 fix: Update Token Calculations and Mapping, MCP `env` Initialization by **@danny-avila** in [#6406](https://github.com/danny-avila/LibreChat/pull/6406)
- 🐞 fix: Agent "Resend" Message Attachments + Source Icon Styling by **@danny-avila** in [#6408](https://github.com/danny-avila/LibreChat/pull/6408)
- 🐛 fix: Prevent Crash on Duplicate Message ID by **@Odrec** in [#6392](https://github.com/danny-avila/LibreChat/pull/6392)
- 🔐 fix: Invalid Key Length in 2FA Encryption by **@rubentalstra** in [#6432](https://github.com/danny-avila/LibreChat/pull/6432)
- 🏗️ fix: Fix Agents Token Spend Race Conditions, Expand Test Coverage by **@danny-avila** in [#6480](https://github.com/danny-avila/LibreChat/pull/6480)
- 🔃 fix: Draft Clearing, Claude Titles, Remove Default Vision Max Tokens by **@danny-avila** in [#6501](https://github.com/danny-avila/LibreChat/pull/6501)
- 🔧 fix: Update username reference to use user.name in greeting display by **@rubentalstra** in [#6534](https://github.com/danny-avila/LibreChat/pull/6534)
- 🔧 fix: S3 Download Stream with Key Extraction and Blob Storage Encoding for Vision by **@danny-avila** in [#6557](https://github.com/danny-avila/LibreChat/pull/6557)
- 🔧 fix: Mistral type strictness for `usage` & update token values/windows by **@danny-avila** in [#6562](https://github.com/danny-avila/LibreChat/pull/6562)
- 🔧 fix: Consolidate Text Parsing and TTS Edge Initialization by **@danny-avila** in [#6582](https://github.com/danny-avila/LibreChat/pull/6582)
- 🔧 fix: Ensure continuation in image processing on base64 encoding from Blob Storage by **@danny-avila** in [#6619](https://github.com/danny-avila/LibreChat/pull/6619)
- ✉️ fix: Fallback For User Name In Email Templates by **@danny-avila** in [#6620](https://github.com/danny-avila/LibreChat/pull/6620)
- 🔧 fix: Azure Blob Integration and File Source References by **@rubentalstra** in [#6575](https://github.com/danny-avila/LibreChat/pull/6575)
- 🐛 fix: Safeguard against undefined addedEndpoints by **@wipash** in [#6654](https://github.com/danny-avila/LibreChat/pull/6654)
- 🤖 fix: Gemini 2.5 Vision Support by **@danny-avila** in [#6663](https://github.com/danny-avila/LibreChat/pull/6663)
- 🔄 fix: Avatar & Error Handling Enhancements by **@danny-avila** in [#6687](https://github.com/danny-avila/LibreChat/pull/6687)
- 🔧 fix: Chat Middleware, Zod Conversion, Auto-Save and S3 URL Refresh by **@danny-avila** in [#6720](https://github.com/danny-avila/LibreChat/pull/6720)
- 🔧 fix: Agent Capability Checks & DocumentDB Compatibility for Agent Resource Removal by **@danny-avila** in [#6726](https://github.com/danny-avila/LibreChat/pull/6726)
- 🔄 fix: Improve audio MIME type detection and handling by **@berry-13** in [#6707](https://github.com/danny-avila/LibreChat/pull/6707)
- 🪺 fix: Update Role Handling due to New Schema Shape by **@danny-avila** in [#6774](https://github.com/danny-avila/LibreChat/pull/6774)
- 🗨️ fix: Show ModelSpec Greeting by **@berry-13** in [#6770](https://github.com/danny-avila/LibreChat/pull/6770)
- 🔧 fix: Keyv and Proxy Issues, and More Memory Optimizations by **@danny-avila** in [#6867](https://github.com/danny-avila/LibreChat/pull/6867)
- ✨ fix: Implement dynamic text sizing for greeting and name display by **@berry-13** in [#6833](https://github.com/danny-avila/LibreChat/pull/6833)
- 📝 fix: Mistral OCR Image Support and Azure Agent Titles by **@danny-avila** in [#6901](https://github.com/danny-avila/LibreChat/pull/6901)
- 📢 fix: Invalid `engineTTS` and Conversation State on Navigation by **@berry-13** in [#6904](https://github.com/danny-avila/LibreChat/pull/6904)
- 🛠️ fix: Improve Accessibility and Display of Conversation Menu by **@danny-avila** in [#6913](https://github.com/danny-avila/LibreChat/pull/6913)
- 🔧 fix: Agent Resource Form, Convo Menu Style, Ensure Draft Clears on Submission by **@danny-avila** in [#6925](https://github.com/danny-avila/LibreChat/pull/6925)
- 🔀 fix: MCP Improvements, Auto-Save Drafts, Artifact Markup by **@danny-avila** in [#7040](https://github.com/danny-avila/LibreChat/pull/7040)
- 🐋 fix: Improve Deepseek Compatbility by **@danny-avila** in [#7132](https://github.com/danny-avila/LibreChat/pull/7132)
- 🐙 fix: Add Redis Ping Interval to Prevent Connection Drops by **@peeeteeer** in [#7127](https://github.com/danny-avila/LibreChat/pull/7127)
-
-### ⚙️ Other Changes
-
- 📦 refactor: Move DB Models to `@librechat/data-schemas` by **@rubentalstra** in [#6210](https://github.com/danny-avila/LibreChat/pull/6210)
- 📦 chore: Patch `axios` to address CVE-2025-27152 by **@danny-avila** in [#6222](https://github.com/danny-avila/LibreChat/pull/6222)
- ⚠️ refactor: Use Error Content Part Instead Of Throwing Error for Agents by **@danny-avila** in [#6262](https://github.com/danny-avila/LibreChat/pull/6262)
- 🏃‍♂️ refactor: Improve Agent Run Context & Misc. Changes by **@danny-avila** in [#6448](https://github.com/danny-avila/LibreChat/pull/6448)
- 📝 docs: librechat.example.yaml by **@ineiti** in [#6442](https://github.com/danny-avila/LibreChat/pull/6442)
- 🏃‍♂️ refactor: More Agent Context Improvements during Run by **@danny-avila** in [#6477](https://github.com/danny-avila/LibreChat/pull/6477)
- 🔃 refactor: Allow streaming for `o1` models by **@danny-avila** in [#6509](https://github.com/danny-avila/LibreChat/pull/6509)
- 🔧 chore: `Vite` Plugin Upgrades & Config Optimizations by **@rubentalstra** in [#6547](https://github.com/danny-avila/LibreChat/pull/6547)
- 🔧 refactor: Consolidate Logging, Model Selection & Actions Optimizations, Minor Fixes by **@danny-avila** in [#6553](https://github.com/danny-avila/LibreChat/pull/6553)
- 🎨 style: Address Minor UI Refresh Issues by **@berry-13** in [#6552](https://github.com/danny-avila/LibreChat/pull/6552)
- 🔧 refactor: Enhance Model & Endpoint Configurations with Global Indicators 🌍 by **@berry-13** in [#6578](https://github.com/danny-avila/LibreChat/pull/6578)
- 💬 style: Chat UI, Greeting, and Message adjustments by **@berry-13** in [#6612](https://github.com/danny-avila/LibreChat/pull/6612)
- ⚡ refactor: DocumentDB Compatibility for Balance Updates by **@danny-avila** in [#6673](https://github.com/danny-avila/LibreChat/pull/6673)
- 🧹 chore: Update ESLint rules for React hooks by **@rubentalstra** in [#6685](https://github.com/danny-avila/LibreChat/pull/6685)
- 🪙 chore: Update Gemini Pricing by **@RedwindA** in [#6731](https://github.com/danny-avila/LibreChat/pull/6731)
- 🪺 refactor: Nest Permission fields for Roles by **@rubentalstra** in [#6487](https://github.com/danny-avila/LibreChat/pull/6487)
- 📦 chore: Update `caniuse-lite` dependency to version 1.0.30001706 by **@rubentalstra** in [#6482](https://github.com/danny-avila/LibreChat/pull/6482)
- ⚙️ refactor: OAuth Flow Signal, Type Safety, Tool Progress & Updated Packages by **@danny-avila** in [#6752](https://github.com/danny-avila/LibreChat/pull/6752)
- 📦 chore: bump vite from 6.2.3 to 6.2.5 by **@dependabot[bot]** in [#6745](https://github.com/danny-avila/LibreChat/pull/6745)
- 💾 chore: Enhance Local Storage Handling and Update MCP SDK by **@danny-avila** in [#6809](https://github.com/danny-avila/LibreChat/pull/6809)
- 🤖 refactor: Improve Agents Memory Usage, Bump Keyv, Grok 3 by **@danny-avila** in [#6850](https://github.com/danny-avila/LibreChat/pull/6850)
- 💾 refactor: Enhance Memory In Image Encodings & Client Disposal by **@danny-avila** in [#6852](https://github.com/danny-avila/LibreChat/pull/6852)
- 🔁 refactor: Token Event Handler and Standardize `maxTokens` Key by **@danny-avila** in [#6886](https://github.com/danny-avila/LibreChat/pull/6886)
- 🔍 refactor: Search & Message Retrieval by **@berry-13** in [#6903](https://github.com/danny-avila/LibreChat/pull/6903)
- 🎨 style: standardize dropdown styling & fix z-Index layering by **@berry-13** in [#6939](https://github.com/danny-avila/LibreChat/pull/6939)
- 📙 docs: CONTRIBUTING.md by **@dblock** in [#6831](https://github.com/danny-avila/LibreChat/pull/6831)
- 🧭 refactor: Modernize Nav/Header by **@danny-avila** in [#7094](https://github.com/danny-avila/LibreChat/pull/7094)
- 🪶 refactor: Chat Input Focus for Conversation Navigations & ChatForm Optimizations by **@danny-avila** in [#7100](https://github.com/danny-avila/LibreChat/pull/7100)
- 🔃 refactor: Streamline Navigation, Message Loading UX by **@danny-avila** in [#7118](https://github.com/danny-avila/LibreChat/pull/7118)
- 📜 docs: Unreleased changelog by **@github-actions[bot]** in [#6265](https://github.com/danny-avila/LibreChat/pull/6265)
-
-
-
-[See full release details][release-v0.7.8-rc1]
-
-[release-v0.7.8-rc1]: https://github.com/danny-avila/LibreChat/releases/tag/v0.7.8-rc1
-
---
--- a/CLAUDE.md
+++ b/CLAUDE.md
@ -0,0 +1 @@
+AGENTS.md
--- a/Dockerfile
+++ b/Dockerfile
@ -1,4 +1,4 @@
-# v0.8.2-rc2
+# v0.8.3

 # Base node image
 FROM node:20-alpine AS node
--- a/Dockerfile.multi
+++ b/Dockerfile.multi
@ -1,5 +1,5 @@
 # Dockerfile.multi
-# v0.8.2-rc2
+# v0.8.3

 # Set configurable max-old-space-size with default
 ARG NODE_MAX_OLD_SPACE_SIZE=6144
--- a/README.md
+++ b/README.md
@ -27,8 +27,8 @@
 </p>

 <p align="center">
-<a href="https://railway.app/template/b5k2mn?referralCode=HI9hWz">
-  <img src="https://railway.app/button.svg" alt="Deploy on Railway" height="30">
+<a href="https://railway.com/deploy/b5k2mn?referralCode=HI9hWz">
+  <img src="https://railway.com/button.svg" alt="Deploy on Railway" height="30">
 </a>
 <a href="https://zeabur.com/templates/0X2ZY8">
  <img src="https://zeabur.com/button.svg" alt="Deploy on Zeabur" height="30"/>
@ -109,6 +109,11 @@
 - 🎨 **Customizable Interface**:  
  - Customizable Dropdown & Interface that adapts to both power users and newcomers

+- 🌊 **[Resumable Streams](https://www.librechat.ai/docs/features/resumable_streams)**:  
+  - Never lose a response: AI responses automatically reconnect and resume if your connection drops
+  - Multi-Tab & Multi-Device Sync: Open the same chat in multiple tabs or pick up on another device
+  - Production-Ready: Works from single-server setups to horizontally scaled deployments with Redis
+
 - 🗣️ **Speech & Audio**:  
  - Chat hands-free with Speech-to-Text and Text-to-Speech  
  - Automatically send and play Audio  
@ -137,13 +142,11 @@

 ## 🪶 All-In-One AI Conversations with LibreChat

-LibreChat brings together the future of assistant AIs with the revolutionary technology of OpenAI's ChatGPT. Celebrating the original styling, LibreChat gives you the ability to integrate multiple AI models. It also integrates and enhances original client features such as conversation and message search, prompt templates and plugins.
+LibreChat is a self-hosted AI chat platform that unifies all major AI providers in a single, privacy-focused interface.

-With LibreChat, you no longer need to opt for ChatGPT Plus and can instead use free or pay-per-call APIs. We welcome contributions, cloning, and forking to enhance the capabilities of this advanced chatbot platform.
+Beyond chat, LibreChat provides AI Agents, Model Context Protocol (MCP) support, Artifacts, Code Interpreter, custom actions, conversation search, and enterprise-ready multi-user authentication.

-[![Watch the video](https://raw.githubusercontent.com/LibreChat-AI/librechat.ai/main/public/images/changelog/v0.7.6.gif)](https://www.youtube.com/watch?v=ilfwGQtJNlI)
-
-Click on the thumbnail to open the video☝️
+Open source, actively developed, and built for anyone who values control over their AI infrastructure.

 ---

--- a/api/app/clients/BaseClient.js
+++ b/api/app/clients/BaseClient.js
@ -4,6 +4,7 @@ const { logger } = require('@librechat/data-schemas');
 const {
  countTokens,
  getBalanceConfig,
+  buildMessageFiles,
  extractFileContext,
  encodeAndFormatAudios,
  encodeAndFormatVideos,
@ -20,6 +21,7 @@ const {
  isAgentsEndpoint,
  isEphemeralAgentId,
  supportsBalanceCheck,
+  isBedrockDocumentType,
 } = require('librechat-data-provider');
 const {
  updateMessage,
@ -122,7 +124,9 @@ class BaseClient {
   * @returns {number}
   */
  getTokenCountForResponse(responseMessage) {
-    logger.debug('[BaseClient] `recordTokenUsage` not implemented.', responseMessage);
+    logger.debug('[BaseClient] `getTokenCountForResponse` not implemented.', {
+      messageId: responseMessage?.messageId,
+    });
  }

  /**
@ -133,12 +137,14 @@ class BaseClient {
   * @param {AppConfig['balance']} [balance]
   * @param {number} promptTokens
   * @param {number} completionTokens
+   * @param {string} [messageId]
   * @returns {Promise<void>}
   */
-  async recordTokenUsage({ model, balance, promptTokens, completionTokens }) {
+  async recordTokenUsage({ model, balance, promptTokens, completionTokens, messageId }) {
    logger.debug('[BaseClient] `recordTokenUsage` not implemented.', {
      model,
      balance,
+      messageId,
      promptTokens,
      completionTokens,
    });
@ -659,16 +665,27 @@ class BaseClient {
    );

    if (tokenCountMap) {
-      logger.debug('[BaseClient] tokenCountMap', tokenCountMap);
      if (tokenCountMap[userMessage.messageId]) {
        userMessage.tokenCount = tokenCountMap[userMessage.messageId];
-        logger.debug('[BaseClient] userMessage', userMessage);
+        logger.debug('[BaseClient] userMessage', {
+          messageId: userMessage.messageId,
+          tokenCount: userMessage.tokenCount,
+          conversationId: userMessage.conversationId,
+        });
      }

      this.handleTokenCountMap(tokenCountMap);
    }

    if (!isEdited && !this.skipSaveUserMessage) {
+      const reqFiles = this.options.req?.body?.files;
+      if (reqFiles && Array.isArray(this.options.attachments)) {
+        const files = buildMessageFiles(reqFiles, this.options.attachments);
+        if (files.length > 0) {
+          userMessage.files = files;
+        }
+        delete userMessage.image_urls;
+      }
      userMessagePromise = this.saveMessageToDatabase(userMessage, saveOptions, user);
      this.savedMessageIds.add(userMessage.messageId);
      if (typeof opts?.getReqData === 'function') {
@ -780,9 +797,18 @@ class BaseClient {
          promptTokens,
          completionTokens,
          balance: balanceConfig,
-          model: responseMessage.model,
+          /** Note: When using agents, responseMessage.model is the agent ID, not the model */
+          model: this.model,
+          messageId: this.responseMessageId,
        });
      }
+
+      logger.debug('[BaseClient] Response token usage', {
+        messageId: responseMessage.messageId,
+        model: responseMessage.model,
+        promptTokens,
+        completionTokens,
+      });
    }

    if (userMessagePromise) {
@ -1300,6 +1326,9 @@ class BaseClient {

    const allFiles = [];

+    const provider = this.options.agent?.provider ?? this.options.endpoint;
+    const isBedrock = provider === EModelEndpoint.bedrock;
+
    for (const file of attachments) {
      /** @type {FileSources} */
      const source = file.source ?? FileSources.local;
@ -1317,6 +1346,9 @@ class BaseClient {
      } else if (file.type === 'application/pdf') {
        categorizedAttachments.documents.push(file);
        allFiles.push(file);
+      } else if (isBedrock && isBedrockDocumentType(file.type)) {
+        categorizedAttachments.documents.push(file);
+        allFiles.push(file);
      } else if (file.type.startsWith('video/')) {
        categorizedAttachments.videos.push(file);
        allFiles.push(file);
--- a/api/app/clients/specs/BaseClient.test.js
+++ b/api/app/clients/specs/BaseClient.test.js
@ -41,9 +41,9 @@ jest.mock('~/models', () => ({
 const { getConvo, saveConvo } = require('~/models');

 jest.mock('@librechat/agents', () => {
-  const { Providers } = jest.requireActual('@librechat/agents');
+  const actual = jest.requireActual('@librechat/agents');
  return {
-    Providers,
+    ...actual,
    ChatOpenAI: jest.fn().mockImplementation(() => {
      return {};
    }),
@ -821,6 +821,56 @@ describe('BaseClient', () => {
    });
  });

+  describe('recordTokenUsage model assignment', () => {
+    test('should pass this.model to recordTokenUsage, not the agent ID from responseMessage.model', async () => {
+      const actualModel = 'claude-opus-4-5';
+      const agentId = 'agent_p5Z_IU6EIxBoqn1BoqLBp';
+
+      TestClient.model = actualModel;
+      TestClient.options.endpoint = 'agents';
+      TestClient.options.agent = { id: agentId };
+
+      TestClient.getTokenCountForResponse = jest.fn().mockReturnValue(50);
+      TestClient.recordTokenUsage = jest.fn().mockResolvedValue(undefined);
+      TestClient.buildMessages.mockReturnValue({
+        prompt: [],
+        tokenCountMap: { res: 50 },
+      });
+
+      await TestClient.sendMessage('Hello', {});
+
+      expect(TestClient.recordTokenUsage).toHaveBeenCalledWith(
+        expect.objectContaining({
+          model: actualModel,
+        }),
+      );
+
+      const callArgs = TestClient.recordTokenUsage.mock.calls[0][0];
+      expect(callArgs.model).not.toBe(agentId);
+    });
+
+    test('should pass this.model even when this.model differs from modelOptions.model', async () => {
+      const instanceModel = 'gpt-4o';
+      TestClient.model = instanceModel;
+      TestClient.modelOptions = { model: 'gpt-4o-mini' };
+
+      TestClient.getTokenCountForResponse = jest.fn().mockReturnValue(50);
+      TestClient.recordTokenUsage = jest.fn().mockResolvedValue(undefined);
+      TestClient.buildMessages.mockReturnValue({
+        prompt: [],
+        tokenCountMap: { res: 50 },
+      });
+
+      await TestClient.sendMessage('Hello', {});
+
+      expect(TestClient.recordTokenUsage).toHaveBeenCalledWith(
+        expect.objectContaining({
+          model: instanceModel,
+        }),
+      );
+    });
+  });
+
  describe('getMessagesWithinTokenLimit with instructions', () => {
    test('should always include instructions when present', async () => {
      TestClient.maxContextTokens = 50;
@ -928,4 +978,123 @@ describe('BaseClient', () => {
      expect(result.remainingContextTokens).toBe(2); // 25 - 20 - 3(assistant label)
    });
  });
+
+  describe('sendMessage file population', () => {
+    const attachment = {
+      file_id: 'file-abc',
+      filename: 'image.png',
+      filepath: '/uploads/image.png',
+      type: 'image/png',
+      bytes: 1024,
+      object: 'file',
+      user: 'user-1',
+      embedded: false,
+      usage: 0,
+      text: 'large ocr blob that should be stripped',
+      _id: 'mongo-id-1',
+    };
+
+    beforeEach(() => {
+      TestClient.options.req = { body: { files: [{ file_id: 'file-abc' }] } };
+      TestClient.options.attachments = [attachment];
+    });
+
+    test('populates userMessage.files before saveMessageToDatabase is called', async () => {
+      TestClient.saveMessageToDatabase = jest.fn().mockImplementation((msg) => {
+        return Promise.resolve({ message: msg });
+      });
+
+      await TestClient.sendMessage('Hello');
+
+      const userSave = TestClient.saveMessageToDatabase.mock.calls.find(
+        ([msg]) => msg.isCreatedByUser,
+      );
+      expect(userSave).toBeDefined();
+      expect(userSave[0].files).toBeDefined();
+      expect(userSave[0].files).toHaveLength(1);
+      expect(userSave[0].files[0].file_id).toBe('file-abc');
+    });
+
+    test('strips text and _id from files before saving', async () => {
+      TestClient.saveMessageToDatabase = jest.fn().mockResolvedValue({ message: {} });
+
+      await TestClient.sendMessage('Hello');
+
+      const userSave = TestClient.saveMessageToDatabase.mock.calls.find(
+        ([msg]) => msg.isCreatedByUser,
+      );
+      expect(userSave[0].files[0].text).toBeUndefined();
+      expect(userSave[0].files[0]._id).toBeUndefined();
+      expect(userSave[0].files[0].filename).toBe('image.png');
+    });
+
+    test('deletes image_urls from userMessage when files are present', async () => {
+      TestClient.saveMessageToDatabase = jest.fn().mockResolvedValue({ message: {} });
+      TestClient.options.attachments = [
+        { ...attachment, image_urls: ['data:image/png;base64,...'] },
+      ];
+
+      await TestClient.sendMessage('Hello');
+
+      const userSave = TestClient.saveMessageToDatabase.mock.calls.find(
+        ([msg]) => msg.isCreatedByUser,
+      );
+      expect(userSave[0].image_urls).toBeUndefined();
+    });
+
+    test('does not set files when no attachments match request file IDs', async () => {
+      TestClient.options.req = { body: { files: [{ file_id: 'file-nomatch' }] } };
+      TestClient.saveMessageToDatabase = jest.fn().mockResolvedValue({ message: {} });
+
+      await TestClient.sendMessage('Hello');
+
+      const userSave = TestClient.saveMessageToDatabase.mock.calls.find(
+        ([msg]) => msg.isCreatedByUser,
+      );
+      expect(userSave[0].files).toBeUndefined();
+    });
+
+    test('skips file population when attachments is not an array (Promise case)', async () => {
+      TestClient.options.attachments = Promise.resolve([attachment]);
+      TestClient.saveMessageToDatabase = jest.fn().mockResolvedValue({ message: {} });
+
+      await TestClient.sendMessage('Hello');
+
+      const userSave = TestClient.saveMessageToDatabase.mock.calls.find(
+        ([msg]) => msg.isCreatedByUser,
+      );
+      expect(userSave[0].files).toBeUndefined();
+    });
+
+    test('skips file population when skipSaveUserMessage is true', async () => {
+      TestClient.skipSaveUserMessage = true;
+      TestClient.saveMessageToDatabase = jest.fn().mockResolvedValue({ message: {} });
+
+      await TestClient.sendMessage('Hello');
+
+      const userSave = TestClient.saveMessageToDatabase.mock.calls.find(
+        ([msg]) => msg?.isCreatedByUser,
+      );
+      expect(userSave).toBeUndefined();
+    });
+
+    test('ignores file_id: undefined entries in req.body.files (no set poisoning)', async () => {
+      TestClient.options.req = {
+        body: { files: [{ file_id: undefined }, { file_id: 'file-abc' }] },
+      };
+      TestClient.options.attachments = [
+        { ...attachment, file_id: undefined },
+        { ...attachment, file_id: 'file-abc' },
+      ];
+      TestClient.saveMessageToDatabase = jest.fn().mockResolvedValue({ message: {} });
+
+      await TestClient.sendMessage('Hello');
+
+      const userSave = TestClient.saveMessageToDatabase.mock.calls.find(
+        ([msg]) => msg.isCreatedByUser,
+      );
+      expect(userSave[0].files).toHaveLength(1);
+      expect(userSave[0].files[0].file_id).toBe('file-abc');
+    });
+  });
 });
--- a/api/app/clients/tools/index.js
+++ b/api/app/clients/tools/index.js
@ -5,7 +5,6 @@ const DALLE3 = require('./structured/DALLE3');
 const FluxAPI = require('./structured/FluxAPI');
 const OpenWeather = require('./structured/OpenWeather');
 const StructuredWolfram = require('./structured/Wolfram');
-const createYouTubeTools = require('./structured/YouTube');
 const StructuredACS = require('./structured/AzureAISearch');
 const StructuredSD = require('./structured/StableDiffusion');
 const GoogleSearchAPI = require('./structured/GoogleSearch');
@ -25,7 +24,6 @@ module.exports = {
  GoogleSearchAPI,
  TraversaalSearch,
  StructuredWolfram,
-  createYouTubeTools,
  TavilySearchResults,
  createOpenAIImageTools,
  createGeminiImageTool,
--- a/api/app/clients/tools/manifest.json
+++ b/api/app/clients/tools/manifest.json
@ -16,7 +16,7 @@
    "name": "Google",
    "pluginKey": "google",
    "description": "Use Google Search to find information about the weather, news, sports, and more.",
-    "icon": "https://i.imgur.com/SMmVkNB.png",
+    "icon": "assets/google-search.svg",
    "authConfig": [
      {
        "authField": "GOOGLE_CSE_ID",
@ -30,20 +30,6 @@
      }
    ]
  },
-  {
-    "name": "YouTube",
-    "pluginKey": "youtube",
-    "toolkit": true,
-    "description": "Get YouTube video information, retrieve comments, analyze transcripts and search for videos.",
-    "icon": "https://www.youtube.com/s/desktop/7449ebf7/img/favicon_144x144.png",
-    "authConfig": [
-      {
-        "authField": "YOUTUBE_API_KEY",
-        "label": "YouTube API Key",
-        "description": "Your YouTube Data API v3 key."
-      }
-    ]
-  },
  {
    "name": "OpenAI Image Tools",
    "pluginKey": "image_gen_oai",
@ -71,24 +57,11 @@
      }
    ]
  },
-  {
-    "name": "Browser",
-    "pluginKey": "web-browser",
-    "description": "Scrape and summarize webpage data",
-    "icon": "assets/web-browser.svg",
-    "authConfig": [
-      {
-        "authField": "OPENAI_API_KEY",
-        "label": "OpenAI API Key",
-        "description": "Browser makes use of OpenAI embeddings"
-      }
-    ]
-  },
  {
    "name": "DALL-E-3",
    "pluginKey": "dalle",
    "description": "[DALL-E-3] Create realistic images and art from a description in natural language",
-    "icon": "https://i.imgur.com/u2TzXzH.png",
+    "icon": "assets/openai.svg",
    "authConfig": [
      {
        "authField": "DALLE3_API_KEY||DALLE_API_KEY",
@ -101,7 +74,7 @@
    "name": "Tavily Search",
    "pluginKey": "tavily_search_results_json",
    "description": "Tavily Search is a robust search API tailored for LLM Agents. It seamlessly integrates with diverse data sources to ensure a superior, relevant search experience.",
-    "icon": "https://tavily.com/favicon.ico",
+    "icon": "assets/tavily.svg",
    "authConfig": [
      {
        "authField": "TAVILY_API_KEY",
@ -114,14 +87,14 @@
    "name": "Calculator",
    "pluginKey": "calculator",
    "description": "Perform simple and complex mathematical calculations.",
-    "icon": "https://i.imgur.com/RHsSG5h.png",
+    "icon": "assets/calculator.svg",
    "authConfig": []
  },
  {
    "name": "Stable Diffusion",
    "pluginKey": "stable-diffusion",
    "description": "Generate photo-realistic images given any text input.",
-    "icon": "https://i.imgur.com/Yr466dp.png",
+    "icon": "assets/stability-ai.svg",
    "authConfig": [
      {
        "authField": "SD_WEBUI_URL",
@ -134,7 +107,7 @@
    "name": "Azure AI Search",
    "pluginKey": "azure-ai-search",
    "description": "Use Azure AI Search to find information",
-    "icon": "https://i.imgur.com/E7crPze.png",
+    "icon": "assets/azure-ai-search.svg",
    "authConfig": [
      {
        "authField": "AZURE_AI_SEARCH_SERVICE_ENDPOINT",
@ -170,7 +143,7 @@
    "name": "Flux",
    "pluginKey": "flux",
    "description": "Generate images using text with the Flux API.",
-    "icon": "https://blackforestlabs.ai/wp-content/uploads/2024/07/bfl_logo_retraced_blk.png",
+    "icon": "assets/bfl-ai.svg",
    "isAuthRequired": "true",
    "authConfig": [
      {
@ -183,14 +156,14 @@
  {
    "name": "Gemini Image Tools",
    "pluginKey": "gemini_image_gen",
-    "toolkit": true,
    "description": "Generate high-quality images using Google's Gemini Image Models. Supports Gemini API or Vertex AI.",
    "icon": "assets/gemini_image_gen.svg",
    "authConfig": [
      {
-        "authField": "GEMINI_API_KEY||GOOGLE_KEY||GEMINI_VERTEX_ENABLED",
-        "label": "Gemini API Key (Optional if Vertex AI is configured)",
-        "description": "Your Google Gemini API Key from <a href='https://aistudio.google.com/app/apikey' target='_blank'>Google AI Studio</a>. Leave blank if using Vertex AI with service account."
+        "authField": "GEMINI_API_KEY||GOOGLE_KEY||GOOGLE_SERVICE_KEY_FILE",
+        "label": "Gemini API Key (optional)",
+        "description": "Your Google Gemini API Key from <a href='https://aistudio.google.com/app/apikey' target='_blank'>Google AI Studio</a>. Leave blank to use Vertex AI with a service account (GOOGLE_SERVICE_KEY_FILE or api/data/auth.json).",
+        "optional": true
      }
    ]
  }
--- a/api/app/clients/tools/structured/AzureAISearch.js
+++ b/api/app/clients/tools/structured/AzureAISearch.js
@ -1,14 +1,28 @@
-const { z } = require('zod');
 const { Tool } = require('@langchain/core/tools');
 const { logger } = require('@librechat/data-schemas');
 const { SearchClient, AzureKeyCredential } = require('@azure/search-documents');

+const azureAISearchJsonSchema = {
+  type: 'object',
+  properties: {
+    query: {
+      type: 'string',
+      description: 'Search word or phrase to Azure AI Search',
+    },
+  },
+  required: ['query'],
+};
+
 class AzureAISearch extends Tool {
  // Constants for default values
  static DEFAULT_API_VERSION = '2023-11-01';
  static DEFAULT_QUERY_TYPE = 'simple';
  static DEFAULT_TOP = 5;

+  static get jsonSchema() {
+    return azureAISearchJsonSchema;
+  }
+
  // Helper function for initializing properties
  _initializeField(field, envVar, defaultValue) {
    return field || process.env[envVar] || defaultValue;
@ -22,10 +36,7 @@ class AzureAISearch extends Tool {
    /* Used to initialize the Tool without necessary variables. */
    this.override = fields.override ?? false;

-    // Define schema
-    this.schema = z.object({
-      query: z.string().describe('Search word or phrase to Azure AI Search'),
-    });
+    this.schema = azureAISearchJsonSchema;

    // Initialize properties using helper function
    this.serviceEndpoint = this._initializeField(
--- a/api/app/clients/tools/structured/DALLE3.js
+++ b/api/app/clients/tools/structured/DALLE3.js
@ -1,4 +1,3 @@
-const { z } = require('zod');
 const path = require('path');
 const OpenAI = require('openai');
 const { v4: uuidv4 } = require('uuid');
@ -8,6 +7,36 @@ const { logger } = require('@librechat/data-schemas');
 const { getImageBasename, extractBaseURL } = require('@librechat/api');
 const { FileContext, ContentTypes } = require('librechat-data-provider');

+const dalle3JsonSchema = {
+  type: 'object',
+  properties: {
+    prompt: {
+      type: 'string',
+      maxLength: 4000,
+      description:
+        'A text description of the desired image, following the rules, up to 4000 characters.',
+    },
+    style: {
+      type: 'string',
+      enum: ['vivid', 'natural'],
+      description:
+        'Must be one of `vivid` or `natural`. `vivid` generates hyper-real and dramatic images, `natural` produces more natural, less hyper-real looking images',
+    },
+    quality: {
+      type: 'string',
+      enum: ['hd', 'standard'],
+      description: 'The quality of the generated image. Only `hd` and `standard` are supported.',
+    },
+    size: {
+      type: 'string',
+      enum: ['1024x1024', '1792x1024', '1024x1792'],
+      description:
+        'The size of the requested image. Use 1024x1024 (square) as the default, 1792x1024 if the user requests a wide image, and 1024x1792 for full-body portraits. Always include this parameter in the request.',
+    },
+  },
+  required: ['prompt', 'style', 'quality', 'size'],
+};
+
 const displayMessage =
  "DALL-E displayed an image. All generated images are already plainly visible, so don't repeat the descriptions in detail. Do not list download links as they are available in the UI already. The user may download the images by clicking on them, but do not mention anything about downloading to the user.";
 class DALLE3 extends Tool {
@ -72,27 +101,11 @@ class DALLE3 extends Tool {
    // The prompt must intricately describe every part of the image in concrete, objective detail. THINK about what the end goal of the description is, and extrapolate that to what would make satisfying images.
    // All descriptions sent to dalle should be a paragraph of text that is extremely descriptive and detailed. Each should be more than 3 sentences long.
    // - The "vivid" style is HIGHLY preferred, but "natural" is also supported.`;
-    this.schema = z.object({
-      prompt: z
-        .string()
-        .max(4000)
-        .describe(
-          'A text description of the desired image, following the rules, up to 4000 characters.',
-        ),
-      style: z
-        .enum(['vivid', 'natural'])
-        .describe(
-          'Must be one of `vivid` or `natural`. `vivid` generates hyper-real and dramatic images, `natural` produces more natural, less hyper-real looking images',
-        ),
-      quality: z
-        .enum(['hd', 'standard'])
-        .describe('The quality of the generated image. Only `hd` and `standard` are supported.'),
-      size: z
-        .enum(['1024x1024', '1792x1024', '1024x1792'])
-        .describe(
-          'The size of the requested image. Use 1024x1024 (square) as the default, 1792x1024 if the user requests a wide image, and 1024x1792 for full-body portraits. Always include this parameter in the request.',
-        ),
-    });
+    this.schema = dalle3JsonSchema;
+  }
+
+  static get jsonSchema() {
+    return dalle3JsonSchema;
  }

  getApiKey() {
--- a/api/app/clients/tools/structured/FluxAPI.js
+++ b/api/app/clients/tools/structured/FluxAPI.js
@ -1,4 +1,3 @@
-const { z } = require('zod');
 const axios = require('axios');
 const fetch = require('node-fetch');
 const { v4: uuidv4 } = require('uuid');
@ -7,6 +6,84 @@ const { logger } = require('@librechat/data-schemas');
 const { HttpsProxyAgent } = require('https-proxy-agent');
 const { FileContext, ContentTypes } = require('librechat-data-provider');

+/**
+ * JSON Schema (plain object) describing the structured input of the FluxAPI tool.
+ * It is assigned to `this.schema` in the constructor and returned by the static
+ * `jsonSchema` getter. Every property is optional (`required` is empty), and the
+ * numeric limits mentioned in the descriptions (multiple of 32, 1 to 50 steps,
+ * 0 to 6 tolerance) are stated in text only, not as schema keywords.
+ * NOTE(review): the zod schema this replaces declared defaults (action 'generate',
+ * prompt_upsampling false, safety_tolerance 6, endpoint '/v1/flux-pro-1.1', raw false,
+ * finetune_strength 1.1, guidance 2.5, aspect_ratio '16:9'); none are declared here —
+ * confirm the call path applies them.
+ */
+const fluxApiJsonSchema = {
+  type: 'object',
+  properties: {
+    action: {
+      type: 'string',
+      enum: ['generate', 'list_finetunes', 'generate_finetuned'],
+      description:
+        'Action to perform: "generate" for image generation, "generate_finetuned" for finetuned model generation, "list_finetunes" to get available custom models',
+    },
+    prompt: {
+      type: 'string',
+      description:
+        'Text prompt for image generation. Required when action is "generate". Not used for list_finetunes.',
+    },
+    width: {
+      type: 'number',
+      description:
+        'Width of the generated image in pixels. Must be a multiple of 32. Default is 1024.',
+    },
+    height: {
+      type: 'number',
+      description:
+        'Height of the generated image in pixels. Must be a multiple of 32. Default is 768.',
+    },
+    prompt_upsampling: {
+      type: 'boolean',
+      description: 'Whether to perform upsampling on the prompt.',
+    },
+    steps: {
+      type: 'integer',
+      description: 'Number of steps to run the model for, a number from 1 to 50. Default is 40.',
+    },
+    seed: {
+      type: 'number',
+      description: 'Optional seed for reproducibility.',
+    },
+    safety_tolerance: {
+      type: 'number',
+      description:
+        'Tolerance level for input and output moderation. Between 0 and 6, 0 being most strict, 6 being least strict.',
+    },
+    endpoint: {
+      type: 'string',
+      enum: [
+        '/v1/flux-pro-1.1',
+        '/v1/flux-pro',
+        '/v1/flux-dev',
+        '/v1/flux-pro-1.1-ultra',
+        '/v1/flux-pro-finetuned',
+        '/v1/flux-pro-1.1-ultra-finetuned',
+      ],
+      description: 'Endpoint to use for image generation.',
+    },
+    raw: {
+      type: 'boolean',
+      description:
+        'Generate less processed, more natural-looking images. Only works for /v1/flux-pro-1.1-ultra.',
+    },
+    finetune_id: {
+      type: 'string',
+      description: 'ID of the finetuned model to use',
+    },
+    finetune_strength: {
+      type: 'number',
+      description: 'Strength of the finetuning effect (typically between 0.1 and 1.2)',
+    },
+    guidance: {
+      type: 'number',
+      description: 'Guidance scale for finetuned models',
+    },
+    aspect_ratio: {
+      type: 'string',
+      description: 'Aspect ratio for ultra models (e.g., "16:9")',
+    },
+  },
+  required: [],
+};
+
 const displayMessage =
  "Flux displayed an image. All generated images are already plainly visible, so don't repeat the descriptions in detail. Do not list download links as they are available in the UI already. The user may download the images by clicking on them, but do not mention anything about downloading to the user.";

@ -57,82 +134,11 @@ class FluxAPI extends Tool {
    // Add base URL from environment variable with fallback
    this.baseUrl = process.env.FLUX_API_BASE_URL || 'https://api.us1.bfl.ai';

-    // Define the schema for structured input
-    this.schema = z.object({
-      action: z
-        .enum(['generate', 'list_finetunes', 'generate_finetuned'])
-        .default('generate')
-        .describe(
-          'Action to perform: "generate" for image generation, "generate_finetuned" for finetuned model generation, "list_finetunes" to get available custom models',
-        ),
-      prompt: z
-        .string()
-        .optional()
-        .describe(
-          'Text prompt for image generation. Required when action is "generate". Not used for list_finetunes.',
-        ),
-      width: z
-        .number()
-        .optional()
-        .describe(
-          'Width of the generated image in pixels. Must be a multiple of 32. Default is 1024.',
-        ),
-      height: z
-        .number()
-        .optional()
-        .describe(
-          'Height of the generated image in pixels. Must be a multiple of 32. Default is 768.',
-        ),
-      prompt_upsampling: z
-        .boolean()
-        .optional()
-        .default(false)
-        .describe('Whether to perform upsampling on the prompt.'),
-      steps: z
-        .number()
-        .int()
-        .optional()
-        .describe('Number of steps to run the model for, a number from 1 to 50. Default is 40.'),
-      seed: z.number().optional().describe('Optional seed for reproducibility.'),
-      safety_tolerance: z
-        .number()
-        .optional()
-        .default(6)
-        .describe(
-          'Tolerance level for input and output moderation. Between 0 and 6, 0 being most strict, 6 being least strict.',
-        ),
-      endpoint: z
-        .enum([
-          '/v1/flux-pro-1.1',
-          '/v1/flux-pro',
-          '/v1/flux-dev',
-          '/v1/flux-pro-1.1-ultra',
-          '/v1/flux-pro-finetuned',
-          '/v1/flux-pro-1.1-ultra-finetuned',
-        ])
-        .optional()
-        .default('/v1/flux-pro-1.1')
-        .describe('Endpoint to use for image generation.'),
-      raw: z
-        .boolean()
-        .optional()
-        .default(false)
-        .describe(
-          'Generate less processed, more natural-looking images. Only works for /v1/flux-pro-1.1-ultra.',
-        ),
-      finetune_id: z.string().optional().describe('ID of the finetuned model to use'),
-      finetune_strength: z
-        .number()
-        .optional()
-        .default(1.1)
-        .describe('Strength of the finetuning effect (typically between 0.1 and 1.2)'),
-      guidance: z.number().optional().default(2.5).describe('Guidance scale for finetuned models'),
-      aspect_ratio: z
-        .string()
-        .optional()
-        .default('16:9')
-        .describe('Aspect ratio for ultra models (e.g., "16:9")'),
-    });
+    this.schema = fluxApiJsonSchema;
+  }
+
+  /** Static accessor for `fluxApiJsonSchema`, the same object the constructor assigns to `this.schema`. */
+  static get jsonSchema() {
+    return fluxApiJsonSchema;
  }

  getAxiosConfig() {
--- a/api/app/clients/tools/structured/GeminiImageGen.js
+++ b/api/app/clients/tools/structured/GeminiImageGen.js
@ -1,16 +1,11 @@
-const fs = require('fs');
 const path = require('path');
 const sharp = require('sharp');
 const { v4 } = require('uuid');
+const { ProxyAgent } = require('undici');
 const { GoogleGenAI } = require('@google/genai');
 const { tool } = require('@langchain/core/tools');
 const { logger } = require('@librechat/data-schemas');
-const {
-  FileContext,
-  ContentTypes,
-  FileSources,
-  EImageOutputType,
-} = require('librechat-data-provider');
+const { ContentTypes, EImageOutputType } = require('librechat-data-provider');
 const {
  geminiToolkit,
  loadServiceKey,
@ -21,6 +16,24 @@ const { getStrategyFunctions } = require('~/server/services/Files/strategies');
 const { spendTokens } = require('~/models/spendTokens');
 const { getFiles } = require('~/models/File');

+/**
+ * Configure proxy support for Google APIs.
+ *
+ * When the PROXY environment variable is set, this wraps `globalThis.fetch` at module load so
+ * that requests whose URL contains `googleapis.com` are sent through an undici `ProxyAgent`
+ * dispatcher; every other request is forwarded to the original fetch with its options as given.
+ * This is necessary because the @google/genai SDK doesn't support a custom fetch or
+ * `httpOptions.dispatcher`.
+ */
+if (process.env.PROXY) {
+  const originalFetch = globalThis.fetch;
+  const proxyAgent = new ProxyAgent(process.env.PROXY);
+
+  /**
+   * Resolve the target URL of a fetch call.
+   * @param {string | URL | Request} input - First argument passed to fetch
+   * @returns {string} - The URL as a string
+   */
+  const getRequestUrl = (input) => {
+    // A Request stringifies to "[object Request]", so read its `url` property instead.
+    if (typeof input?.url === 'string') {
+      return input.url;
+    }
+    // Strings and URL objects stringify to the URL itself; String() also avoids a
+    // synchronous TypeError on null/undefined so the original fetch reports the error.
+    return String(input);
+  };
+
+  globalThis.fetch = function (url, options = {}) {
+    if (getRequestUrl(url).includes('googleapis.com')) {
+      // Copy rather than mutate: the caller owns the options object.
+      options = { ...options, dispatcher: proxyAgent };
+    }
+    return originalFetch.call(this, url, options);
+  };
+}
+
 /**
 * Get the default service key file path (consistent with main Google endpoint)
 * @returns {string} - The default path to the service key file
@ -40,17 +53,12 @@ const displayMessage =
 * @returns {string} - The processed string
 */
 function replaceUnwantedChars(inputString) {
-  return inputString?.replace(/[^\w\s\-_.,!?()]/g, '') || '';
-}
-
-/**
- * Validate and sanitize image format
- * @param {string} format - The format to validate
- * @returns {string} - Safe format
- */
-function getSafeFormat(format) {
-  const allowedFormats = ['png', 'jpg', 'jpeg', 'webp', 'gif'];
-  return allowedFormats.includes(format?.toLowerCase()) ? format.toLowerCase() : 'png';
+  // Line breaks (\r\n, \r, \n) become single spaces, double quotes are removed, and the
+  // result is trimmed; a null/undefined input or an empty result yields ''.
+  return (
+    inputString
+      ?.replace(/\r\n|\r|\n/g, ' ')
+      .replace(/"/g, '')
+      .trim() || ''
+  );
 }

 /**
@ -98,11 +106,8 @@ async function initializeGeminiClient(options = {}) {
    return new GoogleGenAI({ apiKey: googleKey });
  }

-  // Fall back to Vertex AI with service account
  logger.debug('[GeminiImageGen] Using Vertex AI with service account');
  const credentialsPath = getDefaultServiceKeyPath();
-
-  // Use loadServiceKey for consistent loading (supports file paths, JSON strings, base64)
  const serviceKey = await loadServiceKey(credentialsPath);

  if (!serviceKey || !serviceKey.project_id) {
@ -112,75 +117,14 @@ async function initializeGeminiClient(options = {}) {
    );
  }

-  // Set GOOGLE_APPLICATION_CREDENTIALS for any Google Cloud SDK dependencies
-  try {
-    await fs.promises.access(credentialsPath);
-    process.env.GOOGLE_APPLICATION_CREDENTIALS = credentialsPath;
-  } catch {
-    // File doesn't exist, skip setting env var
-  }
-
  return new GoogleGenAI({
    vertexai: true,
    project: serviceKey.project_id,
    location: process.env.GOOGLE_LOC || process.env.GOOGLE_CLOUD_LOCATION || 'global',
+    googleAuthOptions: { credentials: serviceKey },
  });
 }

-/**
- * Save image to local filesystem
- * @param {string} base64Data - Base64 encoded image data
- * @param {string} format - Image format
- * @param {string} userId - User ID
- * @returns {Promise<string>} - The relative URL
- */
-async function saveImageLocally(base64Data, format, userId) {
-  const safeFormat = getSafeFormat(format);
-  const safeUserId = userId ? path.basename(userId) : 'default';
-  const imageName = `gemini-img-${v4()}.${safeFormat}`;
-  const userDir = path.join(process.cwd(), 'client/public/images', safeUserId);
-
-  await fs.promises.mkdir(userDir, { recursive: true });
-
-  const filePath = path.join(userDir, imageName);
-  await fs.promises.writeFile(filePath, Buffer.from(base64Data, 'base64'));
-
-  logger.debug('[GeminiImageGen] Image saved locally to:', filePath);
-  return `/images/${safeUserId}/${imageName}`;
-}
-
-/**
- * Save image to cloud storage
- * @param {Object} params - Parameters
- * @returns {Promise<string|null>} - The storage URL or null
- */
-async function saveToCloudStorage({ base64Data, format, processFileURL, fileStrategy, userId }) {
-  if (!processFileURL || !fileStrategy || !userId) {
-    return null;
-  }
-
-  try {
-    const safeFormat = getSafeFormat(format);
-    const safeUserId = path.basename(userId);
-    const dataURL = `data:image/${safeFormat};base64,${base64Data}`;
-    const imageName = `gemini-img-${v4()}.${safeFormat}`;
-
-    const result = await processFileURL({
-      URL: dataURL,
-      basePath: 'images',
-      userId: safeUserId,
-      fileName: imageName,
-      fileStrategy,
-      context: FileContext.image_generation,
-    });
-
-    return result.filepath;
-  } catch (error) {
-    logger.error('[GeminiImageGen] Error saving to cloud storage:', error);
-    return null;
-  }
-}
-
 /**
 * Convert image files to Gemini inline data format
 * @param {Object} params - Parameters
@ -307,8 +251,9 @@ function checkForSafetyBlock(response) {
 * @param {string} params.userId - The user ID
 * @param {string} params.conversationId - The conversation ID
 * @param {string} params.model - The model name
+ * @param {string} [params.messageId] - The response message ID for transaction correlation
 */
-async function recordTokenUsage({ usageMetadata, req, userId, conversationId, model }) {
+async function recordTokenUsage({ usageMetadata, req, userId, conversationId, model, messageId }) {
  if (!usageMetadata) {
    logger.debug('[GeminiImageGen] No usage metadata available for balance tracking');
    return;
@ -344,6 +289,7 @@ async function recordTokenUsage({ usageMetadata, req, userId, conversationId, mo
      {
        user: userId,
        model,
+        messageId,
        conversationId,
        context: 'image_generation',
        balance,
@ -371,34 +317,18 @@ function createGeminiImageTool(fields = {}) {
    throw new Error('This tool is only available for agents.');
  }

-  // Skip validation during tool creation - validation happens at runtime in initializeGeminiClient
-  // This allows the tool to be added to agents when using Vertex AI without requiring API keys
-  // The actual credentials check happens when the tool is invoked
-
-  const {
-    req,
-    imageFiles = [],
-    processFileURL,
-    userId,
-    fileStrategy,
-    GEMINI_API_KEY,
-    GOOGLE_KEY,
-    // GEMINI_VERTEX_ENABLED is used for auth validation only (not used in code)
-    // When set as env var, it signals Vertex AI is configured and bypasses API key requirement
-  } = fields;
+  const { req, imageFiles = [], userId, fileStrategy, GEMINI_API_KEY, GOOGLE_KEY } = fields;

  const imageOutputType = fields.imageOutputType || EImageOutputType.PNG;

  const geminiImageGenTool = tool(
-    async ({ prompt, image_ids, aspectRatio, imageSize }, _runnableConfig) => {
+    async ({ prompt, image_ids, aspectRatio, imageSize }, runnableConfig) => {
      if (!prompt) {
        throw new Error('Missing required field: prompt');
      }

-      logger.debug('[GeminiImageGen] Generating image with prompt:', prompt?.substring(0, 100));
-      logger.debug('[GeminiImageGen] Options:', { aspectRatio, imageSize });
+      logger.debug('[GeminiImageGen] Generating image', { aspectRatio, imageSize });

-      // Initialize Gemini client with user-provided credentials
      let ai;
      try {
        ai = await initializeGeminiClient({
@ -413,10 +343,8 @@ function createGeminiImageTool(fields = {}) {
        ];
      }

-      // Build request contents
      const contents = [{ text: replaceUnwantedChars(prompt) }];

-      // Add context images if provided
      if (image_ids?.length > 0) {
        const contextImages = await convertImagesToInlineData({
          imageFiles,
@ -428,28 +356,34 @@ function createGeminiImageTool(fields = {}) {
        logger.debug('[GeminiImageGen] Added', contextImages.length, 'context images');
      }

-      // Generate image
      let apiResponse;
      const geminiModel = process.env.GEMINI_IMAGE_MODEL || 'gemini-2.5-flash-image';
-      try {
-        // Build config with optional imageConfig
-        const config = {
-          responseModalities: ['TEXT', 'IMAGE'],
-        };
+      const config = {
+        responseModalities: ['TEXT', 'IMAGE'],
+      };

-        // Add imageConfig if aspectRatio or imageSize is specified
-        // Note: gemini-2.5-flash-image doesn't support imageSize
-        const supportsImageSize = !geminiModel.includes('gemini-2.5-flash-image');
-        if (aspectRatio || (imageSize && supportsImageSize)) {
-          config.imageConfig = {};
-          if (aspectRatio) {
-            config.imageConfig.aspectRatio = aspectRatio;
-          }
-          if (imageSize && supportsImageSize) {
-            config.imageConfig.imageSize = imageSize;
-          }
+      const supportsImageSize = !geminiModel.includes('gemini-2.5-flash-image');
+      if (aspectRatio || (imageSize && supportsImageSize)) {
+        config.imageConfig = {};
+        if (aspectRatio) {
+          config.imageConfig.aspectRatio = aspectRatio;
        }
+        if (imageSize && supportsImageSize) {
+          config.imageConfig.imageSize = imageSize;
+        }
+      }

+      let derivedSignal = null;
+      let abortHandler = null;
+
+      if (runnableConfig?.signal) {
+        derivedSignal = AbortSignal.any([runnableConfig.signal]);
+        abortHandler = () => logger.debug('[GeminiImageGen] Image generation aborted');
+        derivedSignal.addEventListener('abort', abortHandler, { once: true });
+        config.abortSignal = derivedSignal;
+      }
+
+      try {
        apiResponse = await ai.models.generateContent({
          model: geminiModel,
          contents,
@ -461,9 +395,12 @@ function createGeminiImageTool(fields = {}) {
          [{ type: ContentTypes.TEXT, text: `Image generation failed: ${error.message}` }],
          { content: [], file_ids: [] },
        ];
+      } finally {
+        if (abortHandler && derivedSignal) {
+          derivedSignal.removeEventListener('abort', abortHandler);
+        }
      }

-      // Check for safety blocks
      const safetyBlock = checkForSafetyBlock(apiResponse);
      if (safetyBlock) {
        logger.warn('[GeminiImageGen] Safety block:', safetyBlock);
@ -490,46 +427,7 @@ function createGeminiImageTool(fields = {}) {
      const imageData = convertedBuffer.toString('base64');
      const mimeType = outputFormat === 'jpeg' ? 'image/jpeg' : `image/${outputFormat}`;

-      logger.debug('[GeminiImageGen] Image format:', { outputFormat, mimeType });
-
-      let imageUrl;
-      const useLocalStorage = !fileStrategy || fileStrategy === FileSources.local;
-
-      if (useLocalStorage) {
-        try {
-          imageUrl = await saveImageLocally(imageData, outputFormat, userId);
-        } catch (error) {
-          logger.error('[GeminiImageGen] Local save failed:', error);
-          imageUrl = `data:${mimeType};base64,${imageData}`;
-        }
-      } else {
-        const cloudUrl = await saveToCloudStorage({
-          base64Data: imageData,
-          format: outputFormat,
-          processFileURL,
-          fileStrategy,
-          userId,
-        });
-
-        if (cloudUrl) {
-          imageUrl = cloudUrl;
-        } else {
-          // Fallback to local
-          try {
-            imageUrl = await saveImageLocally(imageData, outputFormat, userId);
-          } catch (_error) {
-            imageUrl = `data:${mimeType};base64,${imageData}`;
-          }
-        }
-      }
-
-      logger.debug('[GeminiImageGen] Image URL:', imageUrl);
-
-      // For the artifact, we need a data URL (same as OpenAI)
-      // The local file save is for persistence, but the response needs a data URL
      const dataUrl = `data:${mimeType};base64,${imageData}`;
-
-      // Return in content_and_artifact format (same as OpenAI)
      const file_ids = [v4()];
      const content = [
        {
@ -548,12 +446,15 @@ function createGeminiImageTool(fields = {}) {
        },
      ];

-      // Record token usage for balance tracking (don't await to avoid blocking response)
-      const conversationId = _runnableConfig?.configurable?.thread_id;
+      const conversationId = runnableConfig?.configurable?.thread_id;
+      const messageId =
+        runnableConfig?.configurable?.run_id ??
+        runnableConfig?.configurable?.requestBody?.messageId;
      recordTokenUsage({
        usageMetadata: apiResponse.usageMetadata,
        req,
        userId,
+        messageId,
        conversationId,
        model: geminiModel,
      }).catch((error) => {
--- a/api/app/clients/tools/structured/GoogleSearch.js
+++ b/api/app/clients/tools/structured/GoogleSearch.js
@ -1,12 +1,33 @@
-const { z } = require('zod');
 const { Tool } = require('@langchain/core/tools');
 const { getEnvironmentVariable } = require('@langchain/core/utils/env');

+/**
+ * JSON Schema (plain object) for the Google search tool input: a required, non-empty
+ * `query` string and an optional integer `max_results` between 1 and 10.
+ * Assigned to `this.schema` in the constructor and returned by the static `jsonSchema` getter.
+ */
+const googleSearchJsonSchema = {
+  type: 'object',
+  properties: {
+    query: {
+      type: 'string',
+      minLength: 1,
+      description: 'The search query string.',
+    },
+    max_results: {
+      type: 'integer',
+      minimum: 1,
+      maximum: 10,
+      description: 'The maximum number of search results to return. Defaults to 5.',
+    },
+  },
+  required: ['query'],
+};
+
 class GoogleSearchResults extends Tool {
  static lc_name() {
+    // Same identifier the constructor assigns to `this.name`.
    return 'google';
  }

+  /** Static accessor for `googleSearchJsonSchema`, the same object the constructor assigns to `this.schema`. */
+  static get jsonSchema() {
+    return googleSearchJsonSchema;
+  }
+
  constructor(fields = {}) {
    super(fields);
    this.name = 'google';
@ -28,25 +49,11 @@ class GoogleSearchResults extends Tool {
    this.description =
      'A search engine optimized for comprehensive, accurate, and trusted results. Useful for when you need to answer questions about current events.';

-    this.schema = z.object({
-      query: z.string().min(1).describe('The search query string.'),
-      max_results: z
-        .number()
-        .min(1)
-        .max(10)
-        .optional()
-        .describe('The maximum number of search results to return. Defaults to 10.'),
-      // Note: Google API has its own parameters for search customization, adjust as needed.
-    });
+    this.schema = googleSearchJsonSchema;
  }

  async _call(input) {
-    const validationResult = this.schema.safeParse(input);
-    if (!validationResult.success) {
-      throw new Error(`Validation failed: ${JSON.stringify(validationResult.error.issues)}`);
-    }
-
-    const { query, max_results = 5 } = validationResult.data;
+    const { query, max_results = 5 } = input;

    const response = await fetch(
      `https://www.googleapis.com/customsearch/v1?key=${this.apiKey}&cx=${
--- a/api/app/clients/tools/structured/OpenWeather.js
+++ b/api/app/clients/tools/structured/OpenWeather.js
@ -1,8 +1,52 @@
 const { Tool } = require('@langchain/core/tools');
-const { z } = require('zod');
 const { getEnvironmentVariable } = require('@langchain/core/utils/env');
 const fetch = require('node-fetch');

+/**
+ * JSON Schema (plain object) for the OpenWeather tool input.
+ * Only `action` is required; the location (`city`, or `lat`/`lon`) and the remaining
+ * fields are optional. `units` accepts the user-facing names 'Celsius', 'Kelvin' and
+ * 'Fahrenheit'. Used as the `schema` class field and returned by the static
+ * `jsonSchema` getter.
+ */
+const openWeatherJsonSchema = {
+  type: 'object',
+  properties: {
+    action: {
+      type: 'string',
+      enum: ['help', 'current_forecast', 'timestamp', 'daily_aggregation', 'overview'],
+      description: 'The action to perform',
+    },
+    city: {
+      type: 'string',
+      description: 'City name for geocoding if lat/lon not provided',
+    },
+    lat: {
+      type: 'number',
+      description: 'Latitude coordinate',
+    },
+    lon: {
+      type: 'number',
+      description: 'Longitude coordinate',
+    },
+    exclude: {
+      type: 'string',
+      description: 'Parts to exclude from the response',
+    },
+    units: {
+      type: 'string',
+      enum: ['Celsius', 'Kelvin', 'Fahrenheit'],
+      description: 'Temperature units',
+    },
+    lang: {
+      type: 'string',
+      description: 'Language code',
+    },
+    date: {
+      type: 'string',
+      description: 'Date in YYYY-MM-DD format for timestamp and daily_aggregation',
+    },
+    tz: {
+      type: 'string',
+      description: 'Timezone',
+    },
+  },
+  required: ['action'],
+};
+
 /**
 * Map user-friendly units to OpenWeather units.
 * Defaults to Celsius if not specified.
@ -66,17 +110,11 @@ class OpenWeather extends Tool {
    'Units: "Celsius", "Kelvin", or "Fahrenheit" (default: Celsius). ' +
    'For timestamp action, use "date" in YYYY-MM-DD format.';

-  schema = z.object({
-    action: z.enum(['help', 'current_forecast', 'timestamp', 'daily_aggregation', 'overview']),
-    city: z.string().optional(),
-    lat: z.number().optional(),
-    lon: z.number().optional(),
-    exclude: z.string().optional(),
-    units: z.enum(['Celsius', 'Kelvin', 'Fahrenheit']).optional(),
-    lang: z.string().optional(),
-    date: z.string().optional(), // For timestamp and daily_aggregation
-    tz: z.string().optional(),
-  });
+  schema = openWeatherJsonSchema;
+
+  /** Static accessor for `openWeatherJsonSchema`, the same object used for the `schema` class field. */
+  static get jsonSchema() {
+    return openWeatherJsonSchema;
+  }

  constructor(fields = {}) {
    super();
--- a/api/app/clients/tools/structured/StableDiffusion.js
+++ b/api/app/clients/tools/structured/StableDiffusion.js
@ -1,6 +1,5 @@
 // Generates image using stable diffusion webui's api (automatic1111)
 const fs = require('fs');
-const { z } = require('zod');
 const path = require('path');
 const axios = require('axios');
 const sharp = require('sharp');
@ -11,6 +10,23 @@ const { FileContext, ContentTypes } = require('librechat-data-provider');
 const { getBasePath } = require('@librechat/api');
 const paths = require('~/config/paths');

+/**
+ * JSON Schema (plain object) for the Stable Diffusion tool input: two required strings,
+ * `prompt` and `negative_prompt`, each described as a comma-separated keyword list.
+ * Assigned to `this.schema` in the constructor and returned by the static `jsonSchema` getter.
+ */
+const stableDiffusionJsonSchema = {
+  type: 'object',
+  properties: {
+    prompt: {
+      type: 'string',
+      description:
+        'Detailed keywords to describe the subject, using at least 7 keywords to accurately describe the image, separated by comma',
+    },
+    negative_prompt: {
+      type: 'string',
+      description:
+        'Keywords we want to exclude from the final image, using at least 7 keywords to accurately describe the image, separated by comma',
+    },
+  },
+  required: ['prompt', 'negative_prompt'],
+};
+
 const displayMessage =
  "Stable Diffusion displayed an image. All generated images are already plainly visible, so don't repeat the descriptions in detail. Do not list download links as they are available in the UI already. The user may download the images by clicking on them, but do not mention anything about downloading to the user.";

@ -46,18 +62,11 @@ class StableDiffusionAPI extends Tool {
 // - Generate images only once per human query unless explicitly requested by the user`;
    this.description =
      "You can generate images using text with 'stable-diffusion'. This tool is exclusively for visual content.";
-    this.schema = z.object({
-      prompt: z
-        .string()
-        .describe(
-          'Detailed keywords to describe the subject, using at least 7 keywords to accurately describe the image, separated by comma',
-        ),
-      negative_prompt: z
-        .string()
-        .describe(
-          'Keywords we want to exclude from the final image, using at least 7 keywords to accurately describe the image, separated by comma',
-        ),
-    });
+    this.schema = stableDiffusionJsonSchema;
+  }
+
+  /** Static accessor for `stableDiffusionJsonSchema`, the same object the constructor assigns to `this.schema`. */
+  static get jsonSchema() {
+    return stableDiffusionJsonSchema;
  }

  replaceNewLinesWithSpaces(inputString) {
--- a/api/app/clients/tools/structured/TavilySearchResults.js
+++ b/api/app/clients/tools/structured/TavilySearchResults.js
@ -1,8 +1,75 @@
-const { z } = require('zod');
 const { ProxyAgent, fetch } = require('undici');
 const { Tool } = require('@langchain/core/tools');
 const { getEnvironmentVariable } = require('@langchain/core/utils/env');

+/**
+ * JSON Schema (plain object) for the Tavily search tool input.
+ * Only `query` (a non-empty string) is required; every other property is an optional
+ * search modifier. `max_results` (1 to 10) and `days` (minimum 1) are typed as `number`,
+ * so the schema itself does not restrict them to whole numbers.
+ * Assigned to `this.schema` in the constructor and returned by the static `jsonSchema` getter.
+ */
+const tavilySearchJsonSchema = {
+  type: 'object',
+  properties: {
+    query: {
+      type: 'string',
+      minLength: 1,
+      description: 'The search query string.',
+    },
+    max_results: {
+      type: 'number',
+      minimum: 1,
+      maximum: 10,
+      description: 'The maximum number of search results to return. Defaults to 5.',
+    },
+    search_depth: {
+      type: 'string',
+      enum: ['basic', 'advanced'],
+      description:
+        'The depth of the search, affecting result quality and response time (`basic` or `advanced`). Default is basic for quick results and advanced for indepth high quality results but longer response time. Advanced calls equals 2 requests.',
+    },
+    include_images: {
+      type: 'boolean',
+      description:
+        'Whether to include a list of query-related images in the response. Default is False.',
+    },
+    include_answer: {
+      type: 'boolean',
+      description: 'Whether to include answers in the search results. Default is False.',
+    },
+    include_raw_content: {
+      type: 'boolean',
+      description: 'Whether to include raw content in the search results. Default is False.',
+    },
+    include_domains: {
+      type: 'array',
+      items: { type: 'string' },
+      description: 'A list of domains to specifically include in the search results.',
+    },
+    exclude_domains: {
+      type: 'array',
+      items: { type: 'string' },
+      description: 'A list of domains to specifically exclude from the search results.',
+    },
+    topic: {
+      type: 'string',
+      enum: ['general', 'news', 'finance'],
+      description:
+        'The category of the search. Use news ONLY if query SPECIFCALLY mentions the word "news".',
+    },
+    time_range: {
+      type: 'string',
+      enum: ['day', 'week', 'month', 'year', 'd', 'w', 'm', 'y'],
+      description: 'The time range back from the current date to filter results.',
+    },
+    days: {
+      type: 'number',
+      minimum: 1,
+      description: 'Number of days back from the current date to include. Only if topic is news.',
+    },
+    include_image_descriptions: {
+      type: 'boolean',
+      description:
+        'When include_images is true, also add a descriptive text for each image. Default is false.',
+    },
+  },
+  required: ['query'],
+};
+
 class TavilySearchResults extends Tool {
  static lc_name() {
    return 'TavilySearchResults';
@ -20,64 +87,11 @@ class TavilySearchResults extends Tool {
    this.description =
      'A search engine optimized for comprehensive, accurate, and trusted results. Useful for when you need to answer questions about current events.';

-    this.schema = z.object({
-      query: z.string().min(1).describe('The search query string.'),
-      max_results: z
-        .number()
-        .min(1)
-        .max(10)
-        .optional()
-        .describe('The maximum number of search results to return. Defaults to 5.'),
-      search_depth: z
-        .enum(['basic', 'advanced'])
-        .optional()
-        .describe(
-          'The depth of the search, affecting result quality and response time (`basic` or `advanced`). Default is basic for quick results and advanced for indepth high quality results but longer response time. Advanced calls equals 2 requests.',
-        ),
-      include_images: z
-        .boolean()
-        .optional()
-        .describe(
-          'Whether to include a list of query-related images in the response. Default is False.',
-        ),
-      include_answer: z
-        .boolean()
-        .optional()
-        .describe('Whether to include answers in the search results. Default is False.'),
-      include_raw_content: z
-        .boolean()
-        .optional()
-        .describe('Whether to include raw content in the search results. Default is False.'),
-      include_domains: z
-        .array(z.string())
-        .optional()
-        .describe('A list of domains to specifically include in the search results.'),
-      exclude_domains: z
-        .array(z.string())
-        .optional()
-        .describe('A list of domains to specifically exclude from the search results.'),
-      topic: z
-        .enum(['general', 'news', 'finance'])
-        .optional()
-        .describe(
-          'The category of the search. Use news ONLY if query SPECIFCALLY mentions the word "news".',
-        ),
-      time_range: z
-        .enum(['day', 'week', 'month', 'year', 'd', 'w', 'm', 'y'])
-        .optional()
-        .describe('The time range back from the current date to filter results.'),
-      days: z
-        .number()
-        .min(1)
-        .optional()
-        .describe('Number of days back from the current date to include. Only if topic is news.'),
-      include_image_descriptions: z
-        .boolean()
-        .optional()
-        .describe(
-          'When include_images is true, also add a descriptive text for each image. Default is false.',
-        ),
-    });
+    this.schema = tavilySearchJsonSchema;
+  }
+
+  /** Static accessor for `tavilySearchJsonSchema`, the same object the constructor assigns to `this.schema`. */
+  static get jsonSchema() {
+    return tavilySearchJsonSchema;
  }

  getApiKey() {
@ -89,12 +103,7 @@ class TavilySearchResults extends Tool {
  }

  async _call(input) {
-    const validationResult = this.schema.safeParse(input);
-    if (!validationResult.success) {
-      throw new Error(`Validation failed: ${JSON.stringify(validationResult.error.issues)}`);
-    }
-
-    const { query, ...rest } = validationResult.data;
+    const { query, ...rest } = input;

    const requestBody = {
      api_key: this.apiKey,
--- a/api/app/clients/tools/structured/TraversaalSearch.js
+++ b/api/app/clients/tools/structured/TraversaalSearch.js
@ -1,8 +1,19 @@
-const { z } = require('zod');
 const { Tool } = require('@langchain/core/tools');
 const { logger } = require('@librechat/data-schemas');
 const { getEnvironmentVariable } = require('@langchain/core/utils/env');

+/**
+ * JSON Schema for the TraversaalSearch tool input: an object with a single
+ * required string property, `query`.
+ */
+const traversaalSearchJsonSchema = {
+  type: 'object',
+  properties: {
+    query: {
+      type: 'string',
+      description:
+        "A properly written sentence to be interpreted by an AI to search the web according to the user's request.",
+    },
+  },
+  required: ['query'],
+};
+
 /**
 * Tool for the Traversaal AI search API, Ares.
 */
@ -17,17 +28,15 @@ class TraversaalSearch extends Tool {
    Useful for when you need to answer questions about current events. Input should be a search query.`;
    this.description_for_model =
      '\'Please create a specific sentence for the AI to understand and use as a query to search the web based on the user\'s request. For example, "Find information about the highest mountains in the world." or "Show me the latest news articles about climate change and its impact on polar ice caps."\'';
-    this.schema = z.object({
-      query: z
-        .string()
-        .describe(
-          "A properly written sentence to be interpreted by an AI to search the web according to the user's request.",
-        ),
-    });
+    this.schema = traversaalSearchJsonSchema;

    this.apiKey = fields?.TRAVERSAAL_API_KEY ?? this.getApiKey();
  }

+  /**
+   * JSON Schema for this tool's input, available without constructing an instance.
+   * @returns {object} The same `traversaalSearchJsonSchema` object assigned to `this.schema`.
+   */
+  static get jsonSchema() {
+    return traversaalSearchJsonSchema;
+  }
+
  getApiKey() {
    const apiKey = getEnvironmentVariable('TRAVERSAAL_API_KEY');
    if (!apiKey && this.override) {
--- a/api/app/clients/tools/structured/Wolfram.js
+++ b/api/app/clients/tools/structured/Wolfram.js
@ -1,9 +1,19 @@
 /* eslint-disable no-useless-escape */
-const { z } = require('zod');
 const axios = require('axios');
 const { Tool } = require('@langchain/core/tools');
 const { logger } = require('@librechat/data-schemas');

+/**
+ * JSON Schema for the WolframAlphaAPI tool input: an object with a single
+ * required string property, `input`.
+ */
+const wolframJsonSchema = {
+  type: 'object',
+  properties: {
+    input: {
+      type: 'string',
+      description: 'Natural language query to WolframAlpha following the guidelines',
+    },
+  },
+  required: ['input'],
+};
+
 class WolframAlphaAPI extends Tool {
  constructor(fields) {
    super();
@ -41,9 +51,11 @@ class WolframAlphaAPI extends Tool {
    // -- Do not explain each step unless user input is needed. Proceed directly to making a better API call based on the available assumptions.`;
    this.description = `WolframAlpha offers computation, math, curated knowledge, and real-time data. It handles natural language queries and performs complex calculations.
    Follow the guidelines to get the best results.`;
-    this.schema = z.object({
-      input: z.string().describe('Natural language query to WolframAlpha following the guidelines'),
-    });
+    this.schema = wolframJsonSchema;
+  }
+
+  /**
+   * JSON Schema for this tool's input, available without constructing an instance.
+   * @returns {object} The same `wolframJsonSchema` object assigned to `this.schema`.
+   */
+  static get jsonSchema() {
+    return wolframJsonSchema;
  }

  async fetchRawText(url) {
--- a/api/app/clients/tools/structured/YouTube.js
+++ b/api/app/clients/tools/structured/YouTube.js
@ -1,137 +0,0 @@
-const { ytToolkit } = require('@librechat/api');
-const { tool } = require('@langchain/core/tools');
-const { youtube } = require('@googleapis/youtube');
-const { logger } = require('@librechat/data-schemas');
-const { YoutubeTranscript } = require('youtube-transcript');
-const { getApiKey } = require('./credentials');
-
-function extractVideoId(url) {
-  const rawIdRegex = /^[a-zA-Z0-9_-]{11}$/;
-  if (rawIdRegex.test(url)) {
-    return url;
-  }
-
-  const regex = new RegExp(
-    '(?:youtu\\.be/|youtube(?:\\.com)?/(?:' +
-      '(?:watch\\?v=)|(?:embed/)|(?:shorts/)|(?:live/)|(?:v/)|(?:/))?)' +
-      '([a-zA-Z0-9_-]{11})(?:\\S+)?$',
-  );
-  const match = url.match(regex);
-  return match ? match[1] : null;
-}
-
-function parseTranscript(transcriptResponse) {
-  if (!Array.isArray(transcriptResponse)) {
-    return '';
-  }
-
-  return transcriptResponse
-    .map((entry) => entry.text.trim())
-    .filter((text) => text)
-    .join(' ')
-    .replaceAll('&amp;#39;', "'");
-}
-
-function createYouTubeTools(fields = {}) {
-  const envVar = 'YOUTUBE_API_KEY';
-  const override = fields.override ?? false;
-  const apiKey = fields.apiKey ?? fields[envVar] ?? getApiKey(envVar, override);
-
-  const youtubeClient = youtube({
-    version: 'v3',
-    auth: apiKey,
-  });
-
-  const searchTool = tool(async ({ query, maxResults = 5 }) => {
-    const response = await youtubeClient.search.list({
-      part: 'snippet',
-      q: query,
-      type: 'video',
-      maxResults: maxResults || 5,
-    });
-    const result = response.data.items.map((item) => ({
-      title: item.snippet.title,
-      description: item.snippet.description,
-      url: `https://www.youtube.com/watch?v=${item.id.videoId}`,
-    }));
-    return JSON.stringify(result, null, 2);
-  }, ytToolkit.youtube_search);
-
-  const infoTool = tool(async ({ url }) => {
-    const videoId = extractVideoId(url);
-    if (!videoId) {
-      throw new Error('Invalid YouTube URL or video ID');
-    }
-
-    const response = await youtubeClient.videos.list({
-      part: 'snippet,statistics',
-      id: videoId,
-    });
-
-    if (!response.data.items?.length) {
-      throw new Error('Video not found');
-    }
-    const video = response.data.items[0];
-
-    const result = {
-      title: video.snippet.title,
-      description: video.snippet.description,
-      views: video.statistics.viewCount,
-      likes: video.statistics.likeCount,
-      comments: video.statistics.commentCount,
-    };
-    return JSON.stringify(result, null, 2);
-  }, ytToolkit.youtube_info);
-
-  const commentsTool = tool(async ({ url, maxResults = 10 }) => {
-    const videoId = extractVideoId(url);
-    if (!videoId) {
-      throw new Error('Invalid YouTube URL or video ID');
-    }
-
-    const response = await youtubeClient.commentThreads.list({
-      part: 'snippet',
-      videoId,
-      maxResults: maxResults || 10,
-    });
-
-    const result = response.data.items.map((item) => ({
-      author: item.snippet.topLevelComment.snippet.authorDisplayName,
-      text: item.snippet.topLevelComment.snippet.textDisplay,
-      likes: item.snippet.topLevelComment.snippet.likeCount,
-    }));
-    return JSON.stringify(result, null, 2);
-  }, ytToolkit.youtube_comments);
-
-  const transcriptTool = tool(async ({ url }) => {
-    const videoId = extractVideoId(url);
-    if (!videoId) {
-      throw new Error('Invalid YouTube URL or video ID');
-    }
-
-    try {
-      try {
-        const transcript = await YoutubeTranscript.fetchTranscript(videoId, { lang: 'en' });
-        return parseTranscript(transcript);
-      } catch (e) {
-        logger.error(e);
-      }
-
-      try {
-        const transcript = await YoutubeTranscript.fetchTranscript(videoId, { lang: 'de' });
-        return parseTranscript(transcript);
-      } catch (e) {
-        logger.error(e);
-      }
-
-      const transcript = await YoutubeTranscript.fetchTranscript(videoId);
-      return parseTranscript(transcript);
-    } catch (error) {
-      throw new Error(`Failed to fetch transcript: ${error.message}`);
-    }
-  }, ytToolkit.youtube_transcript);
-
-  return [searchTool, infoTool, commentsTool, transcriptTool];
-}
-
-module.exports = createYouTubeTools;
--- a/api/app/clients/tools/structured/specs/DALLE3-proxy.spec.js
+++ b/api/app/clients/tools/structured/specs/DALLE3-proxy.spec.js
@ -1,7 +1,6 @@
 const DALLE3 = require('../DALLE3');
 const { ProxyAgent } = require('undici');

-jest.mock('tiktoken');
 const processFileURL = jest.fn();

 describe('DALLE3 Proxy Configuration', () => {
--- a/api/app/clients/tools/structured/specs/DALLE3.spec.js
+++ b/api/app/clients/tools/structured/specs/DALLE3.spec.js
@ -14,15 +14,6 @@ jest.mock('@librechat/data-schemas', () => {
  };
 });

-jest.mock('tiktoken', () => {
-  return {
-    encoding_for_model: jest.fn().mockReturnValue({
-      encode: jest.fn(),
-      decode: jest.fn(),
-    }),
-  };
-});
-
 const processFileURL = jest.fn();

 const generate = jest.fn();
--- a/api/app/clients/tools/structured/specs/GeminiImageGen-proxy.spec.js
+++ b/api/app/clients/tools/structured/specs/GeminiImageGen-proxy.spec.js
@ -0,0 +1,125 @@
+const { ProxyAgent } = require('undici');
+
+/**
+ * These tests verify the proxy wrapper behavior for GeminiImageGen.
+ * Instead of loading the full module (which has many dependencies),
+ * we directly test the wrapper logic that would be applied.
+ */
+describe('GeminiImageGen Proxy Configuration', () => {
+  let originalEnv;
+  let originalFetch;
+
+  // Snapshot the environment and the global fetch once, before any test mutates them.
+  beforeAll(() => {
+    originalEnv = { ...process.env };
+    originalFetch = globalThis.fetch;
+  });
+
+  // Start every test from a fresh copy of the environment and the unwrapped fetch.
+  beforeEach(() => {
+    process.env = { ...originalEnv };
+    globalThis.fetch = originalFetch;
+  });
+
+  // Undo any PROXY changes and fetch wrapping performed by the test.
+  afterEach(() => {
+    process.env = originalEnv;
+    globalThis.fetch = originalFetch;
+  });
+
+  /**
+   * Re-creates the fetch wrapper that GeminiImageGen applies at module load.
+   * When PROXY is set, requests whose URL contains `googleapis.com` receive an
+   * undici ProxyAgent as `dispatcher`; all other requests keep their options.
+   * When PROXY is unset, `globalThis.fetch` is left untouched.
+   */
+  function applyProxyWrapper() {
+    const proxyUrl = process.env.PROXY;
+    if (!proxyUrl) {
+      return;
+    }
+
+    const upstreamFetch = globalThis.fetch;
+    const dispatcher = new ProxyAgent(proxyUrl);
+
+    globalThis.fetch = function (url, options = {}) {
+      const viaProxy = url.toString().includes('googleapis.com');
+      const forwardedOptions = viaProxy ? { ...options, dispatcher } : options;
+      return upstreamFetch.call(this, url, forwardedOptions);
+    };
+  }
+
+  it('should wrap globalThis.fetch when PROXY env is set', () => {
+    const unwrappedFetch = globalThis.fetch;
+    process.env.PROXY = 'http://proxy.example.com:8080';
+
+    applyProxyWrapper();
+
+    // A configured proxy must replace the global fetch with the wrapper.
+    expect(globalThis.fetch).not.toBe(unwrappedFetch);
+  });
+
+  it('should not wrap globalThis.fetch when PROXY env is not set', () => {
+    const unwrappedFetch = globalThis.fetch;
+    delete process.env.PROXY;
+
+    applyProxyWrapper();
+
+    // Without a proxy the global fetch must be left exactly as it was.
+    expect(globalThis.fetch).toBe(unwrappedFetch);
+  });
+
+  it('should add dispatcher to googleapis.com URLs', async () => {
+    process.env.PROXY = 'http://proxy.example.com:8080';
+
+    const mockFetch = jest.fn(() => Promise.resolve({ ok: true }));
+    globalThis.fetch = mockFetch;
+
+    applyProxyWrapper();
+
+    await globalThis.fetch('https://generativelanguage.googleapis.com/v1/models', {});
+
+    // Confirm the underlying fetch was actually reached before inspecting what it received.
+    expect(mockFetch).toHaveBeenCalledTimes(1);
+    const [, forwardedOptions] = mockFetch.mock.calls[0];
+    expect(forwardedOptions.dispatcher).toBeInstanceOf(ProxyAgent);
+  });
+
+  it('should not add dispatcher to non-googleapis.com URLs', async () => {
+    process.env.PROXY = 'http://proxy.example.com:8080';
+
+    const mockFetch = jest.fn(() => Promise.resolve({ ok: true }));
+    globalThis.fetch = mockFetch;
+
+    applyProxyWrapper();
+
+    await globalThis.fetch('https://api.openai.com/v1/images', {});
+
+    // Confirm the underlying fetch was actually reached before inspecting what it received.
+    expect(mockFetch).toHaveBeenCalledTimes(1);
+    const [, forwardedOptions] = mockFetch.mock.calls[0];
+    expect(forwardedOptions.dispatcher).toBeUndefined();
+  });
+
+  it('should preserve existing options when adding dispatcher', async () => {
+    process.env.PROXY = 'http://proxy.example.com:8080';
+
+    const mockFetch = jest.fn(() => Promise.resolve({ ok: true }));
+    globalThis.fetch = mockFetch;
+
+    applyProxyWrapper();
+
+    const customHeaders = { 'X-Custom-Header': 'test' };
+    await globalThis.fetch('https://aiplatform.googleapis.com/v1/models', {
+      headers: customHeaders,
+      method: 'POST',
+    });
+
+    // Confirm the underlying fetch was actually reached before inspecting what it received.
+    expect(mockFetch).toHaveBeenCalledTimes(1);
+    const [, forwardedOptions] = mockFetch.mock.calls[0];
+    expect(forwardedOptions.dispatcher).toBeInstanceOf(ProxyAgent);
+    expect(forwardedOptions.headers).toEqual(customHeaders);
+    expect(forwardedOptions.method).toBe('POST');
+  });
+});
--- a/api/app/clients/tools/util/fileSearch.js
+++ b/api/app/clients/tools/util/fileSearch.js
@ -1,4 +1,3 @@
-const { z } = require('zod');
 const axios = require('axios');
 const { tool } = require('@langchain/core/tools');
 const { logger } = require('@librechat/data-schemas');
@ -7,6 +6,18 @@ const { Tools, EToolResources } = require('librechat-data-provider');
 const { filterFilesByAgentAccess } = require('~/server/services/Files/permissions');
 const { getFiles } = require('~/models');

+/**
+ * JSON Schema for the file search tool input: an object with a single
+ * required string property, `query`.
+ */
+const fileSearchJsonSchema = {
+  type: 'object',
+  properties: {
+    query: {
+      type: 'string',
+      description:
+        "A natural language query to search for relevant information in the files. Be specific and use keywords related to the information you're looking for. The query will be used for semantic similarity matching against the file contents.",
+    },
+  },
+  required: ['query'],
+};
+
 /**
 *
 * @param {Object} options
@ -182,15 +193,9 @@ Use the EXACT anchor markers shown below (copy them verbatim) immediately after
 **ALWAYS mention the filename in your text before the citation marker. NEVER use markdown links or footnotes.**`
          : ''
      }`,
-      schema: z.object({
-        query: z
-          .string()
-          .describe(
-            "A natural language query to search for relevant information in the files. Be specific and use keywords related to the information you're looking for. The query will be used for semantic similarity matching against the file contents.",
-          ),
-      }),
+      schema: fileSearchJsonSchema,
    },
  );
 };

-module.exports = { createFileSearchTool, primeFiles };
+module.exports = { createFileSearchTool, primeFiles, fileSearchJsonSchema };
--- a/api/app/clients/tools/util/handleTools.js
+++ b/api/app/clients/tools/util/handleTools.js
@ -7,10 +7,12 @@ const {
 } = require('@librechat/agents');
 const {
  checkAccess,
+  toolkitParent,
  createSafeUser,
  mcpToolPattern,
  loadWebSearchAuth,
  buildImageToolContext,
+  buildWebSearchContext,
 } = require('@librechat/api');
 const { getMCPServersRegistry } = require('~/config');
 const {
@ -19,7 +21,6 @@ const {
  Permissions,
  EToolResources,
  PermissionTypes,
-  replaceSpecialVars,
 } = require('librechat-data-provider');
 const {
  availableTools,
@ -34,7 +35,6 @@ const {
  StructuredACS,
  TraversaalSearch,
  StructuredWolfram,
-  createYouTubeTools,
  TavilySearchResults,
  createGeminiImageTool,
  createOpenAIImageTools,
@ -185,11 +185,6 @@ const loadTools = async ({
  };

  const customConstructors = {
-    youtube: async (_toolContextMap) => {
-      const authFields = getAuthFields('youtube');
-      const authValues = await loadAuthValues({ userId: user, authFields });
-      return createYouTubeTools(authValues);
-    },
    image_gen_oai: async (toolContextMap) => {
      const authFields = getAuthFields('image_gen_oai');
      const authValues = await loadAuthValues({ userId: user, authFields });
@ -213,7 +208,7 @@ const loadTools = async ({
    },
    gemini_image_gen: async (toolContextMap) => {
      const authFields = getAuthFields('gemini_image_gen');
-      const authValues = await loadAuthValues({ userId: user, authFields });
+      const authValues = await loadAuthValues({ userId: user, authFields, throwError: false });
      const imageFiles = options.tool_resources?.[EToolResources.image_edit]?.files ?? [];
      const toolContext = buildImageToolContext({
        imageFiles,
@ -228,7 +223,6 @@ const loadTools = async ({
        isAgent: !!agent,
        req: options.req,
        imageFiles,
-        processFileURL: options.processFileURL,
        userId: user,
        fileStrategy,
      });
@ -331,24 +325,7 @@ const loadTools = async ({
      });
      const { onSearchResults, onGetHighlights } = options?.[Tools.web_search] ?? {};
      requestedTools[tool] = async () => {
-        toolContextMap[tool] = `# \`${tool}\`:
-Current Date & Time: ${replaceSpecialVars({ text: '{{iso_datetime}}' })}
-
-**Execute immediately without preface.** After search, provide a brief summary addressing the query directly, then structure your response with clear Markdown formatting (## headers, lists, tables). Cite sources properly, tailor tone to query type, and provide comprehensive details.
-
-**CITATION FORMAT - UNICODE ESCAPE SEQUENCES ONLY:**
-Use these EXACT escape sequences (copy verbatim): \\ue202 (before each anchor), \\ue200 (group start), \\ue201 (group end), \\ue203 (highlight start), \\ue204 (highlight end)
-
-Anchor pattern: \\ue202turn{N}{type}{index} where N=turn number, type=search|news|image|ref, index=0,1,2...
-
-**Examples (copy these exactly):**
- Single: "Statement.\\ue202turn0search0"
- Multiple: "Statement.\\ue202turn0search0\\ue202turn0news1"
- Group: "Statement. \\ue200\\ue202turn0search0\\ue202turn0news1\\ue201"
- Highlight: "\\ue203Cited text.\\ue204\\ue202turn0search0"
- Image: "See photo\\ue202turn0image0."
-
-**CRITICAL:** Output escape sequences EXACTLY as shown. Do NOT substitute with † or other symbols. Place anchors AFTER punctuation. Cite every non-obvious fact/quote. NEVER use markdown links, [1], footnotes, or HTML tags.`.trim();
+        toolContextMap[tool] = buildWebSearchContext();
        return createSearchTool({
          ...result.authResult,
          onSearchResults,
@ -393,8 +370,16 @@ Anchor pattern: \\ue202turn{N}{type}{index} where N=turn number, type=search|new
      continue;
    }

-    if (customConstructors[tool]) {
-      requestedTools[tool] = async () => customConstructors[tool](toolContextMap);
+    const toolKey = customConstructors[tool] ? tool : toolkitParent[tool];
+    if (toolKey && customConstructors[toolKey]) {
+      if (!requestedTools[toolKey]) {
+        let cached;
+        requestedTools[toolKey] = async () => {
+          cached ??= customConstructors[toolKey](toolContextMap);
+          return cached;
+        };
+      }
+      requestedTools[tool] = requestedTools[toolKey];
      continue;
    }

--- a/api/cache/banViolation.js
+++ b/api/cache/banViolation.js
@ -55,6 +55,7 @@ const banViolation = async (req, res, errorMessage) => {

  res.clearCookie('refreshToken');
  res.clearCookie('openid_access_token');
+  res.clearCookie('openid_id_token');
  res.clearCookie('openid_user_id');
  res.clearCookie('token_provider');

--- a/api/cache/getLogStores.js
+++ b/api/cache/getLogStores.js
@ -37,6 +37,7 @@ const namespaces = {
  [CacheKeys.ROLES]: standardCache(CacheKeys.ROLES),
  [CacheKeys.APP_CONFIG]: standardCache(CacheKeys.APP_CONFIG),
  [CacheKeys.CONFIG_STORE]: standardCache(CacheKeys.CONFIG_STORE),
+  [CacheKeys.TOOL_CACHE]: standardCache(CacheKeys.TOOL_CACHE),
  [CacheKeys.PENDING_REQ]: standardCache(CacheKeys.PENDING_REQ),
  [CacheKeys.ENCODED_DOMAINS]: new Keyv({ store: keyvMongo, namespace: CacheKeys.ENCODED_DOMAINS }),
  [CacheKeys.ABORT_KEYS]: standardCache(CacheKeys.ABORT_KEYS, Time.TEN_MINUTES),
@ -46,11 +47,15 @@ const namespaces = {
  [CacheKeys.MODEL_QUERIES]: standardCache(CacheKeys.MODEL_QUERIES),
  [CacheKeys.AUDIO_RUNS]: standardCache(CacheKeys.AUDIO_RUNS, Time.TEN_MINUTES),
  [CacheKeys.MESSAGES]: standardCache(CacheKeys.MESSAGES, Time.ONE_MINUTE),
-  [CacheKeys.FLOWS]: standardCache(CacheKeys.FLOWS, Time.ONE_MINUTE * 3),
+  [CacheKeys.FLOWS]: standardCache(CacheKeys.FLOWS, Time.ONE_MINUTE * 10),
  [CacheKeys.OPENID_EXCHANGED_TOKENS]: standardCache(
    CacheKeys.OPENID_EXCHANGED_TOKENS,
    Time.TEN_MINUTES,
  ),
+  [CacheKeys.ADMIN_OAUTH_EXCHANGE]: standardCache(
+    CacheKeys.ADMIN_OAUTH_EXCHANGE,
+    Time.THIRTY_SECONDS,
+  ),
 };

 /**
--- a/api/db/connect.js
+++ b/api/db/connect.js
@ -40,6 +40,10 @@ if (!cached) {
  cached = global.mongoose = { conn: null, promise: null };
 }

+// Registered at module scope (outside connectDb): reports any 'error' event
+// emitted by the mongoose connection through logger.error.
+mongoose.connection.on('error', (err) => {
+  logger.error('[connectDb] MongoDB connection error:', err);
+});
+
 async function connectDb() {
  if (cached.conn && cached.conn?._readyState === 1) {
    return cached.conn;
--- a/api/db/indexSync.js
+++ b/api/db/indexSync.js
@ -13,6 +13,11 @@ const searchEnabled = isEnabled(process.env.SEARCH);
 const indexingDisabled = isEnabled(process.env.MEILI_NO_SYNC);
 let currentTimeout = null;

+/**
+ * Parses the `MEILI_SYNC_THRESHOLD` setting.
+ * @param {string | undefined} rawValue - Raw environment value.
+ * @param {number} fallback - Value used when the setting is unset, empty, or not an integer.
+ * @returns {number} The parsed threshold, or `fallback`.
+ */
+const parseSyncThreshold = (rawValue, fallback) => {
+  if (!rawValue) {
+    return fallback;
+  }
+  const parsed = parseInt(rawValue, 10);
+  // A NaN threshold makes every `unindexed > syncThreshold` comparison false,
+  // which would silently disable threshold-triggered syncs.
+  return Number.isNaN(parsed) ? fallback : parsed;
+};
+
+const defaultSyncThreshold = 1000;
+const syncThreshold = parseSyncThreshold(process.env.MEILI_SYNC_THRESHOLD, defaultSyncThreshold);
+
 class MeiliSearchClient {
  static instance = null;

@ -221,25 +226,29 @@ async function performSync(flowManager, flowId, flowType) {
    }

    // Check if we need to sync messages
+    logger.info('[indexSync] Requesting message sync progress...');
    const messageProgress = await Message.getSyncProgress();
    if (!messageProgress.isComplete || settingsUpdated) {
      logger.info(
        `[indexSync] Messages need syncing: ${messageProgress.totalProcessed}/${messageProgress.totalDocuments} indexed`,
      );

-      // Check if we should do a full sync or incremental
-      const messageCount = await Message.countDocuments();
+      const messageCount = messageProgress.totalDocuments;
      const messagesIndexed = messageProgress.totalProcessed;
-      const syncThreshold = parseInt(process.env.MEILI_SYNC_THRESHOLD || '1000', 10);
+      const unindexedMessages = messageCount - messagesIndexed;
+      const noneIndexed = messagesIndexed === 0 && unindexedMessages > 0;

-      if (messageCount - messagesIndexed > syncThreshold) {
-        logger.info('[indexSync] Starting full message sync due to large difference');
-        await Message.syncWithMeili();
-        messagesSync = true;
-      } else if (messageCount !== messagesIndexed) {
-        logger.warn('[indexSync] Messages out of sync, performing incremental sync');
+      if (settingsUpdated || noneIndexed || unindexedMessages > syncThreshold) {
+        if (noneIndexed && !settingsUpdated) {
+          logger.info('[indexSync] No messages marked as indexed, forcing full sync');
+        }
+        logger.info(`[indexSync] Starting message sync (${unindexedMessages} unindexed)`);
        await Message.syncWithMeili();
        messagesSync = true;
+      } else if (unindexedMessages > 0) {
+        logger.info(
+          `[indexSync] ${unindexedMessages} messages unindexed (below threshold: ${syncThreshold}, skipping)`,
+        );
      }
    } else {
      logger.info(
@ -254,18 +263,22 @@ async function performSync(flowManager, flowId, flowType) {
        `[indexSync] Conversations need syncing: ${convoProgress.totalProcessed}/${convoProgress.totalDocuments} indexed`,
      );

-      const convoCount = await Conversation.countDocuments();
+      const convoCount = convoProgress.totalDocuments;
      const convosIndexed = convoProgress.totalProcessed;
-      const syncThreshold = parseInt(process.env.MEILI_SYNC_THRESHOLD || '1000', 10);
+      const unindexedConvos = convoCount - convosIndexed;
+      const noneConvosIndexed = convosIndexed === 0 && unindexedConvos > 0;

-      if (convoCount - convosIndexed > syncThreshold) {
-        logger.info('[indexSync] Starting full conversation sync due to large difference');
-        await Conversation.syncWithMeili();
-        convosSync = true;
-      } else if (convoCount !== convosIndexed) {
-        logger.warn('[indexSync] Convos out of sync, performing incremental sync');
+      if (settingsUpdated || noneConvosIndexed || unindexedConvos > syncThreshold) {
+        if (noneConvosIndexed && !settingsUpdated) {
+          logger.info('[indexSync] No conversations marked as indexed, forcing full sync');
+        }
+        logger.info(`[indexSync] Starting convos sync (${unindexedConvos} unindexed)`);
        await Conversation.syncWithMeili();
        convosSync = true;
+      } else if (unindexedConvos > 0) {
+        logger.info(
+          `[indexSync] ${unindexedConvos} convos unindexed (below threshold: ${syncThreshold}, skipping)`,
+        );
      }
    } else {
      logger.info(
--- a/api/db/indexSync.spec.js
+++ b/api/db/indexSync.spec.js
@ -0,0 +1,530 @@
+/**
+ * Unit tests for performSync() function in indexSync.js
+ *
+ * Tests use real mongoose with mocked model methods, only mocking external calls.
+ */
+
+const mongoose = require('mongoose');
+
+// Mock only external dependencies (not internal classes/models)
+const mockLogger = {
+  info: jest.fn(),
+  warn: jest.fn(),
+  error: jest.fn(),
+  debug: jest.fn(),
+};
+
+const mockMeiliHealth = jest.fn();
+const mockMeiliIndex = jest.fn();
+const mockBatchResetMeiliFlags = jest.fn();
+const mockIsEnabled = jest.fn();
+const mockGetLogStores = jest.fn();
+
+/**
+ * Builds a reusable stand-in for a mongoose model: a `collection.name` plus
+ * jest-mocked `getSyncProgress`, `syncWithMeili`, and `countDocuments`.
+ * @param {string} collectionName - Value reported as `collection.name`.
+ * @returns {object} The mock model.
+ */
+const createMockModel = (collectionName) => {
+  const model = { collection: { name: collectionName } };
+  for (const method of ['getSyncProgress', 'syncWithMeili', 'countDocuments']) {
+    model[method] = jest.fn();
+  }
+  return model;
+};
+
+const originalMessageModel = mongoose.models.Message;
+const originalConversationModel = mongoose.models.Conversation;
+
+// Mock external modules
+jest.mock('@librechat/data-schemas', () => ({
+  logger: mockLogger,
+}));
+
+jest.mock('meilisearch', () => ({
+  MeiliSearch: jest.fn(() => ({
+    health: mockMeiliHealth,
+    index: mockMeiliIndex,
+  })),
+}));
+
+jest.mock('./utils', () => ({
+  batchResetMeiliFlags: mockBatchResetMeiliFlags,
+}));
+
+jest.mock('@librechat/api', () => ({
+  isEnabled: mockIsEnabled,
+  FlowStateManager: jest.fn(),
+}));
+
+jest.mock('~/cache', () => ({
+  getLogStores: mockGetLogStores,
+}));
+
+// Set environment before module load
+process.env.MEILI_HOST = 'http://localhost:7700';
+process.env.MEILI_MASTER_KEY = 'test-key';
+process.env.SEARCH = 'true';
+process.env.MEILI_SYNC_THRESHOLD = '1000'; // Set threshold before module loads
+
+describe('performSync() - syncThreshold logic', () => {
+  const ORIGINAL_ENV = process.env;
+  let Message;
+  let Conversation;
+
+  beforeAll(() => {
+    Message = createMockModel('messages');
+    Conversation = createMockModel('conversations');
+
+    mongoose.models.Message = Message;
+    mongoose.models.Conversation = Conversation;
+  });
+
+  beforeEach(() => {
+    // Reset all mocks
+    jest.clearAllMocks();
+    // Reset modules to ensure fresh load of indexSync.js and its top-level consts (like syncThreshold)
+    jest.resetModules();
+
+    // Set up environment
+    process.env = { ...ORIGINAL_ENV };
+    process.env.MEILI_HOST = 'http://localhost:7700';
+    process.env.MEILI_MASTER_KEY = 'test-key';
+    process.env.SEARCH = 'true';
+    delete process.env.MEILI_NO_SYNC;
+
+    // Re-ensure models are available in mongoose after resetModules
+    // We must require mongoose again to get the fresh instance that indexSync will use
+    const mongoose = require('mongoose');
+    mongoose.models.Message = Message;
+    mongoose.models.Conversation = Conversation;
+
+    // Mock isEnabled
+    mockIsEnabled.mockImplementation((val) => val === 'true' || val === true);
+
+    // Mock MeiliSearch client responses
+    mockMeiliHealth.mockResolvedValue({ status: 'available' });
+    mockMeiliIndex.mockReturnValue({
+      getSettings: jest.fn().mockResolvedValue({ filterableAttributes: ['user'] }),
+      updateSettings: jest.fn().mockResolvedValue({}),
+      search: jest.fn().mockResolvedValue({ hits: [] }),
+    });
+
+    mockBatchResetMeiliFlags.mockResolvedValue(undefined);
+  });
+
+  afterEach(() => {
+    process.env = ORIGINAL_ENV;
+  });
+
+  afterAll(() => {
+    mongoose.models.Message = originalMessageModel;
+    mongoose.models.Conversation = originalConversationModel;
+  });
+
+  test('triggers sync when unindexed messages exceed syncThreshold', async () => {
+    // Arrange: Set threshold before module load
+    process.env.MEILI_SYNC_THRESHOLD = '1000';
+
+    // Arrange: 1050 unindexed messages > 1000 threshold
+    Message.getSyncProgress.mockResolvedValue({
+      totalProcessed: 100,
+      totalDocuments: 1150, // 1050 unindexed
+      isComplete: false,
+    });
+
+    Conversation.getSyncProgress.mockResolvedValue({
+      totalProcessed: 50,
+      totalDocuments: 50,
+      isComplete: true,
+    });
+
+    Message.syncWithMeili.mockResolvedValue(undefined);
+
+    // Act
+    const indexSync = require('./indexSync');
+    await indexSync();
+
+    // Assert: No countDocuments calls
+    expect(Message.countDocuments).not.toHaveBeenCalled();
+    expect(Conversation.countDocuments).not.toHaveBeenCalled();
+
+    // Assert: Message sync triggered because 1050 > 1000
+    expect(Message.syncWithMeili).toHaveBeenCalledTimes(1);
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      '[indexSync] Messages need syncing: 100/1150 indexed',
+    );
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      '[indexSync] Starting message sync (1050 unindexed)',
+    );
+
+    // Assert: Conversation sync NOT triggered (already complete)
+    expect(Conversation.syncWithMeili).not.toHaveBeenCalled();
+  });
+
+  test('skips sync when unindexed messages are below syncThreshold', async () => {
+    // Arrange: 50 unindexed messages < 1000 threshold
+    Message.getSyncProgress.mockResolvedValue({
+      totalProcessed: 100,
+      totalDocuments: 150, // 50 unindexed
+      isComplete: false,
+    });
+
+    Conversation.getSyncProgress.mockResolvedValue({
+      totalProcessed: 50,
+      totalDocuments: 50,
+      isComplete: true,
+    });
+
+    process.env.MEILI_SYNC_THRESHOLD = '1000';
+
+    // Act
+    const indexSync = require('./indexSync');
+    await indexSync();
+
+    // Assert: No countDocuments calls
+    expect(Message.countDocuments).not.toHaveBeenCalled();
+    expect(Conversation.countDocuments).not.toHaveBeenCalled();
+
+    // Assert: Message sync NOT triggered because 50 < 1000
+    expect(Message.syncWithMeili).not.toHaveBeenCalled();
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      '[indexSync] Messages need syncing: 100/150 indexed',
+    );
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      '[indexSync] 50 messages unindexed (below threshold: 1000, skipping)',
+    );
+
+    // Assert: Conversation sync NOT triggered (already complete)
+    expect(Conversation.syncWithMeili).not.toHaveBeenCalled();
+  });
+
+  test('respects syncThreshold at boundary (exactly at threshold)', async () => {
+    // Arrange: 1000 unindexed messages = 1000 threshold (NOT greater than)
+    Message.getSyncProgress.mockResolvedValue({
+      totalProcessed: 100,
+      totalDocuments: 1100, // 1000 unindexed
+      isComplete: false,
+    });
+
+    Conversation.getSyncProgress.mockResolvedValue({
+      totalProcessed: 0,
+      totalDocuments: 0,
+      isComplete: true,
+    });
+
+    process.env.MEILI_SYNC_THRESHOLD = '1000';
+
+    // Act
+    const indexSync = require('./indexSync');
+    await indexSync();
+
+    // Assert: No countDocuments calls
+    expect(Message.countDocuments).not.toHaveBeenCalled();
+
+    // Assert: Message sync NOT triggered because 1000 is NOT > 1000
+    expect(Message.syncWithMeili).not.toHaveBeenCalled();
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      '[indexSync] Messages need syncing: 100/1100 indexed',
+    );
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      '[indexSync] 1000 messages unindexed (below threshold: 1000, skipping)',
+    );
+  });
+
+  test('triggers sync when unindexed is threshold + 1', async () => {
+    // Arrange: 1001 unindexed messages > 1000 threshold (smallest count that exceeds it)
+    Message.getSyncProgress.mockResolvedValue({
+      totalProcessed: 100,
+      totalDocuments: 1101, // 1101 total - 100 processed = 1001 unindexed
+      isComplete: false,
+    });
+
+    Conversation.getSyncProgress.mockResolvedValue({
+      totalProcessed: 0,
+      totalDocuments: 0,
+      isComplete: true,
+    });
+
+    Message.syncWithMeili.mockResolvedValue(undefined);
+
+    process.env.MEILI_SYNC_THRESHOLD = '1000';
+
+    // Act: require the module inside the test, then run it
+    const indexSync = require('./indexSync');
+    await indexSync();
+
+    // Assert: No countDocuments calls (the totals logged below match the mocked getSyncProgress values)
+    expect(Message.countDocuments).not.toHaveBeenCalled();
+
+    // Assert: Message sync triggered exactly once because 1001 > 1000
+    expect(Message.syncWithMeili).toHaveBeenCalledTimes(1);
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      '[indexSync] Messages need syncing: 100/1101 indexed',
+    );
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      '[indexSync] Starting message sync (1001 unindexed)',
+    );
+  });
+
+  test('uses totalDocuments from convoProgress for conversation sync decisions', async () => {
+    // Arrange: Messages complete (100/100), conversations need sync (50/1100)
+    Message.getSyncProgress.mockResolvedValue({
+      totalProcessed: 100,
+      totalDocuments: 100,
+      isComplete: true,
+    });
+
+    Conversation.getSyncProgress.mockResolvedValue({
+      totalProcessed: 50,
+      totalDocuments: 1100, // 1100 total - 50 processed = 1050 unindexed > 1000 threshold
+      isComplete: false,
+    });
+
+    Conversation.syncWithMeili.mockResolvedValue(undefined);
+
+    process.env.MEILI_SYNC_THRESHOLD = '1000';
+
+    // Act: require the module inside the test, then run it
+    const indexSync = require('./indexSync');
+    await indexSync();
+
+    // Assert: No countDocuments calls on either model (the optimization under test)
+    expect(Message.countDocuments).not.toHaveBeenCalled();
+    expect(Conversation.countDocuments).not.toHaveBeenCalled();
+
+    // Assert: Only conversation sync triggered; the logged counts match the mocked progress values
+    expect(Message.syncWithMeili).not.toHaveBeenCalled();
+    expect(Conversation.syncWithMeili).toHaveBeenCalledTimes(1);
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      '[indexSync] Conversations need syncing: 50/1100 indexed',
+    );
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      '[indexSync] Starting convos sync (1050 unindexed)',
+    );
+  });
+
+  test('skips sync when collections are fully synced', async () => {
+    // Arrange: Everything already synced (processed === total and isComplete for both models)
+    Message.getSyncProgress.mockResolvedValue({
+      totalProcessed: 100,
+      totalDocuments: 100,
+      isComplete: true,
+    });
+
+    Conversation.getSyncProgress.mockResolvedValue({
+      totalProcessed: 50,
+      totalDocuments: 50,
+      isComplete: true,
+    });
+
+    // Act: require the module inside the test, then run it (no threshold is set in this test)
+    const indexSync = require('./indexSync');
+    await indexSync();
+
+    // Assert: No countDocuments calls on either model
+    expect(Message.countDocuments).not.toHaveBeenCalled();
+    expect(Conversation.countDocuments).not.toHaveBeenCalled();
+
+    // Assert: No sync triggered for either model
+    expect(Message.syncWithMeili).not.toHaveBeenCalled();
+    expect(Conversation.syncWithMeili).not.toHaveBeenCalled();
+
+    // Assert: "fully synced" is logged with processed/total taken from the mocked progress
+    expect(mockLogger.info).toHaveBeenCalledWith('[indexSync] Messages are fully synced: 100/100');
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      '[indexSync] Conversations are fully synced: 50/50',
+    );
+  });
+
+  test('triggers message sync when settingsUpdated even if below syncThreshold', async () => {
+    // Arrange: Only 50 unindexed messages (< 1000 threshold), but settings were updated
+    Message.getSyncProgress.mockResolvedValue({
+      totalProcessed: 100,
+      totalDocuments: 150, // 150 total - 100 processed = 50 unindexed
+      isComplete: false,
+    });
+
+    Conversation.getSyncProgress.mockResolvedValue({
+      totalProcessed: 50,
+      totalDocuments: 50,
+      isComplete: true,
+    });
+
+    Message.syncWithMeili.mockResolvedValue(undefined);
+
+    // Mock settings update scenario (NOTE(review): presumably the empty filterableAttributes is what makes indexSync update settings; confirm in indexSync.js)
+    mockMeiliIndex.mockReturnValue({
+      getSettings: jest.fn().mockResolvedValue({ filterableAttributes: [] }), // No user field
+      updateSettings: jest.fn().mockResolvedValue({}),
+      search: jest.fn().mockResolvedValue({ hits: [] }),
+    });
+
+    process.env.MEILI_SYNC_THRESHOLD = '1000';
+
+    // Act: require the module inside the test, then run it
+    const indexSync = require('./indexSync');
+    await indexSync();
+
+    // Assert: Flags were reset for both collections due to settings update
+    expect(mockBatchResetMeiliFlags).toHaveBeenCalledWith(Message.collection);
+    expect(mockBatchResetMeiliFlags).toHaveBeenCalledWith(Conversation.collection);
+
+    // Assert: Message sync triggered despite being below threshold (50 < 1000)
+    expect(Message.syncWithMeili).toHaveBeenCalledTimes(1);
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      '[indexSync] Settings updated. Forcing full re-sync to reindex with new configuration...',
+    );
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      '[indexSync] Starting message sync (50 unindexed)',
+    );
+  });
+
+  test('triggers conversation sync when settingsUpdated even if below syncThreshold', async () => {
+    // Arrange: Messages complete; conversations have 50 unindexed (< 1000 threshold), but settings were updated
+    Message.getSyncProgress.mockResolvedValue({
+      totalProcessed: 100,
+      totalDocuments: 100,
+      isComplete: true,
+    });
+
+    Conversation.getSyncProgress.mockResolvedValue({
+      totalProcessed: 50,
+      totalDocuments: 100, // 100 total - 50 processed = 50 unindexed
+      isComplete: false,
+    });
+
+    Conversation.syncWithMeili.mockResolvedValue(undefined);
+
+    // Mock settings update scenario (NOTE(review): presumably the empty filterableAttributes is what makes indexSync update settings; confirm in indexSync.js)
+    mockMeiliIndex.mockReturnValue({
+      getSettings: jest.fn().mockResolvedValue({ filterableAttributes: [] }), // No user field
+      updateSettings: jest.fn().mockResolvedValue({}),
+      search: jest.fn().mockResolvedValue({ hits: [] }),
+    });
+
+    process.env.MEILI_SYNC_THRESHOLD = '1000';
+
+    // Act: require the module inside the test, then run it
+    const indexSync = require('./indexSync');
+    await indexSync();
+
+    // Assert: Flags were reset for both collections due to settings update
+    expect(mockBatchResetMeiliFlags).toHaveBeenCalledWith(Message.collection);
+    expect(mockBatchResetMeiliFlags).toHaveBeenCalledWith(Conversation.collection);
+
+    // Assert: Conversation sync triggered despite being below threshold (50 < 1000)
+    expect(Conversation.syncWithMeili).toHaveBeenCalledTimes(1);
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      '[indexSync] Settings updated. Forcing full re-sync to reindex with new configuration...',
+    );
+    expect(mockLogger.info).toHaveBeenCalledWith('[indexSync] Starting convos sync (50 unindexed)');
+  });
+
+  test('triggers both message and conversation sync when settingsUpdated even if both below syncThreshold', async () => {
+    // Arrange: Set threshold before the module is required below
+    process.env.MEILI_SYNC_THRESHOLD = '1000';
+
+    // Arrange: Both collections have 50 unindexed documents (below threshold), but settings were updated
+    Message.getSyncProgress.mockResolvedValue({
+      totalProcessed: 100,
+      totalDocuments: 150, // 150 total - 100 processed = 50 unindexed
+      isComplete: false,
+    });
+
+    Conversation.getSyncProgress.mockResolvedValue({
+      totalProcessed: 50,
+      totalDocuments: 100, // 100 total - 50 processed = 50 unindexed
+      isComplete: false,
+    });
+
+    Message.syncWithMeili.mockResolvedValue(undefined);
+    Conversation.syncWithMeili.mockResolvedValue(undefined);
+
+    // Mock settings update scenario (NOTE(review): presumably the empty filterableAttributes is what makes indexSync update settings; confirm in indexSync.js)
+    mockMeiliIndex.mockReturnValue({
+      getSettings: jest.fn().mockResolvedValue({ filterableAttributes: [] }), // No user field
+      updateSettings: jest.fn().mockResolvedValue({}),
+      search: jest.fn().mockResolvedValue({ hits: [] }),
+    });
+
+    // Act: require the module inside the test, then run it
+    const indexSync = require('./indexSync');
+    await indexSync();
+
+    // Assert: Flags were reset for both collections due to settings update
+    expect(mockBatchResetMeiliFlags).toHaveBeenCalledWith(Message.collection);
+    expect(mockBatchResetMeiliFlags).toHaveBeenCalledWith(Conversation.collection);
+
+    // Assert: Both syncs triggered exactly once despite both being below threshold (50 < 1000)
+    expect(Message.syncWithMeili).toHaveBeenCalledTimes(1);
+    expect(Conversation.syncWithMeili).toHaveBeenCalledTimes(1);
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      '[indexSync] Settings updated. Forcing full re-sync to reindex with new configuration...',
+    );
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      '[indexSync] Starting message sync (50 unindexed)',
+    );
+    expect(mockLogger.info).toHaveBeenCalledWith('[indexSync] Starting convos sync (50 unindexed)');
+  });
+
+  test('forces sync when zero documents indexed (reset scenario) even if below threshold', async () => {
+    Message.getSyncProgress.mockResolvedValue({
+      totalProcessed: 0, // nothing marked as indexed
+      totalDocuments: 680, // all 680 unindexed; threshold is not set in this test (NOTE(review): presumably a default applies)
+      isComplete: false,
+    });
+
+    Conversation.getSyncProgress.mockResolvedValue({
+      totalProcessed: 0, // nothing marked as indexed
+      totalDocuments: 76, // all 76 unindexed
+      isComplete: false,
+    });
+
+    Message.syncWithMeili.mockResolvedValue(undefined);
+    Conversation.syncWithMeili.mockResolvedValue(undefined);
+
+    const indexSync = require('./indexSync'); // required inside the test, then run
+    await indexSync();
+
+    expect(Message.syncWithMeili).toHaveBeenCalledTimes(1); // both syncs run exactly once
+    expect(Conversation.syncWithMeili).toHaveBeenCalledTimes(1);
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      '[indexSync] No messages marked as indexed, forcing full sync',
+    );
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      '[indexSync] Starting message sync (680 unindexed)',
+    );
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      '[indexSync] No conversations marked as indexed, forcing full sync',
+    );
+    expect(mockLogger.info).toHaveBeenCalledWith('[indexSync] Starting convos sync (76 unindexed)');
+  });
+
+  test('does NOT force sync when some documents already indexed and below threshold', async () => {
+    Message.getSyncProgress.mockResolvedValue({
+      totalProcessed: 630, // non-zero: some messages are already indexed
+      totalDocuments: 680, // 680 total - 630 processed = 50 unindexed
+      isComplete: false,
+    });
+
+    Conversation.getSyncProgress.mockResolvedValue({
+      totalProcessed: 70, // non-zero: some conversations are already indexed
+      totalDocuments: 76, // 76 total - 70 processed = 6 unindexed
+      isComplete: false,
+    });
+
+    const indexSync = require('./indexSync'); // required inside the test, then run
+    await indexSync();
+
+    expect(Message.syncWithMeili).not.toHaveBeenCalled(); // neither sync runs
+    expect(Conversation.syncWithMeili).not.toHaveBeenCalled();
+    expect(mockLogger.info).not.toHaveBeenCalledWith(
+      '[indexSync] No messages marked as indexed, forcing full sync',
+    );
+    expect(mockLogger.info).not.toHaveBeenCalledWith(
+      '[indexSync] No conversations marked as indexed, forcing full sync',
+    );
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      '[indexSync] 50 messages unindexed (below threshold: 1000, skipping)',
+    );
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      '[indexSync] 6 convos unindexed (below threshold: 1000, skipping)',
+    );
+  });
+});
--- a/api/db/utils.js
+++ b/api/db/utils.js
@ -26,7 +26,7 @@ async function batchResetMeiliFlags(collection) {
  try {
    while (hasMore) {
      const docs = await collection
-        .find({ expiredAt: null, _meiliIndex: true }, { projection: { _id: 1 } })
+        .find({ expiredAt: null, _meiliIndex: { $ne: false } }, { projection: { _id: 1 } })
        .limit(BATCH_SIZE)
        .toArray();

--- a/api/db/utils.spec.js
+++ b/api/db/utils.spec.js
@ -265,8 +265,8 @@ describe('batchResetMeiliFlags', () => {

      const result = await batchResetMeiliFlags(testCollection);

-      // Only one document has _meiliIndex: true
-      expect(result).toBe(1);
+      // Both documents should be updated (the query now matches any _meiliIndex value other than false)
+      expect(result).toBe(2);
    });

    it('should handle mixed document states correctly', async () => {
@ -275,16 +275,18 @@ describe('batchResetMeiliFlags', () => {
        { _id: new mongoose.Types.ObjectId(), expiredAt: null, _meiliIndex: false },
        { _id: new mongoose.Types.ObjectId(), expiredAt: new Date(), _meiliIndex: true },
        { _id: new mongoose.Types.ObjectId(), expiredAt: null, _meiliIndex: true },
+        { _id: new mongoose.Types.ObjectId(), expiredAt: null, _meiliIndex: null },
+        { _id: new mongoose.Types.ObjectId(), expiredAt: null },
      ]);

      const result = await batchResetMeiliFlags(testCollection);

-      expect(result).toBe(2);
+      expect(result).toBe(4);

      const flaggedDocs = await testCollection
        .find({ expiredAt: null, _meiliIndex: false })
        .toArray();
-      expect(flaggedDocs).toHaveLength(3); // 2 were updated, 1 was already false
+      expect(flaggedDocs).toHaveLength(5); // 4 were updated, 1 was already false
    });
  });

--- a/api/jest.config.js
+++ b/api/jest.config.js
@ -3,12 +3,13 @@ module.exports = {
  clearMocks: true,
  roots: ['<rootDir>'],
  coverageDirectory: 'coverage',
+  maxWorkers: '50%',
  testTimeout: 30000, // 30 seconds timeout for all tests
  setupFiles: ['./test/jestSetup.js', './test/__mocks__/logger.js'],
  moduleNameMapper: {
    '~/(.*)': '<rootDir>/$1',
    '~/data/auth.json': '<rootDir>/__mocks__/auth.mock.json',
-    '^openid-client/passport$': '<rootDir>/test/__mocks__/openid-client-passport.js', // Mock for the passport strategy part
+    '^openid-client/passport$': '<rootDir>/test/__mocks__/openid-client-passport.js',
    '^openid-client$': '<rootDir>/test/__mocks__/openid-client.js',
  },
  transformIgnorePatterns: ['/node_modules/(?!(openid-client|oauth4webapi|jose)/).*/'],
--- a/api/models/Agent.js
+++ b/api/models/Agent.js
@ -11,17 +11,15 @@ const {
  isEphemeralAgentId,
  encodeEphemeralAgentId,
 } = require('librechat-data-provider');
-const { GLOBAL_PROJECT_NAME, mcp_all, mcp_delimiter } =
-  require('librechat-data-provider').Constants;
+const { mcp_all, mcp_delimiter } = require('librechat-data-provider').Constants;
 const {
  removeAgentFromAllProjects,
  removeAgentIdsFromProject,
  addAgentIdsToProject,
-  getProjectByName,
 } = require('./Project');
 const { removeAllPermissions } = require('~/server/services/PermissionService');
 const { getMCPServerTools } = require('~/server/services/Config');
-const { Agent, AclEntry } = require('~/db/models');
+const { Agent, AclEntry, User } = require('~/db/models');
 const { getActions } = require('./Action');

 /**
@ -591,15 +589,29 @@ const deleteAgent = async (searchParameter) => {
  const agent = await Agent.findOneAndDelete(searchParameter);
  if (agent) {
    await removeAgentFromAllProjects(agent.id);
-    await removeAllPermissions({
-      resourceType: ResourceType.AGENT,
-      resourceId: agent._id,
-    });
+    await Promise.all([
+      removeAllPermissions({
+        resourceType: ResourceType.AGENT,
+        resourceId: agent._id,
+      }),
+      removeAllPermissions({
+        resourceType: ResourceType.REMOTE_AGENT,
+        resourceId: agent._id,
+      }),
+    ]);
    try {
      await Agent.updateMany({ 'edges.to': agent.id }, { $pull: { edges: { to: agent.id } } });
    } catch (error) {
      logger.error('[deleteAgent] Error removing agent from handoff edges', error);
    }
+    try {
+      await User.updateMany(
+        { 'favorites.agentId': agent.id },
+        { $pull: { favorites: { agentId: agent.id } } },
+      );
+    } catch (error) {
+      logger.error('[deleteAgent] Error removing agent from user favorites', error);
+    }
  }
  return agent;
 };
@ -625,10 +637,19 @@ const deleteUserAgents = async (userId) => {
    }

    await AclEntry.deleteMany({
-      resourceType: ResourceType.AGENT,
+      resourceType: { $in: [ResourceType.AGENT, ResourceType.REMOTE_AGENT] },
      resourceId: { $in: agentObjectIds },
    });

+    try {
+      await User.updateMany(
+        { 'favorites.agentId': { $in: agentIds } },
+        { $pull: { favorites: { agentId: { $in: agentIds } } } },
+      );
+    } catch (error) {
+      logger.error('[deleteUserAgents] Error removing agents from user favorites', error);
+    }
+
    await Agent.deleteMany({ author: userId });
  } catch (error) {
    logger.error('[deleteUserAgents] General error:', error);
@ -735,59 +756,6 @@ const getListAgentsByAccess = async ({
  };
 };

-/**
- * Get all agents.
- * @deprecated Use getListAgentsByAccess for ACL-aware agent listing
- * @param {Object} searchParameter - The search parameters to find matching agents.
- * @param {string} searchParameter.author - The user ID of the agent's author.
- * @returns {Promise<Object>} A promise that resolves to an object containing the agents data and pagination info.
- */
-const getListAgents = async (searchParameter) => {
-  const { author, ...otherParams } = searchParameter;
-
-  let query = Object.assign({ author }, otherParams);
-
-  const globalProject = await getProjectByName(GLOBAL_PROJECT_NAME, ['agentIds']);
-  if (globalProject && (globalProject.agentIds?.length ?? 0) > 0) {
-    const globalQuery = { id: { $in: globalProject.agentIds }, ...otherParams };
-    delete globalQuery.author;
-    query = { $or: [globalQuery, query] };
-  }
-  const agents = (
-    await Agent.find(query, {
-      id: 1,
-      _id: 1,
-      name: 1,
-      avatar: 1,
-      author: 1,
-      projectIds: 1,
-      description: 1,
-      // @deprecated - isCollaborative replaced by ACL permissions
-      isCollaborative: 1,
-      category: 1,
-    }).lean()
-  ).map((agent) => {
-    if (agent.author?.toString() !== author) {
-      delete agent.author;
-    }
-    if (agent.author) {
-      agent.author = agent.author.toString();
-    }
-    return agent;
-  });
-
-  const hasMore = agents.length > 0;
-  const firstId = agents.length > 0 ? agents[0].id : null;
-  const lastId = agents.length > 0 ? agents[agents.length - 1].id : null;
-
-  return {
-    data: agents,
-    has_more: hasMore,
-    first_id: firstId,
-    last_id: lastId,
-  };
-};
-
 /**
 * Updates the projects associated with an agent, adding and removing project IDs as specified.
 * This function also updates the corresponding projects to include or exclude the agent ID.
@ -953,12 +921,11 @@ module.exports = {
  updateAgent,
  deleteAgent,
  deleteUserAgents,
-  getListAgents,
  revertAgentVersion,
  updateAgentProjects,
+  countPromotedAgents,
  addAgentResourceFile,
  getListAgentsByAccess,
  removeAgentResourceFiles,
  generateActionMetadataHash,
-  countPromotedAgents,
 };
--- a/api/models/Agent.spec.js
+++ b/api/models/Agent.spec.js
@ -22,17 +22,17 @@ const {
  createAgent,
  updateAgent,
  deleteAgent,
-  getListAgents,
-  getListAgentsByAccess,
+  deleteUserAgents,
  revertAgentVersion,
  updateAgentProjects,
  addAgentResourceFile,
+  getListAgentsByAccess,
  removeAgentResourceFiles,
  generateActionMetadataHash,
 } = require('./Agent');
 const permissionService = require('~/server/services/PermissionService');
 const { getCachedTools, getMCPServerTools } = require('~/server/services/Config');
-const { AclEntry } = require('~/db/models');
+const { AclEntry, User } = require('~/db/models');

 /**
 * @type {import('mongoose').Model<import('@librechat/data-schemas').IAgent>}
@ -59,6 +59,7 @@ describe('models/Agent', () => {

    beforeEach(async () => {
      await Agent.deleteMany({});
+      await User.deleteMany({});
    });

    test('should add tool_resource to tools if missing', async () => {
@ -575,43 +576,488 @@ describe('models/Agent', () => {
      expect(sourceAgentAfter.edges).toHaveLength(0);
    });

-    test('should list agents by author', async () => {
+    test('should remove agent from user favorites when agent is deleted', async () => {
+      const agentId = `agent_${uuidv4()}`;
+      const authorId = new mongoose.Types.ObjectId();
+      const userId = new mongoose.Types.ObjectId();
+
+      // Create the agent that will be deleted
+      await createAgent({
+        id: agentId,
+        name: 'Agent To Delete',
+        provider: 'test',
+        model: 'test-model',
+        author: authorId,
+      });
+
+      // Create a user whose favorites hold the agent plus one model favorite
+      await User.create({
+        _id: userId,
+        name: 'Test User',
+        email: `test-${uuidv4()}@example.com`,
+        provider: 'local',
+        favorites: [{ agentId: agentId }, { model: 'gpt-4', endpoint: 'openAI' }],
+      });
+
+      // Precondition: user has both favorites, including the agent
+      const userBefore = await User.findById(userId);
+      expect(userBefore.favorites).toHaveLength(2);
+      expect(userBefore.favorites.some((f) => f.agentId === agentId)).toBe(true);
+
+      // Delete the agent by its string id
+      await deleteAgent({ id: agentId });
+
+      // Verify the agent can no longer be fetched
+      const agentAfterDelete = await getAgent({ id: agentId });
+      expect(agentAfterDelete).toBeNull();
+
+      // Verify only the agent favorite was removed; the model favorite is kept
+      const userAfter = await User.findById(userId);
+      expect(userAfter.favorites).toHaveLength(1);
+      expect(userAfter.favorites.some((f) => f.agentId === agentId)).toBe(false);
+      expect(userAfter.favorites.some((f) => f.model === 'gpt-4')).toBe(true);
+    });
+
+    test('should remove agent from multiple users favorites when agent is deleted', async () => {
+      const agentId = `agent_${uuidv4()}`;
+      const authorId = new mongoose.Types.ObjectId();
+      const user1Id = new mongoose.Types.ObjectId();
+      const user2Id = new mongoose.Types.ObjectId();
+
+      // Create the agent that will be deleted
+      await createAgent({
+        id: agentId,
+        name: 'Agent To Delete',
+        provider: 'test',
+        model: 'test-model',
+        author: authorId,
+      });
+
+      // Create two users with the agent in favorites (user 1 has only that favorite)
+      await User.create({
+        _id: user1Id,
+        name: 'Test User 1',
+        email: `test1-${uuidv4()}@example.com`,
+        provider: 'local',
+        favorites: [{ agentId: agentId }],
+      });
+
+      await User.create({
+        _id: user2Id,
+        name: 'Test User 2',
+        email: `test2-${uuidv4()}@example.com`,
+        provider: 'local',
+        favorites: [{ agentId: agentId }, { agentId: `agent_${uuidv4()}` }], // second entry is a different, random agent id
+      });
+
+      // Delete the agent by its string id
+      await deleteAgent({ id: agentId });
+
+      // Verify agent is removed from both users' favorites; user 2 keeps the other entry
+      const user1After = await User.findById(user1Id);
+      const user2After = await User.findById(user2Id);
+
+      expect(user1After.favorites).toHaveLength(0);
+      expect(user2After.favorites).toHaveLength(1);
+      expect(user2After.favorites.some((f) => f.agentId === agentId)).toBe(false);
+    });
+
+    test('should preserve other agents in database when one agent is deleted', async () => {
+      const agentToDeleteId = `agent_${uuidv4()}`;
+      const agentToKeep1Id = `agent_${uuidv4()}`;
+      const agentToKeep2Id = `agent_${uuidv4()}`;
+      const authorId = new mongoose.Types.ObjectId();
+
+      // Create three agents by the same author: one to delete, two to keep
+      await createAgent({
+        id: agentToDeleteId,
+        name: 'Agent To Delete',
+        provider: 'test',
+        model: 'test-model',
+        author: authorId,
+      });
+
+      await createAgent({
+        id: agentToKeep1Id,
+        name: 'Agent To Keep 1',
+        provider: 'test',
+        model: 'test-model',
+        author: authorId,
+      });
+
+      await createAgent({
+        id: agentToKeep2Id,
+        name: 'Agent To Keep 2',
+        provider: 'test',
+        model: 'test-model',
+        author: authorId,
+      });
+
+      // Precondition: all three agents exist
+      expect(await getAgent({ id: agentToDeleteId })).not.toBeNull();
+      expect(await getAgent({ id: agentToKeep1Id })).not.toBeNull();
+      expect(await getAgent({ id: agentToKeep2Id })).not.toBeNull();
+
+      // Delete only the first agent
+      await deleteAgent({ id: agentToDeleteId });
+
+      // Verify only the deleted agent is gone; the others still exist with their original names
+      expect(await getAgent({ id: agentToDeleteId })).toBeNull();
+      const keptAgent1 = await getAgent({ id: agentToKeep1Id });
+      const keptAgent2 = await getAgent({ id: agentToKeep2Id });
+      expect(keptAgent1).not.toBeNull();
+      expect(keptAgent1.name).toBe('Agent To Keep 1');
+      expect(keptAgent2).not.toBeNull();
+      expect(keptAgent2.name).toBe('Agent To Keep 2');
+    });
+
+    test('should preserve other agents in user favorites when one agent is deleted', async () => {
+      const agentToDeleteId = `agent_${uuidv4()}`;
+      const agentToKeep1Id = `agent_${uuidv4()}`;
+      const agentToKeep2Id = `agent_${uuidv4()}`;
+      const authorId = new mongoose.Types.ObjectId();
+      const userId = new mongoose.Types.ObjectId();
+
+      // Create three agents by the same author: one to delete, two to keep
+      await createAgent({
+        id: agentToDeleteId,
+        name: 'Agent To Delete',
+        provider: 'test',
+        model: 'test-model',
+        author: authorId,
+      });
+
+      await createAgent({
+        id: agentToKeep1Id,
+        name: 'Agent To Keep 1',
+        provider: 'test',
+        model: 'test-model',
+        author: authorId,
+      });
+
+      await createAgent({
+        id: agentToKeep2Id,
+        name: 'Agent To Keep 2',
+        provider: 'test',
+        model: 'test-model',
+        author: authorId,
+      });
+
+      // Create a user who has all three agents in favorites
+      await User.create({
+        _id: userId,
+        name: 'Test User',
+        email: `test-${uuidv4()}@example.com`,
+        provider: 'local',
+        favorites: [
+          { agentId: agentToDeleteId },
+          { agentId: agentToKeep1Id },
+          { agentId: agentToKeep2Id },
+        ],
+      });
+
+      // Precondition: user has all three agent favorites
+      const userBefore = await User.findById(userId);
+      expect(userBefore.favorites).toHaveLength(3);
+
+      // Delete only the first agent
+      await deleteAgent({ id: agentToDeleteId });
+
+      // Verify only the deleted agent's favorite is removed; the other two remain
+      const userAfter = await User.findById(userId);
+      expect(userAfter.favorites).toHaveLength(2);
+      expect(userAfter.favorites.some((f) => f.agentId === agentToDeleteId)).toBe(false);
+      expect(userAfter.favorites.some((f) => f.agentId === agentToKeep1Id)).toBe(true);
+      expect(userAfter.favorites.some((f) => f.agentId === agentToKeep2Id)).toBe(true);
+    });
+
+    test('should not affect users who do not have deleted agent in favorites', async () => {
+      const agentToDeleteId = `agent_${uuidv4()}`;
+      const otherAgentId = `agent_${uuidv4()}`;
+      const authorId = new mongoose.Types.ObjectId();
+      const userWithDeletedAgentId = new mongoose.Types.ObjectId();
+      const userWithoutDeletedAgentId = new mongoose.Types.ObjectId();
+
+      // Create two agents by the same author: one to delete, one to keep
+      await createAgent({
+        id: agentToDeleteId,
+        name: 'Agent To Delete',
+        provider: 'test',
+        model: 'test-model',
+        author: authorId,
+      });
+
+      await createAgent({
+        id: otherAgentId,
+        name: 'Other Agent',
+        provider: 'test',
+        model: 'test-model',
+        author: authorId,
+      });
+
+      // Create a user whose favorites hold the agent to be deleted plus a model favorite
+      await User.create({
+        _id: userWithDeletedAgentId,
+        name: 'User With Deleted Agent',
+        email: `user1-${uuidv4()}@example.com`,
+        provider: 'local',
+        favorites: [{ agentId: agentToDeleteId }, { model: 'gpt-4', endpoint: 'openAI' }],
+      });
+
+      // Create a user whose favorites reference only the other agent plus a model favorite
+      await User.create({
+        _id: userWithoutDeletedAgentId,
+        name: 'User Without Deleted Agent',
+        email: `user2-${uuidv4()}@example.com`,
+        provider: 'local',
+        favorites: [{ agentId: otherAgentId }, { model: 'claude-3', endpoint: 'anthropic' }],
+      });
+
+      // Delete only the first agent
+      await deleteAgent({ id: agentToDeleteId });
+
+      // Verify the first user lost only the deleted agent's favorite
+      const userWithDeleted = await User.findById(userWithDeletedAgentId);
+      expect(userWithDeleted.favorites).toHaveLength(1);
+      expect(userWithDeleted.favorites.some((f) => f.agentId === agentToDeleteId)).toBe(false);
+      expect(userWithDeleted.favorites.some((f) => f.model === 'gpt-4')).toBe(true);
+
+      // Verify the second user still has both of their favorites
+      const userWithoutDeleted = await User.findById(userWithoutDeletedAgentId);
+      expect(userWithoutDeleted.favorites).toHaveLength(2);
+      expect(userWithoutDeleted.favorites.some((f) => f.agentId === otherAgentId)).toBe(true);
+      expect(userWithoutDeleted.favorites.some((f) => f.model === 'claude-3')).toBe(true);
+    });
+
+    test('should remove all user agents from favorites when deleteUserAgents is called', async () => {
      const authorId = new mongoose.Types.ObjectId();
      const otherAuthorId = new mongoose.Types.ObjectId();
+      const userId = new mongoose.Types.ObjectId();

-      const agentIds = [];
-      for (let i = 0; i < 5; i++) {
-        const id = `agent_${uuidv4()}`;
-        agentIds.push(id);
-        await createAgent({
-          id,
-          name: `Agent ${i}`,
-          provider: 'test',
-          model: 'test-model',
-          author: authorId,
-        });
-      }
+      const agent1Id = `agent_${uuidv4()}`;
+      const agent2Id = `agent_${uuidv4()}`;
+      const otherAuthorAgentId = `agent_${uuidv4()}`;

-      for (let i = 0; i < 3; i++) {
-        await createAgent({
-          id: `other_agent_${uuidv4()}`,
-          name: `Other Agent ${i}`,
-          provider: 'test',
-          model: 'test-model',
-          author: otherAuthorId,
-        });
-      }
+      // Create agents by the author to be deleted
+      await createAgent({
+        id: agent1Id,
+        name: 'Author Agent 1',
+        provider: 'test',
+        model: 'test-model',
+        author: authorId,
+      });

-      const result = await getListAgents({ author: authorId.toString() });
+      await createAgent({
+        id: agent2Id,
+        name: 'Author Agent 2',
+        provider: 'test',
+        model: 'test-model',
+        author: authorId,
+      });

-      expect(result).toBeDefined();
-      expect(result.data).toBeDefined();
-      expect(result.data).toHaveLength(5);
-      expect(result.has_more).toBe(true);
+      // Create agent by different author (should not be deleted)
+      await createAgent({
+        id: otherAuthorAgentId,
+        name: 'Other Author Agent',
+        provider: 'test',
+        model: 'test-model',
+        author: otherAuthorId,
+      });

-      for (const agent of result.data) {
-        expect(agent.author).toBe(authorId.toString());
-      }
+      // Create user with all agents in favorites
+      await User.create({
+        _id: userId,
+        name: 'Test User',
+        email: `test-${uuidv4()}@example.com`,
+        provider: 'local',
+        favorites: [
+          { agentId: agent1Id },
+          { agentId: agent2Id },
+          { agentId: otherAuthorAgentId },
+          { model: 'gpt-4', endpoint: 'openAI' },
+        ],
+      });
+
+      // Verify user has all favorites
+      const userBefore = await User.findById(userId);
+      expect(userBefore.favorites).toHaveLength(4);
+
+      // Delete all agents by the author
+      await deleteUserAgents(authorId.toString());
+
+      // Verify author's agents are deleted from database
+      expect(await getAgent({ id: agent1Id })).toBeNull();
+      expect(await getAgent({ id: agent2Id })).toBeNull();
+
+      // Verify other author's agent still exists
+      expect(await getAgent({ id: otherAuthorAgentId })).not.toBeNull();
+
+      // Verify user favorites: author's agents removed, others remain
+      const userAfter = await User.findById(userId);
+      expect(userAfter.favorites).toHaveLength(2);
+      expect(userAfter.favorites.some((f) => f.agentId === agent1Id)).toBe(false);
+      expect(userAfter.favorites.some((f) => f.agentId === agent2Id)).toBe(false);
+      expect(userAfter.favorites.some((f) => f.agentId === otherAuthorAgentId)).toBe(true);
+      expect(userAfter.favorites.some((f) => f.model === 'gpt-4')).toBe(true);
+    });
+
+    test('should handle deleteUserAgents when agents are in multiple users favorites', async () => {
+      const authorId = new mongoose.Types.ObjectId();
+      const user1Id = new mongoose.Types.ObjectId();
+      const user2Id = new mongoose.Types.ObjectId();
+      const user3Id = new mongoose.Types.ObjectId();
+
+      const agent1Id = `agent_${uuidv4()}`;
+      const agent2Id = `agent_${uuidv4()}`;
+      const unrelatedAgentId = `agent_${uuidv4()}`; // only referenced in favorites; no Agent document is created for it here
+
+      // Create two agents owned by the author whose agents will be deleted
+      await createAgent({
+        id: agent1Id,
+        name: 'Author Agent 1',
+        provider: 'test',
+        model: 'test-model',
+        author: authorId,
+      });
+
+      await createAgent({
+        id: agent2Id,
+        name: 'Author Agent 2',
+        provider: 'test',
+        model: 'test-model',
+        author: authorId,
+      });
+
+      // Create users with various favorites configurations: both agents, one agent + a model, no author agents
+      await User.create({
+        _id: user1Id,
+        name: 'User 1',
+        email: `user1-${uuidv4()}@example.com`,
+        provider: 'local',
+        favorites: [{ agentId: agent1Id }, { agentId: agent2Id }],
+      });
+
+      await User.create({
+        _id: user2Id,
+        name: 'User 2',
+        email: `user2-${uuidv4()}@example.com`,
+        provider: 'local',
+        favorites: [{ agentId: agent1Id }, { model: 'claude-3', endpoint: 'anthropic' }],
+      });
+
+      await User.create({
+        _id: user3Id,
+        name: 'User 3',
+        email: `user3-${uuidv4()}@example.com`,
+        provider: 'local',
+        favorites: [{ agentId: unrelatedAgentId }, { model: 'gpt-4', endpoint: 'openAI' }],
+      });
+
+      // Delete all agents by the author (the author id is passed as a string)
+      await deleteUserAgents(authorId.toString());
+
+      // Verify user 1 lost both favorites, since both referenced the author's agents
+      const user1After = await User.findById(user1Id);
+      expect(user1After.favorites).toHaveLength(0);
+
+      const user2After = await User.findById(user2Id); // user 2 keeps only the model favorite
+      expect(user2After.favorites).toHaveLength(1);
+      expect(user2After.favorites.some((f) => f.agentId === agent1Id)).toBe(false);
+      expect(user2After.favorites.some((f) => f.model === 'claude-3')).toBe(true);
+
+      // User 3 should be completely unaffected (none of their favorites reference the author's agents)
+      const user3After = await User.findById(user3Id);
+      expect(user3After.favorites).toHaveLength(2);
+      expect(user3After.favorites.some((f) => f.agentId === unrelatedAgentId)).toBe(true);
+      expect(user3After.favorites.some((f) => f.model === 'gpt-4')).toBe(true);
+    });
+
+    test('should handle deleteUserAgents when user has no agents', async () => {
+      const authorWithNoAgentsId = new mongoose.Types.ObjectId();
+      const otherAuthorId = new mongoose.Types.ObjectId();
+      const userId = new mongoose.Types.ObjectId();
+
+      const existingAgentId = `agent_${uuidv4()}`;
+
+      // Create agent by different author
+      await createAgent({
+        id: existingAgentId,
+        name: 'Existing Agent',
+        provider: 'test',
+        model: 'test-model',
+        author: otherAuthorId,
+      });
+
+      // Create user with favorites
+      await User.create({
+        _id: userId,
+        name: 'Test User',
+        email: `test-${uuidv4()}@example.com`,
+        provider: 'local',
+        favorites: [{ agentId: existingAgentId }, { model: 'gpt-4', endpoint: 'openAI' }],
+      });
+
+      // Delete agents for user with no agents (should be a no-op)
+      await deleteUserAgents(authorWithNoAgentsId.toString());
+
+      // Verify existing agent still exists
+      expect(await getAgent({ id: existingAgentId })).not.toBeNull();
+
+      // Verify user favorites are unchanged
+      const userAfter = await User.findById(userId);
+      expect(userAfter.favorites).toHaveLength(2);
+      expect(userAfter.favorites.some((f) => f.agentId === existingAgentId)).toBe(true);
+      expect(userAfter.favorites.some((f) => f.model === 'gpt-4')).toBe(true);
+    });
+
+    test('should handle deleteUserAgents when agents are not in any favorites', async () => {
+      const authorId = new mongoose.Types.ObjectId();
+      const userId = new mongoose.Types.ObjectId();
+
+      const agent1Id = `agent_${uuidv4()}`;
+      const agent2Id = `agent_${uuidv4()}`;
+
+      // Create agents by the author
+      await createAgent({
+        id: agent1Id,
+        name: 'Agent 1',
+        provider: 'test',
+        model: 'test-model',
+        author: authorId,
+      });
+
+      await createAgent({
+        id: agent2Id,
+        name: 'Agent 2',
+        provider: 'test',
+        model: 'test-model',
+        author: authorId,
+      });
+
+      // Create user with favorites that don't include these agents
+      await User.create({
+        _id: userId,
+        name: 'Test User',
+        email: `test-${uuidv4()}@example.com`,
+        provider: 'local',
+        favorites: [{ model: 'gpt-4', endpoint: 'openAI' }],
+      });
+
+      // Verify agents exist
+      expect(await getAgent({ id: agent1Id })).not.toBeNull();
+      expect(await getAgent({ id: agent2Id })).not.toBeNull();
+
+      // Delete all agents by the author
+      await deleteUserAgents(authorId.toString());
+
+      // Verify agents are deleted
+      expect(await getAgent({ id: agent1Id })).toBeNull();
+      expect(await getAgent({ id: agent2Id })).toBeNull();
+
+      // Verify user favorites are unchanged
+      const userAfter = await User.findById(userId);
+      expect(userAfter.favorites).toHaveLength(1);
+      expect(userAfter.favorites.some((f) => f.model === 'gpt-4')).toBe(true);
    });

    test('should update agent projects', async () => {
@ -733,26 +1179,6 @@ describe('models/Agent', () => {
        expect(result).toBe(expected);
      });

-      test('should handle getListAgents with invalid author format', async () => {
-        try {
-          const result = await getListAgents({ author: 'invalid-object-id' });
-          expect(result.data).toEqual([]);
-        } catch (error) {
-          expect(error).toBeDefined();
-        }
-      });
-
-      test('should handle getListAgents with no agents', async () => {
-        const authorId = new mongoose.Types.ObjectId();
-        const result = await getListAgents({ author: authorId.toString() });
-
-        expect(result).toBeDefined();
-        expect(result.data).toEqual([]);
-        expect(result.has_more).toBe(false);
-        expect(result.first_id).toBeNull();
-        expect(result.last_id).toBeNull();
-      });
-
      test('should handle updateAgentProjects with non-existent agent', async () => {
        const nonExistentId = `agent_${uuidv4()}`;
        const userId = new mongoose.Types.ObjectId();
@ -2366,17 +2792,6 @@ describe('models/Agent', () => {
      expect(result).toBeNull();
    });

-    test('should handle getListAgents with no agents', async () => {
-      const authorId = new mongoose.Types.ObjectId();
-      const result = await getListAgents({ author: authorId.toString() });
-
-      expect(result).toBeDefined();
-      expect(result.data).toEqual([]);
-      expect(result.has_more).toBe(false);
-      expect(result.first_id).toBeNull();
-      expect(result.last_id).toBeNull();
-    });
-
    test('should handle updateAgent with MongoDB operators mixed with direct updates', async () => {
      const agentId = `agent_${uuidv4()}`;
      const authorId = new mongoose.Types.ObjectId();
--- a/api/models/Conversation.js
+++ b/api/models/Conversation.js
@ -124,10 +124,15 @@ module.exports = {
        updateOperation,
        {
          new: true,
-          upsert: true,
+          upsert: metadata?.noUpsert !== true,
        },
      );

+      if (!conversation) {
+        logger.debug('[saveConvo] Conversation not found, skipping update');
+        return null;
+      }
+
      return conversation.toObject();
    } catch (error) {
      logger.error('[saveConvo] Error saving conversation', error);
@ -223,7 +228,7 @@ module.exports = {
            },
          ],
        };
-      } catch (err) {
+      } catch (_err) {
        logger.warn('[getConvosByCursor] Invalid cursor format, starting from beginning');
      }
      if (cursorFilter) {
@ -356,6 +361,7 @@ module.exports = {

      const deleteMessagesResult = await deleteMessages({
        conversationId: { $in: conversationIds },
+        user,
      });

      return { ...deleteConvoResult, messages: deleteMessagesResult };
--- a/api/models/Conversation.spec.js
+++ b/api/models/Conversation.spec.js
@ -106,6 +106,47 @@ describe('Conversation Operations', () => {
      expect(result.conversationId).toBe(newConversationId);
    });

+    it('should not create a conversation when noUpsert is true and conversation does not exist', async () => {
+      const nonExistentId = uuidv4();
+      const result = await saveConvo(
+        mockReq,
+        { conversationId: nonExistentId, title: 'Ghost Title' },
+        { noUpsert: true },
+      );
+
+      expect(result).toBeNull();
+
+      const dbConvo = await Conversation.findOne({ conversationId: nonExistentId });
+      expect(dbConvo).toBeNull();
+    });
+
+    it('should update an existing conversation when noUpsert is true', async () => {
+      await saveConvo(mockReq, mockConversationData);
+
+      const result = await saveConvo(
+        mockReq,
+        { conversationId: mockConversationData.conversationId, title: 'Updated Title' },
+        { noUpsert: true },
+      );
+
+      expect(result).not.toBeNull();
+      expect(result.title).toBe('Updated Title');
+      expect(result.conversationId).toBe(mockConversationData.conversationId);
+    });
+
+    it('should still upsert by default when noUpsert is not provided', async () => {
+      const newId = uuidv4();
+      const result = await saveConvo(mockReq, {
+        conversationId: newId,
+        title: 'New Conversation',
+        endpoint: EModelEndpoint.openAI,
+      });
+
+      expect(result).not.toBeNull();
+      expect(result.conversationId).toBe(newId);
+      expect(result.title).toBe('New Conversation');
+    });
+
    it('should handle unsetFields metadata', async () => {
      const metadata = {
        unsetFields: { someField: 1 },
@ -122,7 +163,6 @@ describe('Conversation Operations', () => {

  describe('isTemporary conversation handling', () => {
    it('should save a conversation with expiredAt when isTemporary is true', async () => {
-      // Mock app config with 24 hour retention
      mockReq.config.interfaceConfig.temporaryChatRetention = 24;

      mockReq.body = { isTemporary: true };
@ -135,7 +175,6 @@ describe('Conversation Operations', () => {
      expect(result.expiredAt).toBeDefined();
      expect(result.expiredAt).toBeInstanceOf(Date);

-      // Verify expiredAt is approximately 24 hours in the future
      const expectedExpirationTime = new Date(beforeSave.getTime() + 24 * 60 * 60 * 1000);
      const actualExpirationTime = new Date(result.expiredAt);

@ -157,7 +196,6 @@ describe('Conversation Operations', () => {
    });

    it('should save a conversation without expiredAt when isTemporary is not provided', async () => {
-      // No isTemporary in body
      mockReq.body = {};

      const result = await saveConvo(mockReq, mockConversationData);
@ -167,7 +205,6 @@ describe('Conversation Operations', () => {
    });

    it('should use custom retention period from config', async () => {
-      // Mock app config with 48 hour retention
      mockReq.config.interfaceConfig.temporaryChatRetention = 48;

      mockReq.body = { isTemporary: true };
@ -512,6 +549,7 @@ describe('Conversation Operations', () => {
      expect(result.messages.deletedCount).toBe(5);
      expect(deleteMessages).toHaveBeenCalledWith({
        conversationId: { $in: [mockConversationData.conversationId] },
+        user: 'user123',
      });

      // Verify conversation was deleted
--- a/api/models/File.js
+++ b/api/models/File.js
@ -26,7 +26,8 @@ const getFiles = async (filter, _sortOptions, selectFields = { text: 0 }) => {
 };

 /**
- * Retrieves tool files (files that are embedded or have a fileIdentifier) from an array of file IDs
+ * Retrieves tool files (files that are embedded or have a fileIdentifier) from an array of file IDs.
+ * Note: execute_code files are handled separately by getCodeGeneratedFiles and getUserCodeFiles.
 * @param {string[]} fileIds - Array of file_id strings to search for
 * @param {Set<EToolResources>} toolResourceSet - Optional filter for tool resources
 * @returns {Promise<Array<MongoFile>>} Files that match the criteria
@ -37,21 +38,25 @@ const getToolFilesByIds = async (fileIds, toolResourceSet) => {
  }

  try {
-    const filter = {
-      file_id: { $in: fileIds },
-      $or: [],
-    };
+    const orConditions = [];

    if (toolResourceSet.has(EToolResources.context)) {
-      filter.$or.push({ text: { $exists: true, $ne: null }, context: FileContext.agents });
+      orConditions.push({ text: { $exists: true, $ne: null }, context: FileContext.agents });
    }
    if (toolResourceSet.has(EToolResources.file_search)) {
-      filter.$or.push({ embedded: true });
+      orConditions.push({ embedded: true });
    }
-    if (toolResourceSet.has(EToolResources.execute_code)) {
-      filter.$or.push({ 'metadata.fileIdentifier': { $exists: true } });
+
+    if (orConditions.length === 0) {
+      return [];
    }

+    const filter = {
+      file_id: { $in: fileIds },
+      context: { $ne: FileContext.execute_code }, // Exclude code-generated files
+      $or: orConditions,
+    };
+
    const selectFields = { text: 0 };
    const sortOptions = { updatedAt: -1 };

@ -62,6 +67,70 @@ const getToolFilesByIds = async (fileIds, toolResourceSet) => {
  }
 };

+/**
+ * Retrieves files generated by code execution for a given conversation.
+ * Only files whose context is `execute_code`, whose messageId is one of `messageIds`,
+ * and that carry `metadata.fileIdentifier` are matched.
+ * These files are stored locally with fileIdentifier metadata for code env re-upload.
+ * @param {string} conversationId - The conversation ID to search for; a falsy value yields an empty array
+ * @param {string[]} [messageIds] - messageIds to filter by (for linear thread filtering).
+ *   May be omitted by the caller, but a missing or empty array yields an empty result
+ *   rather than every code-generated file in the conversation.
+ * @returns {Promise<Array<MongoFile>>} Files generated by code execution in the conversation;
+ *   an empty array when inputs are missing or when retrieval throws (the error is logged, not rethrown)
+ */
+const getCodeGeneratedFiles = async (conversationId, messageIds) => {
+  if (!conversationId) {
+    return [];
+  }
+
+  /** Without messageIds there is no thread to filter by, so return nothing instead of all files */
+  if (!messageIds || messageIds.length === 0) {
+    return [];
+  }
+
+  try {
+    const filter = {
+      conversationId,
+      context: FileContext.execute_code,
+      messageId: { $exists: true, $in: messageIds },
+      'metadata.fileIdentifier': { $exists: true },
+    };
+
+    const selectFields = { text: 0 };
+    const sortOptions = { createdAt: 1 }; // presumably oldest first; how getFiles applies this is not visible here
+
+    return await getFiles(filter, sortOptions, selectFields);
+  } catch (error) {
+    logger.error('[getCodeGeneratedFiles] Error retrieving code generated files:', error);
+    return [];
+  }
+};
+
+/**
+ * Retrieves user-uploaded execute_code files (not code-generated) by their file IDs.
+ * These are files with fileIdentifier metadata but context is NOT execute_code (e.g., agents or message_attachment).
+ * File IDs should be collected from message.files arrays in the current thread.
+ * @param {string[]} fileIds - Array of file IDs to fetch (from message.files in the thread);
+ *   a missing or empty array yields an empty result
+ * @returns {Promise<Array<MongoFile>>} User-uploaded execute_code files; an empty array when
+ *   no file IDs are given or when retrieval throws (the error is logged, not rethrown)
+ */
+const getUserCodeFiles = async (fileIds) => {
+  if (!fileIds || fileIds.length === 0) {
+    return [];
+  }
+
+  try {
+    const filter = {
+      file_id: { $in: fileIds },
+      context: { $ne: FileContext.execute_code }, // code-generated files are excluded here
+      'metadata.fileIdentifier': { $exists: true },
+    };
+
+    const selectFields = { text: 0 };
+    const sortOptions = { createdAt: 1 }; // presumably oldest first; how getFiles applies this is not visible here
+
+    return await getFiles(filter, sortOptions, selectFields);
+  } catch (error) {
+    logger.error('[getUserCodeFiles] Error retrieving user code files:', error);
+    return [];
+  }
+};
+
 /**
 * Creates a new file with a TTL of 1 hour.
 * @param {MongoFile} data - The file data to be created, must contain file_id.
@ -169,6 +238,8 @@ module.exports = {
  findFileById,
  getFiles,
  getToolFilesByIds,
+  getCodeGeneratedFiles,
+  getUserCodeFiles,
  createFile,
  updateFile,
  updateFileUsage,
--- a/api/models/Role.js
+++ b/api/models/Role.js
@ -114,6 +114,28 @@ async function updateAccessPermissions(roleName, permissionsUpdate, roleData) {
      }
    }

+    // Migrate legacy SHARED_GLOBAL → SHARE for PROMPTS and AGENTS.
+    // SHARED_GLOBAL was removed in favour of SHARE in PR #11283. If the DB still has
+    // SHARED_GLOBAL but not SHARE, inherit the value so sharing intent is preserved.
+    const legacySharedGlobalTypes = ['PROMPTS', 'AGENTS'];
+    for (const legacyPermType of legacySharedGlobalTypes) {
+      const existingTypePerms = currentPermissions[legacyPermType];
+      if (
+        existingTypePerms &&
+        'SHARED_GLOBAL' in existingTypePerms &&
+        !('SHARE' in existingTypePerms) &&
+        updates[legacyPermType] &&
+        // Don't override an explicit SHARE value the caller already provided
+        !('SHARE' in updates[legacyPermType])
+      ) {
+        const inheritedValue = existingTypePerms['SHARED_GLOBAL'];
+        updates[legacyPermType]['SHARE'] = inheritedValue;
+        logger.info(
+          `Migrating '${roleName}' role ${legacyPermType}.SHARED_GLOBAL=${inheritedValue} → SHARE`,
+        );
+      }
+    }
+
    for (const [permissionType, permissions] of Object.entries(updates)) {
      const currentTypePermissions = currentPermissions[permissionType] || {};
      updatedPermissions[permissionType] = { ...currentTypePermissions };
@ -129,6 +151,32 @@ async function updateAccessPermissions(roleName, permissionsUpdate, roleData) {
      }
    }

+    // Clean up orphaned SHARED_GLOBAL fields left in DB after the schema rename.
+    // Since we $set the full permissions object, deleting from updatedPermissions
+    // is sufficient to remove the field from MongoDB.
+    for (const legacyPermType of legacySharedGlobalTypes) {
+      const existingTypePerms = currentPermissions[legacyPermType];
+      if (existingTypePerms && 'SHARED_GLOBAL' in existingTypePerms) {
+        if (!updates[legacyPermType]) {
+          // permType wasn't in the update payload so the migration block above didn't run.
+          // Create a writable copy and handle the SHARED_GLOBAL → SHARE inheritance here
+          // to avoid removing SHARED_GLOBAL without writing SHARE (data loss).
+          updatedPermissions[legacyPermType] = { ...existingTypePerms };
+          if (!('SHARE' in existingTypePerms)) {
+            updatedPermissions[legacyPermType]['SHARE'] = existingTypePerms['SHARED_GLOBAL'];
+            logger.info(
+              `Migrating '${roleName}' role ${legacyPermType}.SHARED_GLOBAL=${existingTypePerms['SHARED_GLOBAL']} → SHARE`,
+            );
+          }
+        }
+        delete updatedPermissions[legacyPermType]['SHARED_GLOBAL'];
+        hasChanges = true;
+        logger.info(
+          `Removed legacy SHARED_GLOBAL field from '${roleName}' role ${legacyPermType} permissions`,
+        );
+      }
+    }
+
    if (hasChanges) {
      const updateObj = { permissions: updatedPermissions };

--- a/api/models/Role.spec.js
+++ b/api/models/Role.spec.js
@ -46,7 +46,7 @@ describe('updateAccessPermissions', () => {
        [PermissionTypes.PROMPTS]: {
          CREATE: true,
          USE: true,
-          SHARED_GLOBAL: false,
+          SHARE: false,
        },
      },
    }).save();
@ -55,7 +55,7 @@ describe('updateAccessPermissions', () => {
      [PermissionTypes.PROMPTS]: {
        CREATE: true,
        USE: true,
-        SHARED_GLOBAL: true,
+        SHARE: true,
      },
    });

@ -63,7 +63,7 @@ describe('updateAccessPermissions', () => {
    expect(updatedRole.permissions[PermissionTypes.PROMPTS]).toEqual({
      CREATE: true,
      USE: true,
-      SHARED_GLOBAL: true,
+      SHARE: true,
    });
  });

@ -74,7 +74,7 @@ describe('updateAccessPermissions', () => {
        [PermissionTypes.PROMPTS]: {
          CREATE: true,
          USE: true,
-          SHARED_GLOBAL: false,
+          SHARE: false,
        },
      },
    }).save();
@ -83,7 +83,7 @@ describe('updateAccessPermissions', () => {
      [PermissionTypes.PROMPTS]: {
        CREATE: true,
        USE: true,
-        SHARED_GLOBAL: false,
+        SHARE: false,
      },
    });

@ -91,7 +91,7 @@ describe('updateAccessPermissions', () => {
    expect(updatedRole.permissions[PermissionTypes.PROMPTS]).toEqual({
      CREATE: true,
      USE: true,
-      SHARED_GLOBAL: false,
+      SHARE: false,
    });
  });

@ -110,20 +110,20 @@ describe('updateAccessPermissions', () => {
        [PermissionTypes.PROMPTS]: {
          CREATE: true,
          USE: true,
-          SHARED_GLOBAL: false,
+          SHARE: false,
        },
      },
    }).save();

    await updateAccessPermissions(SystemRoles.USER, {
-      [PermissionTypes.PROMPTS]: { SHARED_GLOBAL: true },
+      [PermissionTypes.PROMPTS]: { SHARE: true },
    });

    const updatedRole = await getRoleByName(SystemRoles.USER);
    expect(updatedRole.permissions[PermissionTypes.PROMPTS]).toEqual({
      CREATE: true,
      USE: true,
-      SHARED_GLOBAL: true,
+      SHARE: true,
    });
  });

@ -134,7 +134,7 @@ describe('updateAccessPermissions', () => {
        [PermissionTypes.PROMPTS]: {
          CREATE: true,
          USE: true,
-          SHARED_GLOBAL: false,
+          SHARE: false,
        },
      },
    }).save();
@ -147,7 +147,7 @@ describe('updateAccessPermissions', () => {
    expect(updatedRole.permissions[PermissionTypes.PROMPTS]).toEqual({
      CREATE: true,
      USE: false,
-      SHARED_GLOBAL: false,
+      SHARE: false,
    });
  });

@ -155,13 +155,13 @@ describe('updateAccessPermissions', () => {
    await new Role({
      name: SystemRoles.USER,
      permissions: {
-        [PermissionTypes.PROMPTS]: { CREATE: true, USE: true, SHARED_GLOBAL: false },
+        [PermissionTypes.PROMPTS]: { CREATE: true, USE: true, SHARE: false },
        [PermissionTypes.BOOKMARKS]: { USE: true },
      },
    }).save();

    await updateAccessPermissions(SystemRoles.USER, {
-      [PermissionTypes.PROMPTS]: { USE: false, SHARED_GLOBAL: true },
+      [PermissionTypes.PROMPTS]: { USE: false, SHARE: true },
      [PermissionTypes.BOOKMARKS]: { USE: false },
    });

@ -169,7 +169,7 @@ describe('updateAccessPermissions', () => {
    expect(updatedRole.permissions[PermissionTypes.PROMPTS]).toEqual({
      CREATE: true,
      USE: false,
-      SHARED_GLOBAL: true,
+      SHARE: true,
    });
    expect(updatedRole.permissions[PermissionTypes.BOOKMARKS]).toEqual({ USE: false });
  });
@ -178,19 +178,19 @@ describe('updateAccessPermissions', () => {
    await new Role({
      name: SystemRoles.USER,
      permissions: {
-        [PermissionTypes.PROMPTS]: { CREATE: true, USE: true, SHARED_GLOBAL: false },
+        [PermissionTypes.PROMPTS]: { CREATE: true, USE: true, SHARE: false },
      },
    }).save();

    await updateAccessPermissions(SystemRoles.USER, {
-      [PermissionTypes.PROMPTS]: { USE: false, SHARED_GLOBAL: true },
+      [PermissionTypes.PROMPTS]: { USE: false, SHARE: true },
    });

    const updatedRole = await getRoleByName(SystemRoles.USER);
    expect(updatedRole.permissions[PermissionTypes.PROMPTS]).toEqual({
      CREATE: true,
      USE: false,
-      SHARED_GLOBAL: true,
+      SHARE: true,
    });
  });

@ -214,13 +214,13 @@ describe('updateAccessPermissions', () => {
    await new Role({
      name: SystemRoles.USER,
      permissions: {
-        [PermissionTypes.PROMPTS]: { CREATE: true, USE: true, SHARED_GLOBAL: false },
+        [PermissionTypes.PROMPTS]: { CREATE: true, USE: true, SHARE: false },
        [PermissionTypes.MULTI_CONVO]: { USE: false },
      },
    }).save();

    await updateAccessPermissions(SystemRoles.USER, {
-      [PermissionTypes.PROMPTS]: { SHARED_GLOBAL: true },
+      [PermissionTypes.PROMPTS]: { SHARE: true },
      [PermissionTypes.MULTI_CONVO]: { USE: true },
    });

@ -228,11 +228,117 @@ describe('updateAccessPermissions', () => {
    expect(updatedRole.permissions[PermissionTypes.PROMPTS]).toEqual({
      CREATE: true,
      USE: true,
-      SHARED_GLOBAL: true,
+      SHARE: true,
    });
    expect(updatedRole.permissions[PermissionTypes.MULTI_CONVO]).toEqual({ USE: true });
  });

+  it('should inherit SHARED_GLOBAL value into SHARE when SHARE is absent from both DB and update', async () => {
+    // Simulates the startup backfill path: caller sends SHARE_PUBLIC but not SHARE;
+    // migration should inherit SHARED_GLOBAL to preserve the deployment's sharing intent.
+    await Role.collection.insertOne({
+      name: SystemRoles.USER,
+      permissions: {
+        [PermissionTypes.PROMPTS]: { USE: true, CREATE: true, SHARED_GLOBAL: true },
+        [PermissionTypes.AGENTS]: { USE: true, CREATE: true, SHARED_GLOBAL: false },
+      },
+    });
+
+    await updateAccessPermissions(SystemRoles.USER, {
+      // No explicit SHARE — migration should inherit from SHARED_GLOBAL
+      [PermissionTypes.PROMPTS]: { SHARE_PUBLIC: false },
+      [PermissionTypes.AGENTS]: { SHARE_PUBLIC: false },
+    });
+
+    const updatedRole = await getRoleByName(SystemRoles.USER);
+
+    // SHARED_GLOBAL=true → SHARE=true (inherited)
+    expect(updatedRole.permissions[PermissionTypes.PROMPTS].SHARE).toBe(true);
+    // SHARED_GLOBAL=false → SHARE=false (inherited)
+    expect(updatedRole.permissions[PermissionTypes.AGENTS].SHARE).toBe(false);
+    // SHARED_GLOBAL cleaned up
+    expect(updatedRole.permissions[PermissionTypes.PROMPTS].SHARED_GLOBAL).toBeUndefined();
+    expect(updatedRole.permissions[PermissionTypes.AGENTS].SHARED_GLOBAL).toBeUndefined();
+  });
+
+  it('should respect explicit SHARE in update payload and not override it with SHARED_GLOBAL', async () => {
+    // Caller explicitly passes SHARE: false even though SHARED_GLOBAL=true in DB.
+    // The explicit intent must win; migration must not silently overwrite it.
+    await Role.collection.insertOne({
+      name: SystemRoles.USER,
+      permissions: {
+        [PermissionTypes.PROMPTS]: { USE: true, SHARED_GLOBAL: true },
+      },
+    });
+
+    await updateAccessPermissions(SystemRoles.USER, {
+      [PermissionTypes.PROMPTS]: { SHARE: false }, // explicit false — should be preserved
+    });
+
+    const updatedRole = await getRoleByName(SystemRoles.USER);
+
+    expect(updatedRole.permissions[PermissionTypes.PROMPTS].SHARE).toBe(false);
+    expect(updatedRole.permissions[PermissionTypes.PROMPTS].SHARED_GLOBAL).toBeUndefined();
+  });
+
+  it('should migrate SHARED_GLOBAL to SHARE even when the permType is not in the update payload', async () => {
+    // Bug #2 regression: cleanup block removes SHARED_GLOBAL but migration block only
+    // runs when the permType is in the update payload. Without the fix, SHARE would be
+    // lost when any other permType (e.g. MULTI_CONVO) is the only thing being updated.
+    await Role.collection.insertOne({
+      name: SystemRoles.USER,
+      permissions: {
+        [PermissionTypes.PROMPTS]: {
+          USE: true,
+          SHARED_GLOBAL: true, // legacy — NO SHARE present
+        },
+        [PermissionTypes.MULTI_CONVO]: { USE: false },
+      },
+    });
+
+    // Only update MULTI_CONVO — PROMPTS is intentionally absent from the payload
+    await updateAccessPermissions(SystemRoles.USER, {
+      [PermissionTypes.MULTI_CONVO]: { USE: true },
+    });
+
+    const updatedRole = await getRoleByName(SystemRoles.USER);
+
+    // SHARE should have been inherited from SHARED_GLOBAL, not silently dropped
+    expect(updatedRole.permissions[PermissionTypes.PROMPTS].SHARE).toBe(true);
+    // SHARED_GLOBAL should be removed
+    expect(updatedRole.permissions[PermissionTypes.PROMPTS].SHARED_GLOBAL).toBeUndefined();
+    // Original USE should be untouched
+    expect(updatedRole.permissions[PermissionTypes.PROMPTS].USE).toBe(true);
+    // The actual update should have applied
+    expect(updatedRole.permissions[PermissionTypes.MULTI_CONVO].USE).toBe(true);
+  });
+
+  it('should remove orphaned SHARED_GLOBAL when SHARE already exists and permType is not in update', async () => {
+    // Safe cleanup case: SHARE already set, SHARED_GLOBAL is just orphaned noise.
+    // SHARE must not be changed; SHARED_GLOBAL must be removed.
+    await Role.collection.insertOne({
+      name: SystemRoles.USER,
+      permissions: {
+        [PermissionTypes.PROMPTS]: {
+          USE: true,
+          SHARE: true, // already migrated
+          SHARED_GLOBAL: true, // orphaned
+        },
+        [PermissionTypes.MULTI_CONVO]: { USE: false },
+      },
+    });
+
+    await updateAccessPermissions(SystemRoles.USER, {
+      [PermissionTypes.MULTI_CONVO]: { USE: true },
+    });
+
+    const updatedRole = await getRoleByName(SystemRoles.USER);
+
+    expect(updatedRole.permissions[PermissionTypes.PROMPTS].SHARED_GLOBAL).toBeUndefined();
+    expect(updatedRole.permissions[PermissionTypes.PROMPTS].SHARE).toBe(true);
+    expect(updatedRole.permissions[PermissionTypes.MULTI_CONVO].USE).toBe(true);
+  });
+
  it('should not update MULTI_CONVO permissions when no changes are needed', async () => {
    await new Role({
      name: SystemRoles.USER,
@ -271,7 +377,7 @@ describe('initializeRoles', () => {
    });

    // Example: Check default values for ADMIN role
-    expect(adminRole.permissions[PermissionTypes.PROMPTS].SHARED_GLOBAL).toBe(true);
+    expect(adminRole.permissions[PermissionTypes.PROMPTS].SHARE).toBe(true);
    expect(adminRole.permissions[PermissionTypes.BOOKMARKS].USE).toBe(true);
    expect(adminRole.permissions[PermissionTypes.AGENTS].CREATE).toBe(true);
  });
@ -283,7 +389,7 @@ describe('initializeRoles', () => {
        [PermissionTypes.PROMPTS]: {
          [Permissions.USE]: false,
          [Permissions.CREATE]: true,
-          [Permissions.SHARED_GLOBAL]: true,
+          [Permissions.SHARE]: true,
        },
        [PermissionTypes.BOOKMARKS]: { [Permissions.USE]: false },
      },
@ -320,7 +426,7 @@ describe('initializeRoles', () => {
    expect(userRole.permissions[PermissionTypes.AGENTS]).toBeDefined();
    expect(userRole.permissions[PermissionTypes.AGENTS].CREATE).toBeDefined();
    expect(userRole.permissions[PermissionTypes.AGENTS].USE).toBeDefined();
-    expect(userRole.permissions[PermissionTypes.AGENTS].SHARED_GLOBAL).toBeDefined();
+    expect(userRole.permissions[PermissionTypes.AGENTS].SHARE).toBeDefined();
  });

  it('should handle multiple runs without duplicating or modifying data', async () => {
@ -348,7 +454,7 @@ describe('initializeRoles', () => {
        [PermissionTypes.PROMPTS]: {
          [Permissions.USE]: false,
          [Permissions.CREATE]: false,
-          [Permissions.SHARED_GLOBAL]: false,
+          [Permissions.SHARE]: false,
        },
        [PermissionTypes.BOOKMARKS]:
          roleDefaults[SystemRoles.ADMIN].permissions[PermissionTypes.BOOKMARKS],
@ -365,7 +471,7 @@ describe('initializeRoles', () => {
    expect(adminRole.permissions[PermissionTypes.AGENTS]).toBeDefined();
    expect(adminRole.permissions[PermissionTypes.AGENTS].CREATE).toBeDefined();
    expect(adminRole.permissions[PermissionTypes.AGENTS].USE).toBeDefined();
-    expect(adminRole.permissions[PermissionTypes.AGENTS].SHARED_GLOBAL).toBeDefined();
+    expect(adminRole.permissions[PermissionTypes.AGENTS].SHARE).toBeDefined();
  });

  it('should include MULTI_CONVO permissions when creating default roles', async () => {
--- a/api/models/Transaction.js
+++ b/api/models/Transaction.js
@ -1,153 +1,19 @@
-const { logger } = require('@librechat/data-schemas');
+const { logger, CANCEL_RATE } = require('@librechat/data-schemas');
 const { getMultiplier, getCacheMultiplier } = require('./tx');
-const { Transaction, Balance } = require('~/db/models');
-
-const cancelRate = 1.15;
-
-/**
- * Updates a user's token balance based on a transaction using optimistic concurrency control
- * without schema changes. Compatible with DocumentDB.
- * @async
- * @function
- * @param {Object} params - The function parameters.
- * @param {string|mongoose.Types.ObjectId} params.user - The user ID.
- * @param {number} params.incrementValue - The value to increment the balance by (can be negative).
- * @param {import('mongoose').UpdateQuery<import('@librechat/data-schemas').IBalance>['$set']} [params.setValues] - Optional additional fields to set.
- * @returns {Promise<Object>} Returns the updated balance document (lean).
- * @throws {Error} Throws an error if the update fails after multiple retries.
- */
-const updateBalance = async ({ user, incrementValue, setValues }) => {
-  let maxRetries = 10; // Number of times to retry on conflict
-  let delay = 50; // Initial retry delay in ms
-  let lastError = null;
-
-  for (let attempt = 1; attempt <= maxRetries; attempt++) {
-    let currentBalanceDoc;
-    try {
-      // 1. Read the current document state
-      currentBalanceDoc = await Balance.findOne({ user }).lean();
-      const currentCredits = currentBalanceDoc ? currentBalanceDoc.tokenCredits : 0;
-
-      // 2. Calculate the desired new state
-      const potentialNewCredits = currentCredits + incrementValue;
-      const newCredits = Math.max(0, potentialNewCredits); // Ensure balance doesn't go below zero
-
-      // 3. Prepare the update payload
-      const updatePayload = {
-        $set: {
-          tokenCredits: newCredits,
-          ...(setValues || {}), // Merge other values to set
-        },
-      };
-
-      // 4. Attempt the conditional update or upsert
-      let updatedBalance = null;
-      if (currentBalanceDoc) {
-        // --- Document Exists: Perform Conditional Update ---
-        // Try to update only if the tokenCredits match the value we read (currentCredits)
-        updatedBalance = await Balance.findOneAndUpdate(
-          {
-            user: user,
-            tokenCredits: currentCredits, // Optimistic lock: condition based on the read value
-          },
-          updatePayload,
-          {
-            new: true, // Return the modified document
-            // lean: true, // .lean() is applied after query execution in Mongoose >= 6
-          },
-        ).lean(); // Use lean() for plain JS object
-
-        if (updatedBalance) {
-          // Success! The update was applied based on the expected current state.
-          return updatedBalance;
-        }
-        // If updatedBalance is null, it means tokenCredits changed between read and write (conflict).
-        lastError = new Error(`Concurrency conflict for user ${user} on attempt ${attempt}.`);
-        // Proceed to retry logic below.
-      } else {
-        // --- Document Does Not Exist: Perform Conditional Upsert ---
-        // Try to insert the document, but only if it still doesn't exist.
-        // Using tokenCredits: {$exists: false} helps prevent race conditions where
-        // another process creates the doc between our findOne and findOneAndUpdate.
-        try {
-          updatedBalance = await Balance.findOneAndUpdate(
-            {
-              user: user,
-              // Attempt to match only if the document doesn't exist OR was just created
-              // without tokenCredits (less likely but possible). A simple { user } filter
-              // might also work, relying on the retry for conflicts.
-              // Let's use a simpler filter and rely on retry for races.
-              // tokenCredits: { $exists: false } // This condition might be too strict if doc exists with 0 credits
-            },
-            updatePayload,
-            {
-              upsert: true, // Create if doesn't exist
-              new: true, // Return the created/updated document
-              // setDefaultsOnInsert: true, // Ensure schema defaults are applied on insert
-              // lean: true,
-            },
-          ).lean();
-
-          if (updatedBalance) {
-            // Upsert succeeded (likely created the document)
-            return updatedBalance;
-          }
-          // If null, potentially a rare race condition during upsert. Retry should handle it.
-          lastError = new Error(
-            `Upsert race condition suspected for user ${user} on attempt ${attempt}.`,
-          );
-        } catch (error) {
-          if (error.code === 11000) {
-            // E11000 duplicate key error on index
-            // This means another process created the document *just* before our upsert.
-            // It's a concurrency conflict during creation. We should retry.
-            lastError = error; // Store the error
-            // Proceed to retry logic below.
-          } else {
-            // Different error, rethrow
-            throw error;
-          }
-        }
-      } // End if/else (document exists?)
-    } catch (error) {
-      // Catch errors from findOne or unexpected findOneAndUpdate errors
-      logger.error(`[updateBalance] Error during attempt ${attempt} for user ${user}:`, error);
-      lastError = error; // Store the error
-      // Consider stopping retries for non-transient errors, but for now, we retry.
-    }
-
-    // If we reached here, it means the update failed (conflict or error), wait and retry
-    if (attempt < maxRetries) {
-      const jitter = Math.random() * delay * 0.5; // Add jitter to delay
-      await new Promise((resolve) => setTimeout(resolve, delay + jitter));
-      delay = Math.min(delay * 2, 2000); // Exponential backoff with cap
-    }
-  } // End for loop (retries)
-
-  // If loop finishes without success, throw the last encountered error or a generic one
-  logger.error(
-    `[updateBalance] Failed to update balance for user ${user} after ${maxRetries} attempts.`,
-  );
-  throw (
-    lastError ||
-    new Error(
-      `Failed to update balance for user ${user} after maximum retries due to persistent conflicts.`,
-    )
-  );
-};
+const { Transaction } = require('~/db/models');
+const { updateBalance } = require('~/models');

 /** Method to calculate and set the tokenValue for a transaction */
 function calculateTokenValue(txn) {
-  if (!txn.valueKey || !txn.tokenType) {
-    txn.tokenValue = txn.rawAmount;
-  }
-  const { valueKey, tokenType, model, endpointTokenConfig } = txn;
-  const multiplier = Math.abs(getMultiplier({ valueKey, tokenType, model, endpointTokenConfig }));
+  // All rate inputs, including `inputTokenCount`, are read from the transaction and
+  // forwarded to getMultiplier; only the magnitude of the returned multiplier is kept.
+  const { valueKey, tokenType, model, endpointTokenConfig, inputTokenCount } = txn;
+  const multiplier = Math.abs(
+    getMultiplier({ valueKey, tokenType, model, endpointTokenConfig, inputTokenCount }),
+  );
   txn.rate = multiplier;
+  // `multiplier` is non-negative here, so tokenValue keeps the sign of rawAmount.
   txn.tokenValue = txn.rawAmount * multiplier;
+  // Completion transactions with an 'incomplete' context have both value and rate scaled
+  // by CANCEL_RATE; Math.ceil rounds the scaled value up to the next integer.
   if (txn.context && txn.tokenType === 'completion' && txn.context === 'incomplete') {
-    txn.tokenValue = Math.ceil(txn.tokenValue * cancelRate);
-    txn.rate *= cancelRate;
+    txn.tokenValue = Math.ceil(txn.tokenValue * CANCEL_RATE);
+    txn.rate *= CANCEL_RATE;
   }
 }

@ -166,6 +32,7 @@ async function createAutoRefillTransaction(txData) {
  }
  const transaction = new Transaction(txData);
  transaction.endpointTokenConfig = txData.endpointTokenConfig;
+  transaction.inputTokenCount = txData.inputTokenCount;
  calculateTokenValue(transaction);
  await transaction.save();

@ -200,6 +67,7 @@ async function createTransaction(_txData) {

  const transaction = new Transaction(txData);
  transaction.endpointTokenConfig = txData.endpointTokenConfig;
+  transaction.inputTokenCount = txData.inputTokenCount;
  calculateTokenValue(transaction);

  await transaction.save();
@ -231,10 +99,9 @@ async function createStructuredTransaction(_txData) {
    return;
  }

-  const transaction = new Transaction({
-    ...txData,
-    endpointTokenConfig: txData.endpointTokenConfig,
-  });
+  const transaction = new Transaction(txData);
+  transaction.endpointTokenConfig = txData.endpointTokenConfig;
+  transaction.inputTokenCount = txData.inputTokenCount;

  calculateStructuredTokenValue(transaction);

@ -266,10 +133,15 @@ function calculateStructuredTokenValue(txn) {
    return;
  }

-  const { model, endpointTokenConfig } = txn;
+  const { model, endpointTokenConfig, inputTokenCount } = txn;

  if (txn.tokenType === 'prompt') {
-    const inputMultiplier = getMultiplier({ tokenType: 'prompt', model, endpointTokenConfig });
+    const inputMultiplier = getMultiplier({
+      tokenType: 'prompt',
+      model,
+      endpointTokenConfig,
+      inputTokenCount,
+    });
    const writeMultiplier =
      getCacheMultiplier({ cacheType: 'write', model, endpointTokenConfig }) ?? inputMultiplier;
    const readMultiplier =
@ -304,18 +176,23 @@ function calculateStructuredTokenValue(txn) {

    txn.rawAmount = -totalPromptTokens;
  } else if (txn.tokenType === 'completion') {
-    const multiplier = getMultiplier({ tokenType: txn.tokenType, model, endpointTokenConfig });
+    const multiplier = getMultiplier({
+      tokenType: txn.tokenType,
+      model,
+      endpointTokenConfig,
+      inputTokenCount,
+    });
    txn.rate = Math.abs(multiplier);
    txn.tokenValue = -Math.abs(txn.rawAmount) * multiplier;
    txn.rawAmount = -Math.abs(txn.rawAmount);
  }

  if (txn.context && txn.tokenType === 'completion' && txn.context === 'incomplete') {
-    txn.tokenValue = Math.ceil(txn.tokenValue * cancelRate);
-    txn.rate *= cancelRate;
+    txn.tokenValue = Math.ceil(txn.tokenValue * CANCEL_RATE);
+    txn.rate *= CANCEL_RATE;
    if (txn.rateDetail) {
      txn.rateDetail = Object.fromEntries(
-        Object.entries(txn.rateDetail).map(([k, v]) => [k, v * cancelRate]),
+        Object.entries(txn.rateDetail).map(([k, v]) => [k, v * CANCEL_RATE]),
      );
    }
  }
--- a/api/models/Transaction.spec.js
+++ b/api/models/Transaction.spec.js
@ -1,8 +1,10 @@
 const mongoose = require('mongoose');
+const { recordCollectedUsage } = require('@librechat/api');
+const { createMethods, CANCEL_RATE } = require('@librechat/data-schemas');
 const { MongoMemoryServer } = require('mongodb-memory-server');
-const { spendTokens, spendStructuredTokens } = require('./spendTokens');
-const { getMultiplier, getCacheMultiplier } = require('./tx');
+const { getMultiplier, getCacheMultiplier, premiumTokenValues, tokenValues } = require('./tx');
 const { createTransaction, createStructuredTransaction } = require('./Transaction');
+const { spendTokens, spendStructuredTokens } = require('./spendTokens');
 const { Balance, Transaction } = require('~/db/models');

 let mongoServer;
@ -564,3 +566,760 @@ describe('Transactions Config Tests', () => {
    expect(balance.tokenCredits).toBe(initialBalance);
  });
 });
+
+describe('calculateTokenValue Edge Cases', () => {
+  // createTransaction is called without a `valueKey`; the returned rate and the stored
+  // transaction must both match what getMultiplier yields for the same model/tokenType.
+  test('should derive multiplier from model when valueKey is not provided', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 100000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'gpt-4';
+    const promptTokens = 1000;
+
+    const result = await createTransaction({
+      user: userId,
+      conversationId: 'test-no-valuekey',
+      model,
+      tokenType: 'prompt',
+      rawAmount: -promptTokens,
+      context: 'test',
+      balance: { enabled: true },
+    });
+
+    const expectedRate = getMultiplier({ model, tokenType: 'prompt' });
+    expect(result.rate).toBe(expectedRate);
+
+    // rawAmount was passed as a negative number, so the stored tokenValue is negative too.
+    const tx = await Transaction.findOne({ user: userId });
+    expect(tx.tokenValue).toBe(-promptTokens * expectedRate);
+    expect(tx.rate).toBe(expectedRate);
+  });
+
+  // Uses a model name that is meant to be unrecognized (per the test title). The test does
+  // not pin a specific rate: it only requires a positive rate and tokenValue = rawAmount * rate.
+  test('should derive valueKey and apply correct rate for an unknown model with tokenType', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 100000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    await createTransaction({
+      user: userId,
+      conversationId: 'test-unknown-model',
+      model: 'some-unrecognized-model-xyz',
+      tokenType: 'prompt',
+      rawAmount: -500,
+      context: 'test',
+      balance: { enabled: true },
+    });
+
+    const tx = await Transaction.findOne({ user: userId });
+    expect(tx.rate).toBeDefined();
+    expect(tx.rate).toBeGreaterThan(0);
+    expect(tx.tokenValue).toBe(tx.rawAmount * tx.rate);
+  });
+
+  // Completion-side counterpart of the no-valueKey case: the rate must agree with both
+  // getMultiplier and the `tokenValues` completion entry for the model, and the balance
+  // must drop by completionTokens * rate (compared with toBeCloseTo at 0 digits).
+  test('should correctly apply model-derived multiplier without valueKey for completion', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 100000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'claude-opus-4-6';
+    const completionTokens = 500;
+
+    const result = await createTransaction({
+      user: userId,
+      conversationId: 'test-completion-no-valuekey',
+      model,
+      tokenType: 'completion',
+      rawAmount: -completionTokens,
+      context: 'test',
+      balance: { enabled: true },
+    });
+
+    const expectedRate = getMultiplier({ model, tokenType: 'completion' });
+    expect(expectedRate).toBe(tokenValues[model].completion);
+    expect(result.rate).toBe(expectedRate);
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    expect(updatedBalance.tokenCredits).toBeCloseTo(
+      initialBalance - completionTokens * expectedRate,
+      0,
+    );
+  });
+});
+
+describe('Premium Token Pricing Integration Tests', () => {
+  // 100000 prompt tokens are intended to sit below the model's premium threshold (the
+  // threshold value itself is defined outside this file), so the expected cost is built
+  // from the standard `tokenValues` rates.
+  test('spendTokens should apply standard pricing when prompt tokens are below premium threshold', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 100000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'claude-opus-4-6';
+    const promptTokens = 100000;
+    const completionTokens = 500;
+
+    const txData = {
+      user: userId,
+      conversationId: 'test-premium-below',
+      model,
+      context: 'test',
+      endpointTokenConfig: null,
+      balance: { enabled: true },
+    };
+
+    await spendTokens(txData, { promptTokens, completionTokens });
+
+    const standardPromptRate = tokenValues[model].prompt;
+    const standardCompletionRate = tokenValues[model].completion;
+    const expectedCost =
+      promptTokens * standardPromptRate + completionTokens * standardCompletionRate;
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    expect(updatedBalance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+  });
+
+  // 250000 prompt tokens are intended to exceed the model's premium threshold; the expected
+  // cost applies the `premiumTokenValues` rate to BOTH prompt and completion tokens.
+  test('spendTokens should apply premium pricing when prompt tokens exceed premium threshold', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 100000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'claude-opus-4-6';
+    const promptTokens = 250000;
+    const completionTokens = 500;
+
+    const txData = {
+      user: userId,
+      conversationId: 'test-premium-above',
+      model,
+      context: 'test',
+      endpointTokenConfig: null,
+      balance: { enabled: true },
+    };
+
+    await spendTokens(txData, { promptTokens, completionTokens });
+
+    const premiumPromptRate = premiumTokenValues[model].prompt;
+    const premiumCompletionRate = premiumTokenValues[model].completion;
+    const expectedCost =
+      promptTokens * premiumPromptRate + completionTokens * premiumCompletionRate;
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    expect(updatedBalance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+  });
+
+  // Boundary case: promptTokens equals premiumTokenValues[model].threshold exactly. The
+  // expected cost uses standard rates, i.e. the threshold value itself is not billed as premium.
+  test('spendTokens should apply standard pricing at exactly the premium threshold', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 100000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'claude-opus-4-6';
+    const promptTokens = premiumTokenValues[model].threshold;
+    const completionTokens = 500;
+
+    const txData = {
+      user: userId,
+      conversationId: 'test-premium-exact',
+      model,
+      context: 'test',
+      endpointTokenConfig: null,
+      balance: { enabled: true },
+    };
+
+    await spendTokens(txData, { promptTokens, completionTokens });
+
+    const standardPromptRate = tokenValues[model].prompt;
+    const standardCompletionRate = tokenValues[model].completion;
+    const expectedCost =
+      promptTokens * standardPromptRate + completionTokens * standardCompletionRate;
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    expect(updatedBalance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+  });
+
+  // Structured usage whose combined input (input + write + read = 215000) is asserted to be
+  // above the premium threshold. The expected cost applies the premium prompt rate to the
+  // `input` portion only; cache write/read portions use getCacheMultiplier, and completion
+  // tokens use the premium completion rate.
+  test('spendStructuredTokens should apply premium pricing when total input tokens exceed threshold', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 100000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'claude-opus-4-6';
+    const txData = {
+      user: userId,
+      conversationId: 'test-structured-premium',
+      model,
+      context: 'message',
+      endpointTokenConfig: null,
+      balance: { enabled: true },
+    };
+
+    const tokenUsage = {
+      promptTokens: {
+        input: 200000,
+        write: 10000,
+        read: 5000,
+      },
+      completionTokens: 1000,
+    };
+
+    const totalInput =
+      tokenUsage.promptTokens.input + tokenUsage.promptTokens.write + tokenUsage.promptTokens.read;
+
+    await spendStructuredTokens(txData, tokenUsage);
+
+    const premiumPromptRate = premiumTokenValues[model].prompt;
+    const premiumCompletionRate = premiumTokenValues[model].completion;
+    const writeMultiplier = getCacheMultiplier({ model, cacheType: 'write' });
+    const readMultiplier = getCacheMultiplier({ model, cacheType: 'read' });
+
+    const expectedPromptCost =
+      tokenUsage.promptTokens.input * premiumPromptRate +
+      tokenUsage.promptTokens.write * writeMultiplier +
+      tokenUsage.promptTokens.read * readMultiplier;
+    const expectedCompletionCost = tokenUsage.completionTokens * premiumCompletionRate;
+    const expectedTotalCost = expectedPromptCost + expectedCompletionCost;
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    // Sanity check on the fixture itself: the scenario is only meaningful above the threshold.
+    expect(totalInput).toBeGreaterThan(premiumTokenValues[model].threshold);
+    expect(updatedBalance.tokenCredits).toBeCloseTo(initialBalance - expectedTotalCost, 0);
+  });
+
+  // Structured usage whose combined input (input + write + read = 65000) is asserted to be
+  // at or below the premium threshold, so standard `tokenValues` rates are expected for the
+  // `input` portion and for completion tokens; cache portions use getCacheMultiplier.
+  test('spendStructuredTokens should apply standard pricing when total input tokens are below threshold', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 100000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'claude-opus-4-6';
+    const txData = {
+      user: userId,
+      conversationId: 'test-structured-standard',
+      model,
+      context: 'message',
+      endpointTokenConfig: null,
+      balance: { enabled: true },
+    };
+
+    const tokenUsage = {
+      promptTokens: {
+        input: 50000,
+        write: 10000,
+        read: 5000,
+      },
+      completionTokens: 1000,
+    };
+
+    const totalInput =
+      tokenUsage.promptTokens.input + tokenUsage.promptTokens.write + tokenUsage.promptTokens.read;
+
+    await spendStructuredTokens(txData, tokenUsage);
+
+    const standardPromptRate = tokenValues[model].prompt;
+    const standardCompletionRate = tokenValues[model].completion;
+    const writeMultiplier = getCacheMultiplier({ model, cacheType: 'write' });
+    const readMultiplier = getCacheMultiplier({ model, cacheType: 'read' });
+
+    const expectedPromptCost =
+      tokenUsage.promptTokens.input * standardPromptRate +
+      tokenUsage.promptTokens.write * writeMultiplier +
+      tokenUsage.promptTokens.read * readMultiplier;
+    const expectedCompletionCost = tokenUsage.completionTokens * standardCompletionRate;
+    const expectedTotalCost = expectedPromptCost + expectedCompletionCost;
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    // Sanity check on the fixture itself: the scenario must not cross the threshold.
+    expect(totalInput).toBeLessThanOrEqual(premiumTokenValues[model].threshold);
+    expect(updatedBalance.tokenCredits).toBeCloseTo(initialBalance - expectedTotalCost, 0);
+  });
+
+  // Same below-threshold scenario for a Gemini model. Note the model passed to spendTokens
+  // is 'gemini-3.1-pro-preview' while the expected rates are read from the 'gemini-3.1'
+  // entry of `tokenValues`.
+  test('spendTokens should apply standard pricing for gemini-3.1-pro-preview below threshold', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 100000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'gemini-3.1-pro-preview';
+    const promptTokens = 100000;
+    const completionTokens = 500;
+
+    const txData = {
+      user: userId,
+      conversationId: 'test-gemini31-below',
+      model,
+      context: 'test',
+      endpointTokenConfig: null,
+      balance: { enabled: true },
+    };
+
+    await spendTokens(txData, { promptTokens, completionTokens });
+
+    const standardPromptRate = tokenValues['gemini-3.1'].prompt;
+    const standardCompletionRate = tokenValues['gemini-3.1'].completion;
+    const expectedCost =
+      promptTokens * standardPromptRate + completionTokens * standardCompletionRate;
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    expect(updatedBalance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+  });
+
+  // Above-threshold scenario for 'gemini-3.1-pro-preview': expected prompt and completion
+  // rates are both read from the 'gemini-3.1' entry of `premiumTokenValues`.
+  test('spendTokens should apply premium pricing for gemini-3.1-pro-preview above threshold', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 100000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'gemini-3.1-pro-preview';
+    const promptTokens = 250000;
+    const completionTokens = 500;
+
+    const txData = {
+      user: userId,
+      conversationId: 'test-gemini31-above',
+      model,
+      context: 'test',
+      endpointTokenConfig: null,
+      balance: { enabled: true },
+    };
+
+    await spendTokens(txData, { promptTokens, completionTokens });
+
+    const premiumPromptRate = premiumTokenValues['gemini-3.1'].prompt;
+    const premiumCompletionRate = premiumTokenValues['gemini-3.1'].completion;
+    const expectedCost =
+      promptTokens * premiumPromptRate + completionTokens * premiumCompletionRate;
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    expect(updatedBalance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+  });
+
+  // Boundary case for 'gemini-3.1-pro-preview': promptTokens equals the 'gemini-3.1'
+  // premium threshold exactly, and standard rates are still expected.
+  test('spendTokens should apply standard pricing for gemini-3.1-pro-preview at exactly the threshold', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 100000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'gemini-3.1-pro-preview';
+    const promptTokens = premiumTokenValues['gemini-3.1'].threshold;
+    const completionTokens = 500;
+
+    const txData = {
+      user: userId,
+      conversationId: 'test-gemini31-exact',
+      model,
+      context: 'test',
+      endpointTokenConfig: null,
+      balance: { enabled: true },
+    };
+
+    await spendTokens(txData, { promptTokens, completionTokens });
+
+    const standardPromptRate = tokenValues['gemini-3.1'].prompt;
+    const standardCompletionRate = tokenValues['gemini-3.1'].completion;
+    const expectedCost =
+      promptTokens * standardPromptRate + completionTokens * standardCompletionRate;
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    expect(updatedBalance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+  });
+
+  // Structured usage for 'gemini-3.1-pro-preview' with combined input of 215000 tokens,
+  // asserted to exceed the 'gemini-3.1' premium threshold. Premium rates are expected for
+  // the `input` portion and completion tokens; cache portions use getCacheMultiplier.
+  test('spendStructuredTokens should apply premium pricing for gemini-3.1 when total input exceeds threshold', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 100000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'gemini-3.1-pro-preview';
+    const txData = {
+      user: userId,
+      conversationId: 'test-gemini31-structured-premium',
+      model,
+      context: 'message',
+      endpointTokenConfig: null,
+      balance: { enabled: true },
+    };
+
+    const tokenUsage = {
+      promptTokens: {
+        input: 200000,
+        write: 10000,
+        read: 5000,
+      },
+      completionTokens: 1000,
+    };
+
+    const totalInput =
+      tokenUsage.promptTokens.input + tokenUsage.promptTokens.write + tokenUsage.promptTokens.read;
+
+    await spendStructuredTokens(txData, tokenUsage);
+
+    const premiumPromptRate = premiumTokenValues['gemini-3.1'].prompt;
+    const premiumCompletionRate = premiumTokenValues['gemini-3.1'].completion;
+    const writeMultiplier = getCacheMultiplier({ model, cacheType: 'write' });
+    const readMultiplier = getCacheMultiplier({ model, cacheType: 'read' });
+
+    const expectedPromptCost =
+      tokenUsage.promptTokens.input * premiumPromptRate +
+      tokenUsage.promptTokens.write * writeMultiplier +
+      tokenUsage.promptTokens.read * readMultiplier;
+    const expectedCompletionCost = tokenUsage.completionTokens * premiumCompletionRate;
+    const expectedTotalCost = expectedPromptCost + expectedCompletionCost;
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    // Sanity check on the fixture itself: the scenario is only meaningful above the threshold.
+    expect(totalInput).toBeGreaterThan(premiumTokenValues['gemini-3.1'].threshold);
+    expect(updatedBalance.tokenCredits).toBeCloseTo(initialBalance - expectedTotalCost, 0);
+  });
+
+  // Control case: per the test title, 'claude-opus-4-5' has no premium tier. The expected
+  // rates come from getMultiplier called WITHOUT inputTokenCount, so a 300000-token prompt
+  // must be charged exactly as a small one would be.
+  test('non-premium models should not be affected by inputTokenCount regardless of prompt size', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 100000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'claude-opus-4-5';
+    const promptTokens = 300000;
+    const completionTokens = 500;
+
+    const txData = {
+      user: userId,
+      conversationId: 'test-no-premium',
+      model,
+      context: 'test',
+      endpointTokenConfig: null,
+      balance: { enabled: true },
+    };
+
+    await spendTokens(txData, { promptTokens, completionTokens });
+
+    const standardPromptRate = getMultiplier({ model, tokenType: 'prompt' });
+    const standardCompletionRate = getMultiplier({ model, tokenType: 'completion' });
+    const expectedCost =
+      promptTokens * standardPromptRate + completionTokens * standardCompletionRate;
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    expect(updatedBalance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+  });
+});
+
+describe('Bulk path parity', () => {
+  /**
+   * Each test here mirrors an existing legacy test above, replacing spendTokens/
+   * spendStructuredTokens with recordCollectedUsage + bulk deps.
+   * The balance deduction and transaction document fields must be numerically identical.
+   */
+  let bulkDeps;
+  let methods;
+
+  // Rebuilds the dependency bundle before every test. The legacy spendTokens /
+  // spendStructuredTokens entries are no-op stubs that resolve immediately, while pricing
+  // uses the real multiplier functions and bulkWriteOps uses methods from createMethods(mongoose).
+  beforeEach(() => {
+    methods = createMethods(mongoose);
+    bulkDeps = {
+      spendTokens: () => Promise.resolve(),
+      spendStructuredTokens: () => Promise.resolve(),
+      pricing: { getMultiplier, getCacheMultiplier },
+      bulkWriteOps: {
+        insertMany: methods.bulkInsertTransactions,
+        updateBalance: methods.updateBalance,
+      },
+    };
+  });
+
+  // One usage entry goes through recordCollectedUsage with balance and transactions both
+  // enabled. Expects the balance to drop by prompt * rate + completion * rate and exactly
+  // two Transaction documents to exist for the user.
+  test('balance should decrease when spending tokens via bulk path', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 10000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'gpt-3.5-turbo';
+    const promptTokens = 100;
+    const completionTokens = 50;
+
+    await recordCollectedUsage(bulkDeps, {
+      user: userId.toString(),
+      conversationId: 'test-conversation-id',
+      model,
+      context: 'test',
+      balance: { enabled: true },
+      transactions: { enabled: true },
+      collectedUsage: [{ input_tokens: promptTokens, output_tokens: completionTokens, model }],
+    });
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    // Both multipliers are looked up with the prompt size as inputTokenCount.
+    const promptMultiplier = getMultiplier({
+      model,
+      tokenType: 'prompt',
+      inputTokenCount: promptTokens,
+    });
+    const completionMultiplier = getMultiplier({
+      model,
+      tokenType: 'completion',
+      inputTokenCount: promptTokens,
+    });
+    const expectedTotalCost =
+      promptTokens * promptMultiplier + completionTokens * completionMultiplier;
+    const expectedBalance = initialBalance - expectedTotalCost;
+
+    expect(updatedBalance.tokenCredits).toBeCloseTo(expectedBalance, 0);
+
+    const txns = await Transaction.find({ user: userId }).lean();
+    expect(txns).toHaveLength(2);
+  });
+
+  // With balance.enabled false and transactions.enabled true, the balance must stay at its
+  // initial value while the two Transaction documents are still written.
+  test('bulk path should not update balance when balance.enabled is false', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 10000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'gpt-3.5-turbo';
+
+    await recordCollectedUsage(bulkDeps, {
+      user: userId.toString(),
+      conversationId: 'test-conversation-id',
+      model,
+      context: 'test',
+      balance: { enabled: false },
+      transactions: { enabled: true },
+      collectedUsage: [{ input_tokens: 100, output_tokens: 50, model }],
+    });
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    expect(updatedBalance.tokenCredits).toBe(initialBalance);
+    const txns = await Transaction.find({ user: userId }).lean();
+    expect(txns).toHaveLength(2); // transactions still recorded
+  });
+
+  // With transactions.enabled false, no Transaction documents may be written. The test also
+  // asserts the balance is untouched even though balance.enabled is true.
+  test('bulk path should not insert when transactions.enabled is false', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 10000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    await recordCollectedUsage(bulkDeps, {
+      user: userId.toString(),
+      conversationId: 'test-conversation-id',
+      model: 'gpt-3.5-turbo',
+      context: 'test',
+      balance: { enabled: true },
+      transactions: { enabled: false },
+      collectedUsage: [{ input_tokens: 100, output_tokens: 50, model: 'gpt-3.5-turbo' }],
+    });
+
+    const txns = await Transaction.find({ user: userId }).lean();
+    expect(txns).toHaveLength(0);
+    const balance = await Balance.findOne({ user: userId });
+    expect(balance.tokenCredits).toBe(initialBalance);
+  });
+
+  // Parity check for cancelled generations: with context 'incomplete', the completion
+  // transaction's tokenValue must be the normal completion cost scaled by the shared
+  // CANCEL_RATE constant (the same constant Transaction.js applies), rather than a
+  // duplicated numeric literal that could drift from it.
+  test('bulk path handles incomplete context for completion tokens — same CANCEL_RATE as legacy', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 17613154.55;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'claude-3-5-sonnet';
+    const promptTokens = 10;
+    const completionTokens = 50;
+
+    await recordCollectedUsage(bulkDeps, {
+      user: userId.toString(),
+      conversationId: 'test-convo',
+      model,
+      context: 'incomplete',
+      balance: { enabled: true },
+      transactions: { enabled: true },
+      collectedUsage: [{ input_tokens: promptTokens, output_tokens: completionTokens, model }],
+    });
+
+    const txns = await Transaction.find({ user: userId }).lean();
+    const completionTx = txns.find((t) => t.tokenType === 'completion');
+    const completionMultiplier = getMultiplier({
+      model,
+      tokenType: 'completion',
+      inputTokenCount: promptTokens,
+    });
+    // toBeCloseTo at 0 digits absorbs any integer rounding applied to the stored value.
+    expect(completionTx.tokenValue).toBeCloseTo(
+      -completionTokens * completionMultiplier * CANCEL_RATE,
+      0,
+    );
+  });
+
+  // Structured (cache-aware) usage through the bulk path. Cache write/read counts are
+  // supplied via input_token_details, and multipliers are looked up with the combined
+  // input (input + write + read) as inputTokenCount. The final comparison allows a
+  // difference of up to 100 credits rather than requiring an exact match.
+  test('bulk path structured tokens — balance deduction matches legacy spendStructuredTokens', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 17613154.55;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'claude-3-5-sonnet';
+    const promptInput = 11;
+    const promptWrite = 140522;
+    const promptRead = 0;
+    const completionTokens = 5;
+    const totalInput = promptInput + promptWrite + promptRead;
+
+    await recordCollectedUsage(bulkDeps, {
+      user: userId.toString(),
+      conversationId: 'test-convo',
+      model,
+      context: 'message',
+      balance: { enabled: true },
+      transactions: { enabled: true },
+      collectedUsage: [
+        {
+          input_tokens: promptInput,
+          output_tokens: completionTokens,
+          model,
+          input_token_details: { cache_creation: promptWrite, cache_read: promptRead },
+        },
+      ],
+    });
+
+    const promptMultiplier = getMultiplier({
+      model,
+      tokenType: 'prompt',
+      inputTokenCount: totalInput,
+    });
+    const completionMultiplier = getMultiplier({
+      model,
+      tokenType: 'completion',
+      inputTokenCount: totalInput,
+    });
+    // Fall back to the prompt multiplier when no cache-specific multiplier is returned.
+    const writeMultiplier = getCacheMultiplier({ model, cacheType: 'write' }) ?? promptMultiplier;
+    const readMultiplier = getCacheMultiplier({ model, cacheType: 'read' }) ?? promptMultiplier;
+
+    const expectedPromptCost =
+      promptInput * promptMultiplier + promptWrite * writeMultiplier + promptRead * readMultiplier;
+    const expectedCompletionCost = completionTokens * completionMultiplier;
+    const expectedTotalCost = expectedPromptCost + expectedCompletionCost;
+    const expectedBalance = initialBalance - expectedTotalCost;
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    expect(Math.abs(updatedBalance.tokenCredits - expectedBalance)).toBeLessThan(100);
+  });
+
+  test('premium pricing above threshold via bulk path — same balance as legacy', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 100000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'claude-opus-4-6';
+    const promptTokens = 250000;
+    const completionTokens = 500;
+
+    await recordCollectedUsage(bulkDeps, {
+      user: userId.toString(),
+      conversationId: 'test-premium',
+      model,
+      context: 'test',
+      balance: { enabled: true },
+      transactions: { enabled: true },
+      collectedUsage: [{ input_tokens: promptTokens, output_tokens: completionTokens, model }],
+    });
+
+    const premiumPromptRate = premiumTokenValues[model].prompt;
+    const premiumCompletionRate = premiumTokenValues[model].completion;
+    const expectedCost =
+      promptTokens * premiumPromptRate + completionTokens * premiumCompletionRate;
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    expect(updatedBalance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+  });
+
+  test('real-world multi-entry batch: 5 sequential tool calls — same total deduction as 5 legacy spendTokens calls', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 100000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    const model = 'claude-opus-4-5-20251101';
+    const calls = [
+      { input_tokens: 31596, output_tokens: 151 },
+      { input_tokens: 35368, output_tokens: 150 },
+      { input_tokens: 58362, output_tokens: 295 },
+      { input_tokens: 112604, output_tokens: 193 },
+      { input_tokens: 257440, output_tokens: 2217 },
+    ];
+
+    let expectedTotalCost = 0;
+    for (const { input_tokens, output_tokens } of calls) {
+      const pm = getMultiplier({ model, tokenType: 'prompt', inputTokenCount: input_tokens });
+      const cm = getMultiplier({ model, tokenType: 'completion', inputTokenCount: input_tokens });
+      expectedTotalCost += input_tokens * pm + output_tokens * cm;
+    }
+
+    await recordCollectedUsage(bulkDeps, {
+      user: userId.toString(),
+      conversationId: 'test-sequential',
+      model,
+      context: 'message',
+      balance: { enabled: true },
+      transactions: { enabled: true },
+      collectedUsage: calls.map((c) => ({ ...c, model })),
+    });
+
+    const txns = await Transaction.find({ user: userId }).lean();
+    expect(txns).toHaveLength(10); // 5 calls × 2 docs (prompt + completion)
+
+    const updatedBalance = await Balance.findOne({ user: userId });
+    expect(updatedBalance.tokenCredits).toBeCloseTo(initialBalance - expectedTotalCost, 0);
+  });
+
+  test('bulk path should save transaction but not update balance when balance disabled, transactions enabled', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 10000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    await recordCollectedUsage(bulkDeps, {
+      user: userId.toString(),
+      conversationId: 'test-conversation-id',
+      model: 'gpt-3.5-turbo',
+      context: 'test',
+      balance: { enabled: false },
+      transactions: { enabled: true },
+      collectedUsage: [{ input_tokens: 100, output_tokens: 50, model: 'gpt-3.5-turbo' }],
+    });
+
+    const txns = await Transaction.find({ user: userId }).lean();
+    expect(txns).toHaveLength(2);
+    expect(txns[0].rawAmount).toBeDefined();
+    const balance = await Balance.findOne({ user: userId });
+    expect(balance.tokenCredits).toBe(initialBalance);
+  });
+
+  test('bulk path structured tokens should not save when transactions.enabled is false', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 10000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    await recordCollectedUsage(bulkDeps, {
+      user: userId.toString(),
+      conversationId: 'test-conversation-id',
+      model: 'claude-3-5-sonnet',
+      context: 'message',
+      balance: { enabled: true },
+      transactions: { enabled: false },
+      collectedUsage: [
+        {
+          input_tokens: 10,
+          output_tokens: 5,
+          model: 'claude-3-5-sonnet',
+          input_token_details: { cache_creation: 100, cache_read: 5 },
+        },
+      ],
+    });
+
+    const txns = await Transaction.find({ user: userId }).lean();
+    expect(txns).toHaveLength(0);
+    const balance = await Balance.findOne({ user: userId });
+    expect(balance.tokenCredits).toBe(initialBalance);
+  });
+
+  test('bulk path structured tokens should save but not update balance when balance disabled', async () => {
+    const userId = new mongoose.Types.ObjectId();
+    const initialBalance = 10000000;
+    await Balance.create({ user: userId, tokenCredits: initialBalance });
+
+    await recordCollectedUsage(bulkDeps, {
+      user: userId.toString(),
+      conversationId: 'test-conversation-id',
+      model: 'claude-3-5-sonnet',
+      context: 'message',
+      balance: { enabled: false },
+      transactions: { enabled: true },
+      collectedUsage: [
+        {
+          input_tokens: 10,
+          output_tokens: 5,
+          model: 'claude-3-5-sonnet',
+          input_token_details: { cache_creation: 100, cache_read: 5 },
+        },
+      ],
+    });
+
+    const txns = await Transaction.find({ user: userId }).lean();
+    expect(txns).toHaveLength(2);
+    const promptTx = txns.find((t) => t.tokenType === 'prompt');
+    expect(promptTx.inputTokens).toBe(-10);
+    expect(promptTx.writeTokens).toBe(-100);
+    expect(promptTx.readTokens).toBe(-5);
+    const balance = await Balance.findOne({ user: userId });
+    expect(balance.tokenCredits).toBe(initialBalance);
+  });
+});
--- a/api/models/spendTokens.js
+++ b/api/models/spendTokens.js
@ -24,12 +24,14 @@ const spendTokens = async (txData, tokenUsage) => {
    },
  );
  let prompt, completion;
+  const normalizedPromptTokens = Math.max(promptTokens ?? 0, 0);
  try {
    if (promptTokens !== undefined) {
      prompt = await createTransaction({
        ...txData,
        tokenType: 'prompt',
-        rawAmount: promptTokens === 0 ? 0 : -Math.max(promptTokens, 0),
+        rawAmount: promptTokens === 0 ? 0 : -normalizedPromptTokens,
+        inputTokenCount: normalizedPromptTokens,
      });
    }

@ -38,6 +40,7 @@ const spendTokens = async (txData, tokenUsage) => {
        ...txData,
        tokenType: 'completion',
        rawAmount: completionTokens === 0 ? 0 : -Math.max(completionTokens, 0),
+        inputTokenCount: normalizedPromptTokens,
      });
    }

@ -87,21 +90,31 @@ const spendStructuredTokens = async (txData, tokenUsage) => {
  let prompt, completion;
  try {
    if (promptTokens) {
-      const { input = 0, write = 0, read = 0 } = promptTokens;
+      const input = Math.max(promptTokens.input ?? 0, 0);
+      const write = Math.max(promptTokens.write ?? 0, 0);
+      const read = Math.max(promptTokens.read ?? 0, 0);
+      const totalInputTokens = input + write + read;
      prompt = await createStructuredTransaction({
        ...txData,
        tokenType: 'prompt',
        inputTokens: -input,
        writeTokens: -write,
        readTokens: -read,
+        inputTokenCount: totalInputTokens,
      });
    }

    if (completionTokens) {
+      const totalInputTokens = promptTokens
+        ? Math.max(promptTokens.input ?? 0, 0) +
+          Math.max(promptTokens.write ?? 0, 0) +
+          Math.max(promptTokens.read ?? 0, 0)
+        : undefined;
      completion = await createTransaction({
        ...txData,
        tokenType: 'completion',
-        rawAmount: -completionTokens,
+        rawAmount: -Math.max(completionTokens, 0),
+        inputTokenCount: totalInputTokens,
      });
    }

--- a/api/models/spendTokens.spec.js
+++ b/api/models/spendTokens.spec.js
@ -1,7 +1,8 @@
 const mongoose = require('mongoose');
 const { MongoMemoryServer } = require('mongodb-memory-server');
-const { spendTokens, spendStructuredTokens } = require('./spendTokens');
 const { createTransaction, createAutoRefillTransaction } = require('./Transaction');
+const { tokenValues, premiumTokenValues, getCacheMultiplier } = require('./tx');
+const { spendTokens, spendStructuredTokens } = require('./spendTokens');

 require('~/db/models');

@ -734,4 +735,457 @@ describe('spendTokens', () => {
    expect(balance).toBeDefined();
    expect(balance.tokenCredits).toBeLessThan(10000); // Balance should be reduced
  });
+
+  describe('premium token pricing', () => {
+    it('should charge standard rates for claude-opus-4-6 when prompt tokens are below threshold', async () => {
+      const initialBalance = 100000000;
+      await Balance.create({
+        user: userId,
+        tokenCredits: initialBalance,
+      });
+
+      const model = 'claude-opus-4-6';
+      const promptTokens = 100000;
+      const completionTokens = 500;
+
+      const txData = {
+        user: userId,
+        conversationId: 'test-standard-pricing',
+        model,
+        context: 'test',
+        balance: { enabled: true },
+      };
+
+      await spendTokens(txData, { promptTokens, completionTokens });
+
+      const expectedCost =
+        promptTokens * tokenValues[model].prompt + completionTokens * tokenValues[model].completion;
+
+      const balance = await Balance.findOne({ user: userId });
+      expect(balance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+    });
+
+    it('should charge premium rates for claude-opus-4-6 when prompt tokens exceed threshold', async () => {
+      const initialBalance = 100000000;
+      await Balance.create({
+        user: userId,
+        tokenCredits: initialBalance,
+      });
+
+      const model = 'claude-opus-4-6';
+      const promptTokens = 250000;
+      const completionTokens = 500;
+
+      const txData = {
+        user: userId,
+        conversationId: 'test-premium-pricing',
+        model,
+        context: 'test',
+        balance: { enabled: true },
+      };
+
+      await spendTokens(txData, { promptTokens, completionTokens });
+
+      const expectedCost =
+        promptTokens * premiumTokenValues[model].prompt +
+        completionTokens * premiumTokenValues[model].completion;
+
+      const balance = await Balance.findOne({ user: userId });
+      expect(balance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+    });
+
+    it('should charge premium rates for both prompt and completion in structured tokens when above threshold', async () => {
+      const initialBalance = 100000000;
+      await Balance.create({
+        user: userId,
+        tokenCredits: initialBalance,
+      });
+
+      const model = 'claude-opus-4-6';
+      const txData = {
+        user: userId,
+        conversationId: 'test-structured-premium',
+        model,
+        context: 'test',
+        balance: { enabled: true },
+      };
+
+      const tokenUsage = {
+        promptTokens: {
+          input: 200000,
+          write: 10000,
+          read: 5000,
+        },
+        completionTokens: 1000,
+      };
+
+      const result = await spendStructuredTokens(txData, tokenUsage);
+
+      const premiumPromptRate = premiumTokenValues[model].prompt;
+      const premiumCompletionRate = premiumTokenValues[model].completion;
+      const writeRate = getCacheMultiplier({ model, cacheType: 'write' });
+      const readRate = getCacheMultiplier({ model, cacheType: 'read' });
+
+      const expectedPromptCost =
+        tokenUsage.promptTokens.input * premiumPromptRate +
+        tokenUsage.promptTokens.write * writeRate +
+        tokenUsage.promptTokens.read * readRate;
+      const expectedCompletionCost = tokenUsage.completionTokens * premiumCompletionRate;
+
+      expect(result.prompt.prompt).toBeCloseTo(-expectedPromptCost, 0);
+      expect(result.completion.completion).toBeCloseTo(-expectedCompletionCost, 0);
+    });
+
+    it('should charge standard rates for structured tokens when below threshold', async () => {
+      const initialBalance = 100000000;
+      await Balance.create({
+        user: userId,
+        tokenCredits: initialBalance,
+      });
+
+      const model = 'claude-opus-4-6';
+      const txData = {
+        user: userId,
+        conversationId: 'test-structured-standard',
+        model,
+        context: 'test',
+        balance: { enabled: true },
+      };
+
+      const tokenUsage = {
+        promptTokens: {
+          input: 50000,
+          write: 10000,
+          read: 5000,
+        },
+        completionTokens: 1000,
+      };
+
+      const result = await spendStructuredTokens(txData, tokenUsage);
+
+      const standardPromptRate = tokenValues[model].prompt;
+      const standardCompletionRate = tokenValues[model].completion;
+      const writeRate = getCacheMultiplier({ model, cacheType: 'write' });
+      const readRate = getCacheMultiplier({ model, cacheType: 'read' });
+
+      const expectedPromptCost =
+        tokenUsage.promptTokens.input * standardPromptRate +
+        tokenUsage.promptTokens.write * writeRate +
+        tokenUsage.promptTokens.read * readRate;
+      const expectedCompletionCost = tokenUsage.completionTokens * standardCompletionRate;
+
+      expect(result.prompt.prompt).toBeCloseTo(-expectedPromptCost, 0);
+      expect(result.completion.completion).toBeCloseTo(-expectedCompletionCost, 0);
+    });
+
+    it('should charge standard rates for gemini-3.1-pro-preview when prompt tokens are below threshold', async () => {
+      const initialBalance = 100000000;
+      await Balance.create({
+        user: userId,
+        tokenCredits: initialBalance,
+      });
+
+      const model = 'gemini-3.1-pro-preview';
+      const promptTokens = 100000;
+      const completionTokens = 500;
+
+      const txData = {
+        user: userId,
+        conversationId: 'test-gemini31-standard-pricing',
+        model,
+        context: 'test',
+        balance: { enabled: true },
+      };
+
+      await spendTokens(txData, { promptTokens, completionTokens });
+
+      const expectedCost =
+        promptTokens * tokenValues['gemini-3.1'].prompt +
+        completionTokens * tokenValues['gemini-3.1'].completion;
+
+      const balance = await Balance.findOne({ user: userId });
+      expect(balance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+    });
+
+    it('should charge premium rates for gemini-3.1-pro-preview when prompt tokens exceed threshold', async () => {
+      const initialBalance = 100000000;
+      await Balance.create({
+        user: userId,
+        tokenCredits: initialBalance,
+      });
+
+      const model = 'gemini-3.1-pro-preview';
+      const promptTokens = 250000;
+      const completionTokens = 500;
+
+      const txData = {
+        user: userId,
+        conversationId: 'test-gemini31-premium-pricing',
+        model,
+        context: 'test',
+        balance: { enabled: true },
+      };
+
+      await spendTokens(txData, { promptTokens, completionTokens });
+
+      const expectedCost =
+        promptTokens * premiumTokenValues['gemini-3.1'].prompt +
+        completionTokens * premiumTokenValues['gemini-3.1'].completion;
+
+      const balance = await Balance.findOne({ user: userId });
+      expect(balance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+    });
+
+    it('should charge premium rates for gemini-3.1-pro-preview-customtools when prompt tokens exceed threshold', async () => {
+      const initialBalance = 100000000;
+      await Balance.create({
+        user: userId,
+        tokenCredits: initialBalance,
+      });
+
+      const model = 'gemini-3.1-pro-preview-customtools';
+      const promptTokens = 250000;
+      const completionTokens = 500;
+
+      const txData = {
+        user: userId,
+        conversationId: 'test-gemini31-customtools-premium',
+        model,
+        context: 'test',
+        balance: { enabled: true },
+      };
+
+      await spendTokens(txData, { promptTokens, completionTokens });
+
+      const expectedCost =
+        promptTokens * premiumTokenValues['gemini-3.1'].prompt +
+        completionTokens * premiumTokenValues['gemini-3.1'].completion;
+
+      const balance = await Balance.findOne({ user: userId });
+      expect(balance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+    });
+
+    it('should charge premium rates for structured gemini-3.1 tokens when total input exceeds threshold', async () => {
+      const initialBalance = 100000000;
+      await Balance.create({
+        user: userId,
+        tokenCredits: initialBalance,
+      });
+
+      const model = 'gemini-3.1-pro-preview';
+      const txData = {
+        user: userId,
+        conversationId: 'test-gemini31-structured-premium',
+        model,
+        context: 'test',
+        balance: { enabled: true },
+      };
+
+      const tokenUsage = {
+        promptTokens: {
+          input: 200000,
+          write: 10000,
+          read: 5000,
+        },
+        completionTokens: 1000,
+      };
+
+      const result = await spendStructuredTokens(txData, tokenUsage);
+
+      const premiumPromptRate = premiumTokenValues['gemini-3.1'].prompt;
+      const premiumCompletionRate = premiumTokenValues['gemini-3.1'].completion;
+      const writeRate = getCacheMultiplier({ model, cacheType: 'write' });
+      const readRate = getCacheMultiplier({ model, cacheType: 'read' });
+
+      const expectedPromptCost =
+        tokenUsage.promptTokens.input * premiumPromptRate +
+        tokenUsage.promptTokens.write * writeRate +
+        tokenUsage.promptTokens.read * readRate;
+      const expectedCompletionCost = tokenUsage.completionTokens * premiumCompletionRate;
+
+      expect(result.prompt.prompt).toBeCloseTo(-expectedPromptCost, 0);
+      expect(result.completion.completion).toBeCloseTo(-expectedCompletionCost, 0);
+    });
+
+    it('should not apply premium pricing to non-premium models regardless of prompt size', async () => {
+      const initialBalance = 100000000;
+      await Balance.create({
+        user: userId,
+        tokenCredits: initialBalance,
+      });
+
+      const model = 'claude-opus-4-5';
+      const promptTokens = 300000;
+      const completionTokens = 500;
+
+      const txData = {
+        user: userId,
+        conversationId: 'test-no-premium',
+        model,
+        context: 'test',
+        balance: { enabled: true },
+      };
+
+      await spendTokens(txData, { promptTokens, completionTokens });
+
+      const expectedCost =
+        promptTokens * tokenValues[model].prompt + completionTokens * tokenValues[model].completion;
+
+      const balance = await Balance.findOne({ user: userId });
+      expect(balance.tokenCredits).toBeCloseTo(initialBalance - expectedCost, 0);
+    });
+  });
+
+  describe('inputTokenCount Normalization', () => {
+    it('should normalize negative promptTokens to zero for inputTokenCount', async () => {
+      await Balance.create({
+        user: userId,
+        tokenCredits: 100000000,
+      });
+
+      const txData = {
+        user: userId,
+        conversationId: 'test-negative-prompt',
+        model: 'claude-opus-4-6',
+        context: 'test',
+        balance: { enabled: true },
+      };
+
+      await spendTokens(txData, { promptTokens: -500, completionTokens: 100 });
+
+      const transactions = await Transaction.find({ user: userId }).sort({ tokenType: 1 });
+
+      const completionTx = transactions.find((t) => t.tokenType === 'completion');
+      const promptTx = transactions.find((t) => t.tokenType === 'prompt');
+
+      expect(Math.abs(promptTx.rawAmount)).toBe(0);
+      expect(completionTx.rawAmount).toBe(-100);
+
+      const standardCompletionRate = tokenValues['claude-opus-4-6'].completion;
+      expect(completionTx.rate).toBe(standardCompletionRate);
+    });
+
+    it('should use normalized inputTokenCount for premium threshold check on completion', async () => {
+      const initialBalance = 100000000;
+      await Balance.create({
+        user: userId,
+        tokenCredits: initialBalance,
+      });
+
+      const model = 'claude-opus-4-6';
+      const promptTokens = 250000;
+      const completionTokens = 500;
+
+      const txData = {
+        user: userId,
+        conversationId: 'test-normalized-premium',
+        model,
+        context: 'test',
+        balance: { enabled: true },
+      };
+
+      await spendTokens(txData, { promptTokens, completionTokens });
+
+      const transactions = await Transaction.find({ user: userId }).sort({ tokenType: 1 });
+      const completionTx = transactions.find((t) => t.tokenType === 'completion');
+      const promptTx = transactions.find((t) => t.tokenType === 'prompt');
+
+      const premiumPromptRate = premiumTokenValues[model].prompt;
+      const premiumCompletionRate = premiumTokenValues[model].completion;
+      expect(promptTx.rate).toBe(premiumPromptRate);
+      expect(completionTx.rate).toBe(premiumCompletionRate);
+    });
+
+    it('should keep inputTokenCount as zero when promptTokens is zero', async () => {
+      await Balance.create({
+        user: userId,
+        tokenCredits: 100000000,
+      });
+
+      const txData = {
+        user: userId,
+        conversationId: 'test-zero-prompt',
+        model: 'claude-opus-4-6',
+        context: 'test',
+        balance: { enabled: true },
+      };
+
+      await spendTokens(txData, { promptTokens: 0, completionTokens: 100 });
+
+      const transactions = await Transaction.find({ user: userId }).sort({ tokenType: 1 });
+      const completionTx = transactions.find((t) => t.tokenType === 'completion');
+      const promptTx = transactions.find((t) => t.tokenType === 'prompt');
+
+      expect(Math.abs(promptTx.rawAmount)).toBe(0);
+
+      const standardCompletionRate = tokenValues['claude-opus-4-6'].completion;
+      expect(completionTx.rate).toBe(standardCompletionRate);
+    });
+
+    it('should not trigger premium pricing with negative promptTokens on premium model', async () => {
+      const initialBalance = 100000000;
+      await Balance.create({
+        user: userId,
+        tokenCredits: initialBalance,
+      });
+
+      const model = 'claude-opus-4-6';
+      const txData = {
+        user: userId,
+        conversationId: 'test-negative-no-premium',
+        model,
+        context: 'test',
+        balance: { enabled: true },
+      };
+
+      await spendTokens(txData, { promptTokens: -300000, completionTokens: 500 });
+
+      const transactions = await Transaction.find({ user: userId }).sort({ tokenType: 1 });
+      const completionTx = transactions.find((t) => t.tokenType === 'completion');
+
+      const standardCompletionRate = tokenValues[model].completion;
+      expect(completionTx.rate).toBe(standardCompletionRate);
+    });
+
+    it('should normalize negative structured token values to zero in spendStructuredTokens', async () => {
+      const initialBalance = 100000000;
+      await Balance.create({
+        user: userId,
+        tokenCredits: initialBalance,
+      });
+
+      const model = 'claude-opus-4-6';
+      const txData = {
+        user: userId,
+        conversationId: 'test-negative-structured',
+        model,
+        context: 'test',
+        balance: { enabled: true },
+      };
+
+      const tokenUsage = {
+        promptTokens: { input: -100, write: 50, read: -30 },
+        completionTokens: -200,
+      };
+
+      await spendStructuredTokens(txData, tokenUsage);
+
+      const transactions = await Transaction.find({
+        user: userId,
+        conversationId: 'test-negative-structured',
+      }).sort({ tokenType: 1 });
+
+      const completionTx = transactions.find((t) => t.tokenType === 'completion');
+      const promptTx = transactions.find((t) => t.tokenType === 'prompt');
+
+      expect(Math.abs(promptTx.inputTokens)).toBe(0);
+      expect(promptTx.writeTokens).toBe(-50);
+      expect(Math.abs(promptTx.readTokens)).toBe(0);
+
+      expect(Math.abs(completionTx.rawAmount)).toBe(0);
+
+      const standardRate = tokenValues[model].completion;
+      expect(completionTx.rate).toBe(standardRate);
+    });
+  });
 });
--- a/api/models/tx.js
+++ b/api/models/tx.js
@ -1,10 +1,27 @@
 const { matchModelName, findMatchingPattern } = require('@librechat/api');
 const defaultRate = 6;

+/**
+ * Token Pricing Configuration
+ *
+ * Pattern Matching
+ * ================
+ * `findMatchingPattern` (from @librechat/api) uses `modelName.includes(key)` and selects
+ * the LONGEST matching key. If a key's length equals the model name's length (exact match),
+ * it returns immediately. Definition order does NOT affect correctness.
+ *
+ * Key ordering matters only for:
+ *   1. Performance: list older/less common models first so newer/common models
+ *      are found earlier in the reverse scan.
+ *   2. Same-length tie-breaking: the last-defined key wins on equal-length matches.
+ *
+ * This applies to BOTH `tokenValues` and `cacheTokenValues` objects.
+ */
+
 /**
 * AWS Bedrock pricing
 * source: https://aws.amazon.com/bedrock/pricing/
- * */
+ */
 const bedrockValues = {
  // Basic llama2 patterns (base defaults to smallest variant)
  llama2: { prompt: 0.75, completion: 1.0 },
@ -80,6 +97,11 @@ const bedrockValues = {
  'nova-pro': { prompt: 0.8, completion: 3.2 },
  'nova-premier': { prompt: 2.5, completion: 12.5 },
  'deepseek.r1': { prompt: 1.35, completion: 5.4 },
+  // Moonshot/Kimi models on Bedrock
+  'moonshot.kimi': { prompt: 0.6, completion: 2.5 },
+  'moonshot.kimi-k2': { prompt: 0.6, completion: 2.5 },
+  'moonshot.kimi-k2.5': { prompt: 0.6, completion: 3.0 },
+  'moonshot.kimi-k2-thinking': { prompt: 0.6, completion: 2.5 },
 };

 /**
@ -115,9 +137,14 @@ const tokenValues = Object.assign(
    'gpt-5': { prompt: 1.25, completion: 10 },
    'gpt-5.1': { prompt: 1.25, completion: 10 },
    'gpt-5.2': { prompt: 1.75, completion: 14 },
+    'gpt-5.3': { prompt: 1.75, completion: 14 },
+    'gpt-5.4': { prompt: 2.5, completion: 15 },
+    // TODO: gpt-5.4-pro pricing not yet officially published — verify before release
+    'gpt-5.4-pro': { prompt: 5, completion: 30 },
    'gpt-5-nano': { prompt: 0.05, completion: 0.4 },
    'gpt-5-mini': { prompt: 0.25, completion: 2 },
    'gpt-5-pro': { prompt: 15, completion: 120 },
+    'gpt-5.2-pro': { prompt: 21, completion: 168 },
    o1: { prompt: 15, completion: 60 },
    'o1-mini': { prompt: 1.1, completion: 4.4 },
    'o1-preview': { prompt: 15, completion: 60 },
@ -139,7 +166,9 @@ const tokenValues = Object.assign(
    'claude-haiku-4-5': { prompt: 1, completion: 5 },
    'claude-opus-4': { prompt: 15, completion: 75 },
    'claude-opus-4-5': { prompt: 5, completion: 25 },
+    'claude-opus-4-6': { prompt: 5, completion: 25 },
    'claude-sonnet-4': { prompt: 3, completion: 15 },
+    'claude-sonnet-4-6': { prompt: 3, completion: 15 },
    'command-r': { prompt: 0.5, completion: 1.5 },
    'command-r-plus': { prompt: 3, completion: 15 },
    'command-text': { prompt: 1.5, completion: 2.0 },
@ -163,6 +192,8 @@ const tokenValues = Object.assign(
    'gemini-2.5-flash-image': { prompt: 0.15, completion: 30 },
    'gemini-3': { prompt: 2, completion: 12 },
    'gemini-3-pro-image': { prompt: 2, completion: 120 },
+    'gemini-3.1': { prompt: 2, completion: 12 },
+    'gemini-3.1-flash-lite': { prompt: 0.25, completion: 1.5 },
    'gemini-pro-vision': { prompt: 0.5, completion: 1.5 },
    grok: { prompt: 2.0, completion: 10.0 }, // Base pattern defaults to grok-2
    'grok-beta': { prompt: 5.0, completion: 15.0 },
@ -189,7 +220,31 @@ const tokenValues = Object.assign(
    'pixtral-large': { prompt: 2.0, completion: 6.0 },
    'mistral-large': { prompt: 2.0, completion: 6.0 },
    'mixtral-8x22b': { prompt: 0.65, completion: 0.65 },
-    kimi: { prompt: 0.14, completion: 2.49 }, // Base pattern (using kimi-k2 pricing)
+    // Moonshot/Kimi models (base patterns first; longest match wins, so order is not critical)
+    kimi: { prompt: 0.6, completion: 2.5 }, // Base pattern
+    moonshot: { prompt: 2.0, completion: 5.0 }, // Base pattern (using 128k pricing)
+    'kimi-latest': { prompt: 0.2, completion: 2.0 }, // Uses 8k/32k/128k pricing dynamically
+    'kimi-k2': { prompt: 0.6, completion: 2.5 },
+    'kimi-k2.5': { prompt: 0.6, completion: 3.0 },
+    'kimi-k2-turbo': { prompt: 1.15, completion: 8.0 },
+    'kimi-k2-turbo-preview': { prompt: 1.15, completion: 8.0 },
+    'kimi-k2-0905': { prompt: 0.6, completion: 2.5 },
+    'kimi-k2-0905-preview': { prompt: 0.6, completion: 2.5 },
+    'kimi-k2-0711': { prompt: 0.6, completion: 2.5 },
+    'kimi-k2-0711-preview': { prompt: 0.6, completion: 2.5 },
+    'kimi-k2-thinking': { prompt: 0.6, completion: 2.5 },
+    'kimi-k2-thinking-turbo': { prompt: 1.15, completion: 8.0 },
+    'moonshot-v1': { prompt: 2.0, completion: 5.0 },
+    'moonshot-v1-auto': { prompt: 2.0, completion: 5.0 },
+    'moonshot-v1-8k': { prompt: 0.2, completion: 2.0 },
+    'moonshot-v1-8k-vision': { prompt: 0.2, completion: 2.0 },
+    'moonshot-v1-8k-vision-preview': { prompt: 0.2, completion: 2.0 },
+    'moonshot-v1-32k': { prompt: 1.0, completion: 3.0 },
+    'moonshot-v1-32k-vision': { prompt: 1.0, completion: 3.0 },
+    'moonshot-v1-32k-vision-preview': { prompt: 1.0, completion: 3.0 },
+    'moonshot-v1-128k': { prompt: 2.0, completion: 5.0 },
+    'moonshot-v1-128k-vision': { prompt: 2.0, completion: 5.0 },
+    'moonshot-v1-128k-vision-preview': { prompt: 2.0, completion: 5.0 },
    // GPT-OSS models (specific sizes)
    'gpt-oss:20b': { prompt: 0.05, completion: 0.2 },
    'gpt-oss-20b': { prompt: 0.05, completion: 0.2 },
@ -249,12 +304,64 @@ const cacheTokenValues = {
  'claude-3-haiku': { write: 0.3, read: 0.03 },
  'claude-haiku-4-5': { write: 1.25, read: 0.1 },
  'claude-sonnet-4': { write: 3.75, read: 0.3 },
+  'claude-sonnet-4-6': { write: 3.75, read: 0.3 },
  'claude-opus-4': { write: 18.75, read: 1.5 },
  'claude-opus-4-5': { write: 6.25, read: 0.5 },
+  'claude-opus-4-6': { write: 6.25, read: 0.5 },
+  // OpenAI models — cached input discount varies by family:
+  //   gpt-4o (incl. mini), o1 (incl. mini/preview): 50% off
+  //   gpt-4.1 (incl. mini/nano), o3 (incl. mini), o4-mini: 75% off
+  //   gpt-5.x (excl. pro variants): 90% off
+  //   gpt-5-pro, gpt-5.2-pro, gpt-5.4-pro: no caching
+  'gpt-4o': { write: 2.5, read: 1.25 },
+  'gpt-4o-mini': { write: 0.15, read: 0.075 },
+  'gpt-4.1': { write: 2, read: 0.5 },
+  'gpt-4.1-mini': { write: 0.4, read: 0.1 },
+  'gpt-4.1-nano': { write: 0.1, read: 0.025 },
+  'gpt-5': { write: 1.25, read: 0.125 },
+  'gpt-5.1': { write: 1.25, read: 0.125 },
+  'gpt-5.2': { write: 1.75, read: 0.175 },
+  'gpt-5.3': { write: 1.75, read: 0.175 },
+  'gpt-5.4': { write: 2.5, read: 0.25 },
+  'gpt-5-mini': { write: 0.25, read: 0.025 },
+  'gpt-5-nano': { write: 0.05, read: 0.005 },
+  o1: { write: 15, read: 7.5 },
+  'o1-mini': { write: 1.1, read: 0.55 },
+  'o1-preview': { write: 15, read: 7.5 },
+  o3: { write: 2, read: 0.5 },
+  'o3-mini': { write: 1.1, read: 0.275 },
+  'o4-mini': { write: 1.1, read: 0.275 },
  // DeepSeek models - cache hit: $0.028/1M, cache miss: $0.28/1M
  deepseek: { write: 0.28, read: 0.028 },
  'deepseek-chat': { write: 0.28, read: 0.028 },
  'deepseek-reasoner': { write: 0.28, read: 0.028 },
+  // Moonshot/Kimi models - cache hit: $0.15/1M (k2) or $0.10/1M (k2.5), cache miss: $0.60/1M
+  kimi: { write: 0.6, read: 0.15 },
+  'kimi-k2': { write: 0.6, read: 0.15 },
+  'kimi-k2.5': { write: 0.6, read: 0.1 },
+  'kimi-k2-turbo': { write: 1.15, read: 0.15 },
+  'kimi-k2-turbo-preview': { write: 1.15, read: 0.15 },
+  'kimi-k2-0905': { write: 0.6, read: 0.15 },
+  'kimi-k2-0905-preview': { write: 0.6, read: 0.15 },
+  'kimi-k2-0711': { write: 0.6, read: 0.15 },
+  'kimi-k2-0711-preview': { write: 0.6, read: 0.15 },
+  'kimi-k2-thinking': { write: 0.6, read: 0.15 },
+  'kimi-k2-thinking-turbo': { write: 1.15, read: 0.15 },
+  // Gemini 3.1 Pro - cache write: $2.00/1M, cache read: $0.20/1M
+  'gemini-3.1': { write: 2, read: 0.2 },
+  // Gemini 3.1 Flash-Lite - cache write: $0.25/1M, cache read: $0.025/1M
+  'gemini-3.1-flash-lite': { write: 0.25, read: 0.025 },
+};
+
+/**
+ * Premium (tiered) pricing for models whose rates change based on prompt size.
+ * Each entry specifies the token threshold and the rates that apply above it.
+ *
+ * Entries are looked up by value key in `getPremiumRate`, which returns the `prompt` /
+ * `completion` rate only when the input token count is strictly greater than `threshold`;
+ * a count equal to the threshold does not select the premium rate.
+ * NOTE(review): rates are presumably expressed in the same unit as `tokenValues`
+ * (USD per 1M tokens) — confirm against the provider pricing pages.
+ * @type {Object.<string, {threshold: number, prompt: number, completion: number}>}
+ */
+const premiumTokenValues = {
+  'claude-opus-4-6': { threshold: 200000, prompt: 10, completion: 37.5 },
+  'claude-sonnet-4-6': { threshold: 200000, prompt: 6, completion: 22.5 },
+  'gemini-3.1': { threshold: 200000, prompt: 4, completion: 18 },
 };

 /**
@ -313,15 +420,27 @@ const getValueKey = (model, endpoint) => {
 * @param {string} [params.model] - The model name to derive the value key from if not provided.
 * @param {string} [params.endpoint] - The endpoint name to derive the value key from if not provided.
 * @param {EndpointTokenConfig} [params.endpointTokenConfig] - The token configuration for the endpoint.
+ * @param {number} [params.inputTokenCount] - Total input token count for tiered pricing.
 * @returns {number} The multiplier for the given parameters, or a default value if not found.
 */
-const getMultiplier = ({ valueKey, tokenType, model, endpoint, endpointTokenConfig }) => {
+const getMultiplier = ({
+  model,
+  valueKey,
+  endpoint,
+  tokenType,
+  inputTokenCount,
+  endpointTokenConfig,
+}) => {
  if (endpointTokenConfig) {
    return endpointTokenConfig?.[model]?.[tokenType] ?? defaultRate;
  }

  if (valueKey && tokenType) {
-    return tokenValues[valueKey][tokenType] ?? defaultRate;
+    const premiumRate = getPremiumRate(valueKey, tokenType, inputTokenCount);
+    if (premiumRate != null) {
+      return premiumRate;
+    }
+    return tokenValues[valueKey]?.[tokenType] ?? defaultRate;
  }

  if (!tokenType || !model) {
@ -333,10 +452,33 @@ const getMultiplier = ({ valueKey, tokenType, model, endpoint, endpointTokenConf
    return defaultRate;
  }

-  // If we got this far, and values[tokenType] is undefined somehow, return a rough average of default multipliers
+  const premiumRate = getPremiumRate(valueKey, tokenType, inputTokenCount);
+  if (premiumRate != null) {
+    return premiumRate;
+  }
+
  return tokenValues[valueKey]?.[tokenType] ?? defaultRate;
 };

+/**
+ * Checks if premium (tiered) pricing applies and returns the premium rate.
+ * Each model defines its own threshold in `premiumTokenValues`.
+ * The premium rate is selected only when `inputTokenCount` is strictly greater than the
+ * entry's `threshold`; a count equal to the threshold yields `null`.
+ * @param {string} valueKey - Key used to look up an entry in `premiumTokenValues`.
+ * @param {string} tokenType - Property read from the premium entry (e.g. 'prompt' or 'completion').
+ * @param {number} [inputTokenCount] - Input token count compared against the entry's threshold.
+ * @returns {number|null} The premium rate, or `null` when `inputTokenCount` is null/undefined,
+ *   when `valueKey` has no premium entry, when the count does not exceed the threshold, or when
+ *   the entry has no value for `tokenType`.
+ */
+const getPremiumRate = (valueKey, tokenType, inputTokenCount) => {
+  if (inputTokenCount == null) { // loose equality: matches both null and undefined
+    return null;
+  }
+  const premiumEntry = premiumTokenValues[valueKey];
+  if (!premiumEntry || inputTokenCount <= premiumEntry.threshold) { // at or below threshold: no premium rate
+    return null;
+  }
+  return premiumEntry[tokenType] ?? null; // missing tokenType on the entry is reported as null
+};
+
 /**
 * Retrieves the cache multiplier for a given value key and token type. If no value key is provided,
 * it attempts to derive it from the model name.
@ -373,8 +515,10 @@ const getCacheMultiplier = ({ valueKey, cacheType, model, endpoint, endpointToke

 module.exports = {
  tokenValues,
+  premiumTokenValues,
  getValueKey,
  getMultiplier,
+  getPremiumRate,
  getCacheMultiplier,
  defaultRate,
  cacheTokenValues,
--- a/api/models/tx.spec.js
+++ b/api/models/tx.spec.js
@ -1,3 +1,4 @@
+/** Note: No hard-coded values should be used in this file. */
 const { maxTokensMap } = require('@librechat/api');
 const { EModelEndpoint } = require('librechat-data-provider');
 const {
@ -5,8 +6,10 @@ const {
  tokenValues,
  getValueKey,
  getMultiplier,
+  getPremiumRate,
  cacheTokenValues,
  getCacheMultiplier,
+  premiumTokenValues,
 } = require('./tx');

 describe('getValueKey', () => {
@ -49,6 +52,24 @@ describe('getValueKey', () => {
    expect(getValueKey('openai/gpt-5.2')).toBe('gpt-5.2');
  });

+  it('should return "gpt-5.3" for model name containing "gpt-5.3"', () => {
+    expect(getValueKey('gpt-5.3')).toBe('gpt-5.3');
+    expect(getValueKey('gpt-5.3-chat-latest')).toBe('gpt-5.3');
+    expect(getValueKey('gpt-5.3-codex')).toBe('gpt-5.3');
+    expect(getValueKey('openai/gpt-5.3')).toBe('gpt-5.3');
+  });
+
+  it('should return "gpt-5.4" for model name containing "gpt-5.4"', () => {
+    expect(getValueKey('gpt-5.4')).toBe('gpt-5.4');
+    expect(getValueKey('gpt-5.4-thinking')).toBe('gpt-5.4');
+    expect(getValueKey('openai/gpt-5.4')).toBe('gpt-5.4');
+  });
+
+  it('should return "gpt-5.4-pro" for model name containing "gpt-5.4-pro"', () => {
+    expect(getValueKey('gpt-5.4-pro')).toBe('gpt-5.4-pro');
+    expect(getValueKey('openai/gpt-5.4-pro')).toBe('gpt-5.4-pro');
+  });
+
  it('should return "gpt-3.5-turbo-1106" for model name containing "gpt-3.5-turbo-1106"', () => {
    expect(getValueKey('gpt-3.5-turbo-1106-some-other-info')).toBe('gpt-3.5-turbo-1106');
    expect(getValueKey('openai/gpt-3.5-turbo-1106')).toBe('gpt-3.5-turbo-1106');
@ -135,6 +156,12 @@ describe('getValueKey', () => {
    expect(getValueKey('gpt-5-pro-preview')).toBe('gpt-5-pro');
  });

+  it('should return "gpt-5.2-pro" for model name containing "gpt-5.2-pro"', () => {
+    expect(getValueKey('gpt-5.2-pro')).toBe('gpt-5.2-pro');
+    expect(getValueKey('gpt-5.2-pro-2025-03-01')).toBe('gpt-5.2-pro');
+    expect(getValueKey('openai/gpt-5.2-pro')).toBe('gpt-5.2-pro');
+  });
+
  it('should return "gpt-4o" for model type of "gpt-4o"', () => {
    expect(getValueKey('gpt-4o-2024-08-06')).toBe('gpt-4o');
    expect(getValueKey('gpt-4o-2024-08-06-0718')).toBe('gpt-4o');
@ -239,6 +266,15 @@ describe('getMultiplier', () => {
    expect(getMultiplier({ valueKey: '8k', tokenType: 'unknownType' })).toBe(defaultRate);
  });

+  it('should return defaultRate if valueKey does not exist in tokenValues', () => {
+    expect(getMultiplier({ valueKey: 'non-existent-model', tokenType: 'prompt' })).toBe(
+      defaultRate,
+    );
+    expect(getMultiplier({ valueKey: 'non-existent-model', tokenType: 'completion' })).toBe(
+      defaultRate,
+    );
+  });
+
  it('should derive the valueKey from the model if not provided', () => {
    expect(getMultiplier({ tokenType: 'prompt', model: 'gpt-4-some-other-info' })).toBe(
      tokenValues['8k'].prompt,
@ -324,6 +360,18 @@ describe('getMultiplier', () => {
    );
  });

+  it('should return the correct multiplier for gpt-5.2-pro', () => {
+    expect(getMultiplier({ model: 'gpt-5.2-pro', tokenType: 'prompt' })).toBe(
+      tokenValues['gpt-5.2-pro'].prompt,
+    );
+    expect(getMultiplier({ model: 'gpt-5.2-pro', tokenType: 'completion' })).toBe(
+      tokenValues['gpt-5.2-pro'].completion,
+    );
+    expect(getMultiplier({ model: 'openai/gpt-5.2-pro', tokenType: 'prompt' })).toBe(
+      tokenValues['gpt-5.2-pro'].prompt,
+    );
+  });
+
  it('should return the correct multiplier for gpt-5.1', () => {
    expect(getMultiplier({ model: 'gpt-5.1', tokenType: 'prompt' })).toBe(
      tokenValues['gpt-5.1'].prompt,
@ -334,8 +382,6 @@ describe('getMultiplier', () => {
    expect(getMultiplier({ model: 'openai/gpt-5.1', tokenType: 'prompt' })).toBe(
      tokenValues['gpt-5.1'].prompt,
    );
-    expect(tokenValues['gpt-5.1'].prompt).toBe(1.25);
-    expect(tokenValues['gpt-5.1'].completion).toBe(10);
  });

  it('should return the correct multiplier for gpt-5.2', () => {
@ -348,8 +394,48 @@ describe('getMultiplier', () => {
    expect(getMultiplier({ model: 'openai/gpt-5.2', tokenType: 'prompt' })).toBe(
      tokenValues['gpt-5.2'].prompt,
    );
-    expect(tokenValues['gpt-5.2'].prompt).toBe(1.75);
-    expect(tokenValues['gpt-5.2'].completion).toBe(14);
+  });
+
+  it('should return the correct multiplier for gpt-5.3', () => {
+    expect(getMultiplier({ model: 'gpt-5.3', tokenType: 'prompt' })).toBe(
+      tokenValues['gpt-5.3'].prompt,
+    );
+    expect(getMultiplier({ model: 'gpt-5.3', tokenType: 'completion' })).toBe(
+      tokenValues['gpt-5.3'].completion,
+    );
+    expect(getMultiplier({ model: 'gpt-5.3-codex', tokenType: 'prompt' })).toBe(
+      tokenValues['gpt-5.3'].prompt,
+    );
+    expect(getMultiplier({ model: 'openai/gpt-5.3', tokenType: 'completion' })).toBe(
+      tokenValues['gpt-5.3'].completion,
+    );
+  });
+
+  it('should return the correct multiplier for gpt-5.4', () => {
+    expect(getMultiplier({ model: 'gpt-5.4', tokenType: 'prompt' })).toBe(
+      tokenValues['gpt-5.4'].prompt,
+    );
+    expect(getMultiplier({ model: 'gpt-5.4', tokenType: 'completion' })).toBe(
+      tokenValues['gpt-5.4'].completion,
+    );
+    expect(getMultiplier({ model: 'gpt-5.4-thinking', tokenType: 'prompt' })).toBe(
+      tokenValues['gpt-5.4'].prompt,
+    );
+    expect(getMultiplier({ model: 'openai/gpt-5.4', tokenType: 'completion' })).toBe(
+      tokenValues['gpt-5.4'].completion,
+    );
+  });
+
+  it('should return the correct multiplier for gpt-5.4-pro', () => {
+    expect(getMultiplier({ model: 'gpt-5.4-pro', tokenType: 'prompt' })).toBe(
+      tokenValues['gpt-5.4-pro'].prompt,
+    );
+    expect(getMultiplier({ model: 'gpt-5.4-pro', tokenType: 'completion' })).toBe(
+      tokenValues['gpt-5.4-pro'].completion,
+    );
+    expect(getMultiplier({ model: 'openai/gpt-5.4-pro', tokenType: 'prompt' })).toBe(
+      tokenValues['gpt-5.4-pro'].prompt,
+    );
  });

  it('should return the correct multiplier for gpt-4o', () => {
@ -815,8 +901,6 @@ describe('Deepseek Model Tests', () => {
    expect(getMultiplier({ model: 'deepseek-chat', tokenType: 'completion' })).toBe(
      tokenValues['deepseek-chat'].completion,
    );
-    expect(tokenValues['deepseek-chat'].prompt).toBe(0.28);
-    expect(tokenValues['deepseek-chat'].completion).toBe(0.42);
  });

  it('should return correct pricing for deepseek-reasoner', () => {
@ -826,8 +910,6 @@ describe('Deepseek Model Tests', () => {
    expect(getMultiplier({ model: 'deepseek-reasoner', tokenType: 'completion' })).toBe(
      tokenValues['deepseek-reasoner'].completion,
    );
-    expect(tokenValues['deepseek-reasoner'].prompt).toBe(0.28);
-    expect(tokenValues['deepseek-reasoner'].completion).toBe(0.42);
  });

  it('should handle DeepSeek model name variations with provider prefixes', () => {
@ -840,8 +922,8 @@ describe('Deepseek Model Tests', () => {
    modelVariations.forEach((model) => {
      const promptMultiplier = getMultiplier({ model, tokenType: 'prompt' });
      const completionMultiplier = getMultiplier({ model, tokenType: 'completion' });
-      expect(promptMultiplier).toBe(0.28);
-      expect(completionMultiplier).toBe(0.42);
+      expect(promptMultiplier).toBe(tokenValues['deepseek-chat'].prompt);
+      expect(completionMultiplier).toBe(tokenValues['deepseek-chat'].completion);
    });
  });

@ -860,13 +942,13 @@ describe('Deepseek Model Tests', () => {
    );
  });

-  it('should return correct cache pricing values for DeepSeek models', () => {
-    expect(cacheTokenValues['deepseek-chat'].write).toBe(0.28);
-    expect(cacheTokenValues['deepseek-chat'].read).toBe(0.028);
-    expect(cacheTokenValues['deepseek-reasoner'].write).toBe(0.28);
-    expect(cacheTokenValues['deepseek-reasoner'].read).toBe(0.028);
-    expect(cacheTokenValues['deepseek'].write).toBe(0.28);
-    expect(cacheTokenValues['deepseek'].read).toBe(0.028);
+  it('should have consistent cache pricing across DeepSeek model variants', () => {
+    expect(cacheTokenValues['deepseek'].write).toBe(cacheTokenValues['deepseek-chat'].write);
+    expect(cacheTokenValues['deepseek'].read).toBe(cacheTokenValues['deepseek-chat'].read);
+    expect(cacheTokenValues['deepseek-reasoner'].write).toBe(
+      cacheTokenValues['deepseek-chat'].write,
+    );
+    expect(cacheTokenValues['deepseek-reasoner'].read).toBe(cacheTokenValues['deepseek-chat'].read);
  });

  it('should handle DeepSeek cache multipliers with model variations', () => {
@ -875,8 +957,195 @@ describe('Deepseek Model Tests', () => {
    modelVariations.forEach((model) => {
      const writeMultiplier = getCacheMultiplier({ model, cacheType: 'write' });
      const readMultiplier = getCacheMultiplier({ model, cacheType: 'read' });
-      expect(writeMultiplier).toBe(0.28);
-      expect(readMultiplier).toBe(0.028);
+      expect(writeMultiplier).toBe(cacheTokenValues['deepseek-chat'].write);
+      expect(readMultiplier).toBe(cacheTokenValues['deepseek-chat'].read);
+    });
+  });
+});
+
+describe('Moonshot/Kimi Model Tests - Pricing', () => {
+  describe('Kimi Models', () => {
+    it('should return correct pricing for kimi base pattern', () => {
+      expect(getMultiplier({ model: 'kimi', tokenType: 'prompt' })).toBe(
+        tokenValues['kimi'].prompt,
+      );
+      expect(getMultiplier({ model: 'kimi', tokenType: 'completion' })).toBe(
+        tokenValues['kimi'].completion,
+      );
+    });
+
+    it('should return correct pricing for kimi-k2.5', () => {
+      expect(getMultiplier({ model: 'kimi-k2.5', tokenType: 'prompt' })).toBe(
+        tokenValues['kimi-k2.5'].prompt,
+      );
+      expect(getMultiplier({ model: 'kimi-k2.5', tokenType: 'completion' })).toBe(
+        tokenValues['kimi-k2.5'].completion,
+      );
+    });
+
+    it('should return correct pricing for kimi-k2 series', () => {
+      expect(getMultiplier({ model: 'kimi-k2', tokenType: 'prompt' })).toBe(
+        tokenValues['kimi-k2'].prompt,
+      );
+      expect(getMultiplier({ model: 'kimi-k2', tokenType: 'completion' })).toBe(
+        tokenValues['kimi-k2'].completion,
+      );
+    });
+
+    it('should return correct pricing for kimi-k2-turbo (higher pricing)', () => {
+      expect(getMultiplier({ model: 'kimi-k2-turbo', tokenType: 'prompt' })).toBe(
+        tokenValues['kimi-k2-turbo'].prompt,
+      );
+      expect(getMultiplier({ model: 'kimi-k2-turbo', tokenType: 'completion' })).toBe(
+        tokenValues['kimi-k2-turbo'].completion,
+      );
+    });
+
+    it('should return correct pricing for kimi-k2-thinking models', () => {
+      expect(getMultiplier({ model: 'kimi-k2-thinking', tokenType: 'prompt' })).toBe(
+        tokenValues['kimi-k2-thinking'].prompt,
+      );
+      expect(getMultiplier({ model: 'kimi-k2-thinking', tokenType: 'completion' })).toBe(
+        tokenValues['kimi-k2-thinking'].completion,
+      );
+      expect(getMultiplier({ model: 'kimi-k2-thinking-turbo', tokenType: 'prompt' })).toBe(
+        tokenValues['kimi-k2-thinking-turbo'].prompt,
+      );
+      expect(getMultiplier({ model: 'kimi-k2-thinking-turbo', tokenType: 'completion' })).toBe(
+        tokenValues['kimi-k2-thinking-turbo'].completion,
+      );
+    });
+
+    it('should handle Kimi model variations with provider prefixes', () => {
+      const modelVariations = ['openrouter/kimi-k2', 'openrouter/kimi-k2.5', 'openrouter/kimi'];
+
+      modelVariations.forEach((model) => {
+        const promptMultiplier = getMultiplier({ model, tokenType: 'prompt' });
+        const completionMultiplier = getMultiplier({ model, tokenType: 'completion' });
+        expect(promptMultiplier).toBe(tokenValues['kimi'].prompt);
+        expect([tokenValues['kimi'].completion, tokenValues['kimi-k2.5'].completion]).toContain(
+          completionMultiplier,
+        );
+      });
+    });
+  });
+
+  describe('Moonshot Models', () => {
+    it('should return correct pricing for moonshot base pattern (128k pricing)', () => {
+      expect(getMultiplier({ model: 'moonshot', tokenType: 'prompt' })).toBe(
+        tokenValues['moonshot'].prompt,
+      );
+      expect(getMultiplier({ model: 'moonshot', tokenType: 'completion' })).toBe(
+        tokenValues['moonshot'].completion,
+      );
+    });
+
+    it('should return correct pricing for moonshot-v1-8k', () => {
+      expect(getMultiplier({ model: 'moonshot-v1-8k', tokenType: 'prompt' })).toBe(
+        tokenValues['moonshot-v1-8k'].prompt,
+      );
+      expect(getMultiplier({ model: 'moonshot-v1-8k', tokenType: 'completion' })).toBe(
+        tokenValues['moonshot-v1-8k'].completion,
+      );
+    });
+
+    it('should return correct pricing for moonshot-v1-32k', () => {
+      expect(getMultiplier({ model: 'moonshot-v1-32k', tokenType: 'prompt' })).toBe(
+        tokenValues['moonshot-v1-32k'].prompt,
+      );
+      expect(getMultiplier({ model: 'moonshot-v1-32k', tokenType: 'completion' })).toBe(
+        tokenValues['moonshot-v1-32k'].completion,
+      );
+    });
+
+    it('should return correct pricing for moonshot-v1-128k', () => {
+      expect(getMultiplier({ model: 'moonshot-v1-128k', tokenType: 'prompt' })).toBe(
+        tokenValues['moonshot-v1-128k'].prompt,
+      );
+      expect(getMultiplier({ model: 'moonshot-v1-128k', tokenType: 'completion' })).toBe(
+        tokenValues['moonshot-v1-128k'].completion,
+      );
+    });
+
+    it('should return correct pricing for moonshot-v1 vision models', () => {
+      expect(getMultiplier({ model: 'moonshot-v1-8k-vision', tokenType: 'prompt' })).toBe(
+        tokenValues['moonshot-v1-8k-vision'].prompt,
+      );
+      expect(getMultiplier({ model: 'moonshot-v1-8k-vision', tokenType: 'completion' })).toBe(
+        tokenValues['moonshot-v1-8k-vision'].completion,
+      );
+      expect(getMultiplier({ model: 'moonshot-v1-32k-vision', tokenType: 'prompt' })).toBe(
+        tokenValues['moonshot-v1-32k-vision'].prompt,
+      );
+      expect(getMultiplier({ model: 'moonshot-v1-32k-vision', tokenType: 'completion' })).toBe(
+        tokenValues['moonshot-v1-32k-vision'].completion,
+      );
+      expect(getMultiplier({ model: 'moonshot-v1-128k-vision', tokenType: 'prompt' })).toBe(
+        tokenValues['moonshot-v1-128k-vision'].prompt,
+      );
+      expect(getMultiplier({ model: 'moonshot-v1-128k-vision', tokenType: 'completion' })).toBe(
+        tokenValues['moonshot-v1-128k-vision'].completion,
+      );
+    });
+  });
+
+  describe('Kimi Cache Multipliers', () => {
+    it('should return correct cache multipliers for kimi-k2 models', () => {
+      expect(getCacheMultiplier({ model: 'kimi', cacheType: 'write' })).toBe(
+        cacheTokenValues['kimi'].write,
+      );
+      expect(getCacheMultiplier({ model: 'kimi', cacheType: 'read' })).toBe(
+        cacheTokenValues['kimi'].read,
+      );
+    });
+
+    it('should return correct cache multipliers for kimi-k2.5 (lower read price)', () => {
+      expect(getCacheMultiplier({ model: 'kimi-k2.5', cacheType: 'write' })).toBe(
+        cacheTokenValues['kimi-k2.5'].write,
+      );
+      expect(getCacheMultiplier({ model: 'kimi-k2.5', cacheType: 'read' })).toBe(
+        cacheTokenValues['kimi-k2.5'].read,
+      );
+    });
+
+    it('should return correct cache multipliers for kimi-k2-turbo', () => {
+      expect(getCacheMultiplier({ model: 'kimi-k2-turbo', cacheType: 'write' })).toBe(
+        cacheTokenValues['kimi-k2-turbo'].write,
+      );
+      expect(getCacheMultiplier({ model: 'kimi-k2-turbo', cacheType: 'read' })).toBe(
+        cacheTokenValues['kimi-k2-turbo'].read,
+      );
+    });
+
+    it('should handle Kimi cache multipliers with model variations', () => {
+      const modelVariations = ['openrouter/kimi-k2', 'openrouter/kimi'];
+
+      modelVariations.forEach((model) => {
+        const writeMultiplier = getCacheMultiplier({ model, cacheType: 'write' });
+        const readMultiplier = getCacheMultiplier({ model, cacheType: 'read' });
+        expect(writeMultiplier).toBe(cacheTokenValues['kimi'].write);
+        expect(readMultiplier).toBe(cacheTokenValues['kimi'].read);
+      });
+    });
+  });
+
+  describe('Bedrock Moonshot Models', () => {
+    it('should return correct pricing for Bedrock moonshot models', () => {
+      expect(getMultiplier({ model: 'moonshot.kimi', tokenType: 'prompt' })).toBe(
+        tokenValues['moonshot.kimi'].prompt,
+      );
+      expect(getMultiplier({ model: 'moonshot.kimi', tokenType: 'completion' })).toBe(
+        tokenValues['moonshot.kimi'].completion,
+      );
+      expect(getMultiplier({ model: 'moonshot.kimi-k2', tokenType: 'prompt' })).toBe(
+        tokenValues['moonshot.kimi-k2'].prompt,
+      );
+      expect(getMultiplier({ model: 'moonshot.kimi-k2.5', tokenType: 'prompt' })).toBe(
+        tokenValues['moonshot.kimi-k2.5'].prompt,
+      );
+      expect(getMultiplier({ model: 'moonshot.kimi-k2.5', tokenType: 'completion' })).toBe(
+        tokenValues['moonshot.kimi-k2.5'].completion,
+      );
    });
  });
 });
@ -1135,6 +1404,73 @@ describe('getCacheMultiplier', () => {
    ).toBeNull();
  });

+  it('should return correct cache multipliers for OpenAI models', () => {
+    const openaiCacheModels = [
+      'gpt-4o',
+      'gpt-4o-mini',
+      'gpt-4.1',
+      'gpt-4.1-mini',
+      'gpt-4.1-nano',
+      'gpt-5',
+      'gpt-5.1',
+      'gpt-5.2',
+      'gpt-5.3',
+      'gpt-5.4',
+      'gpt-5-mini',
+      'gpt-5-nano',
+      'o1',
+      'o1-mini',
+      'o1-preview',
+      'o3',
+      'o3-mini',
+      'o4-mini',
+    ];
+
+    for (const model of openaiCacheModels) {
+      expect(getCacheMultiplier({ model, cacheType: 'write' })).toBe(cacheTokenValues[model].write);
+      expect(getCacheMultiplier({ model, cacheType: 'read' })).toBe(cacheTokenValues[model].read);
+    }
+  });
+
+  it('should return correct cache multipliers for OpenAI dated variants', () => {
+    expect(getCacheMultiplier({ model: 'gpt-4o-2024-08-06', cacheType: 'read' })).toBe(
+      cacheTokenValues['gpt-4o'].read,
+    );
+    expect(getCacheMultiplier({ model: 'gpt-4.1-2026-01-01', cacheType: 'read' })).toBe(
+      cacheTokenValues['gpt-4.1'].read,
+    );
+    expect(getCacheMultiplier({ model: 'gpt-5.3-codex', cacheType: 'read' })).toBe(
+      cacheTokenValues['gpt-5.3'].read,
+    );
+    expect(getCacheMultiplier({ model: 'openai/gpt-5.3', cacheType: 'write' })).toBe(
+      cacheTokenValues['gpt-5.3'].write,
+    );
+  });
+
+  it('should return null for pro models that do not support caching', () => {
+    expect(getCacheMultiplier({ model: 'gpt-5-pro', cacheType: 'read' })).toBeNull();
+    expect(getCacheMultiplier({ model: 'gpt-5-pro', cacheType: 'write' })).toBeNull();
+    expect(getCacheMultiplier({ model: 'gpt-5.2-pro', cacheType: 'read' })).toBeNull();
+    expect(getCacheMultiplier({ model: 'gpt-5.2-pro', cacheType: 'write' })).toBeNull();
+    expect(getCacheMultiplier({ model: 'gpt-5.4-pro', cacheType: 'read' })).toBeNull();
+    expect(getCacheMultiplier({ model: 'gpt-5.4-pro', cacheType: 'write' })).toBeNull();
+  });
+
+  it('should have consistent 10% cache read pricing for gpt-5.x models', () => {
+    const gpt5CacheModels = [
+      'gpt-5',
+      'gpt-5.1',
+      'gpt-5.2',
+      'gpt-5.3',
+      'gpt-5.4',
+      'gpt-5-mini',
+      'gpt-5-nano',
+    ];
+    for (const model of gpt5CacheModels) {
+      expect(cacheTokenValues[model].read).toBeCloseTo(cacheTokenValues[model].write * 0.1, 10);
+    }
+  });
+
  it('should handle models with "bedrock/" prefix', () => {
    expect(
      getCacheMultiplier({
@ -1154,6 +1490,9 @@ describe('getCacheMultiplier', () => {
 describe('Google Model Tests', () => {
  const googleModels = [
    'gemini-3',
+    'gemini-3.1-pro-preview',
+    'gemini-3.1-pro-preview-customtools',
+    'gemini-3.1-flash-lite-preview',
    'gemini-2.5-pro',
    'gemini-2.5-flash',
    'gemini-2.5-flash-lite',
@ -1198,6 +1537,9 @@ describe('Google Model Tests', () => {
  it('should map to the correct model keys', () => {
    const expected = {
      'gemini-3': 'gemini-3',
+      'gemini-3.1-pro-preview': 'gemini-3.1',
+      'gemini-3.1-pro-preview-customtools': 'gemini-3.1',
+      'gemini-3.1-flash-lite-preview': 'gemini-3.1-flash-lite',
      'gemini-2.5-pro': 'gemini-2.5-pro',
      'gemini-2.5-flash': 'gemini-2.5-flash',
      'gemini-2.5-flash-lite': 'gemini-2.5-flash-lite',
@ -1241,6 +1583,190 @@ describe('Google Model Tests', () => {
      ).toBe(tokenValues[expected].completion);
    });
  });
+
+  it('should return correct prompt and completion rates for Gemini 3.1', () => {
+    expect(
+      getMultiplier({
+        model: 'gemini-3.1-pro-preview',
+        tokenType: 'prompt',
+        endpoint: EModelEndpoint.google,
+      }),
+    ).toBe(tokenValues['gemini-3.1'].prompt);
+    expect(
+      getMultiplier({
+        model: 'gemini-3.1-pro-preview',
+        tokenType: 'completion',
+        endpoint: EModelEndpoint.google,
+      }),
+    ).toBe(tokenValues['gemini-3.1'].completion);
+    expect(
+      getMultiplier({
+        model: 'gemini-3.1-pro-preview-customtools',
+        tokenType: 'prompt',
+        endpoint: EModelEndpoint.google,
+      }),
+    ).toBe(tokenValues['gemini-3.1'].prompt);
+    expect(
+      getMultiplier({
+        model: 'gemini-3.1-pro-preview-customtools',
+        tokenType: 'completion',
+        endpoint: EModelEndpoint.google,
+      }),
+    ).toBe(tokenValues['gemini-3.1'].completion);
+  });
+
+  it('should return correct cache rates for Gemini 3.1', () => {
+    ['gemini-3.1-pro-preview', 'gemini-3.1-pro-preview-customtools'].forEach((model) => {
+      expect(getCacheMultiplier({ model, cacheType: 'write' })).toBe(
+        cacheTokenValues['gemini-3.1'].write,
+      );
+      expect(getCacheMultiplier({ model, cacheType: 'read' })).toBe(
+        cacheTokenValues['gemini-3.1'].read,
+      );
+    });
+  });
+
+  it('should return correct rates for Gemini 3.1 Flash-Lite', () => {
+    const model = 'gemini-3.1-flash-lite-preview';
+    expect(getMultiplier({ model, tokenType: 'prompt', endpoint: EModelEndpoint.google })).toBe(
+      tokenValues['gemini-3.1-flash-lite'].prompt,
+    );
+    expect(getMultiplier({ model, tokenType: 'completion', endpoint: EModelEndpoint.google })).toBe(
+      tokenValues['gemini-3.1-flash-lite'].completion,
+    );
+    expect(getCacheMultiplier({ model, cacheType: 'write' })).toBe(
+      cacheTokenValues['gemini-3.1-flash-lite'].write,
+    );
+    expect(getCacheMultiplier({ model, cacheType: 'read' })).toBe(
+      cacheTokenValues['gemini-3.1-flash-lite'].read,
+    );
+  });
+});
+
+describe('Gemini 3.1 Premium Token Pricing', () => {
+  const premiumKey = 'gemini-3.1';
+  const premiumEntry = premiumTokenValues[premiumKey];
+  const { threshold } = premiumEntry;
+  const belowThreshold = threshold - 1;
+  const aboveThreshold = threshold + 1;
+  const wellAboveThreshold = threshold * 2;
+
+  it('should have premium pricing defined for gemini-3.1', () => {
+    expect(premiumEntry).toBeDefined();
+    expect(premiumEntry.threshold).toBeDefined();
+    expect(premiumEntry.prompt).toBeDefined();
+    expect(premiumEntry.completion).toBeDefined();
+    expect(premiumEntry.prompt).toBeGreaterThan(tokenValues[premiumKey].prompt);
+    expect(premiumEntry.completion).toBeGreaterThan(tokenValues[premiumKey].completion);
+  });
+
+  it('should return null from getPremiumRate when inputTokenCount is below or at threshold', () => {
+    expect(getPremiumRate(premiumKey, 'prompt', belowThreshold)).toBeNull();
+    expect(getPremiumRate(premiumKey, 'completion', belowThreshold)).toBeNull();
+    expect(getPremiumRate(premiumKey, 'prompt', threshold)).toBeNull();
+  });
+
+  it('should return premium rate from getPremiumRate when inputTokenCount exceeds threshold', () => {
+    expect(getPremiumRate(premiumKey, 'prompt', aboveThreshold)).toBe(premiumEntry.prompt);
+    expect(getPremiumRate(premiumKey, 'completion', aboveThreshold)).toBe(premiumEntry.completion);
+    expect(getPremiumRate(premiumKey, 'prompt', wellAboveThreshold)).toBe(premiumEntry.prompt);
+  });
+
+  it('should return null from getPremiumRate when inputTokenCount is undefined or null', () => {
+    expect(getPremiumRate(premiumKey, 'prompt', undefined)).toBeNull();
+    expect(getPremiumRate(premiumKey, 'prompt', null)).toBeNull();
+  });
+
+  it('should return standard rate from getMultiplier when inputTokenCount is below threshold', () => {
+    expect(
+      getMultiplier({
+        model: 'gemini-3.1-pro-preview',
+        tokenType: 'prompt',
+        inputTokenCount: belowThreshold,
+      }),
+    ).toBe(tokenValues[premiumKey].prompt);
+    expect(
+      getMultiplier({
+        model: 'gemini-3.1-pro-preview',
+        tokenType: 'completion',
+        inputTokenCount: belowThreshold,
+      }),
+    ).toBe(tokenValues[premiumKey].completion);
+  });
+
+  it('should return premium rate from getMultiplier when inputTokenCount exceeds threshold', () => {
+    expect(
+      getMultiplier({
+        model: 'gemini-3.1-pro-preview',
+        tokenType: 'prompt',
+        inputTokenCount: aboveThreshold,
+      }),
+    ).toBe(premiumEntry.prompt);
+    expect(
+      getMultiplier({
+        model: 'gemini-3.1-pro-preview',
+        tokenType: 'completion',
+        inputTokenCount: aboveThreshold,
+      }),
+    ).toBe(premiumEntry.completion);
+  });
+
+  it('should return standard rate from getMultiplier when inputTokenCount is exactly at threshold', () => {
+    expect(
+      getMultiplier({
+        model: 'gemini-3.1-pro-preview',
+        tokenType: 'prompt',
+        inputTokenCount: threshold,
+      }),
+    ).toBe(tokenValues[premiumKey].prompt);
+  });
+
+  it('should apply premium pricing to customtools variant above threshold', () => {
+    expect(
+      getMultiplier({
+        model: 'gemini-3.1-pro-preview-customtools',
+        tokenType: 'prompt',
+        inputTokenCount: aboveThreshold,
+      }),
+    ).toBe(premiumEntry.prompt);
+    expect(
+      getMultiplier({
+        model: 'gemini-3.1-pro-preview-customtools',
+        tokenType: 'completion',
+        inputTokenCount: aboveThreshold,
+      }),
+    ).toBe(premiumEntry.completion);
+  });
+
+  it('should use standard rate when inputTokenCount is not provided', () => {
+    expect(getMultiplier({ model: 'gemini-3.1-pro-preview', tokenType: 'prompt' })).toBe(
+      tokenValues[premiumKey].prompt,
+    );
+    expect(getMultiplier({ model: 'gemini-3.1-pro-preview', tokenType: 'completion' })).toBe(
+      tokenValues[premiumKey].completion,
+    );
+  });
+
+  it('should apply premium pricing through getMultiplier with valueKey path', () => {
+    const valueKey = getValueKey('gemini-3.1-pro-preview');
+    expect(valueKey).toBe(premiumKey);
+    expect(getMultiplier({ valueKey, tokenType: 'prompt', inputTokenCount: aboveThreshold })).toBe(
+      premiumEntry.prompt,
+    );
+    expect(
+      getMultiplier({ valueKey, tokenType: 'completion', inputTokenCount: aboveThreshold }),
+    ).toBe(premiumEntry.completion);
+  });
+
+  it('should apply standard pricing through getMultiplier with valueKey path when below threshold', () => {
+    const valueKey = getValueKey('gemini-3.1-pro-preview');
+    expect(getMultiplier({ valueKey, tokenType: 'prompt', inputTokenCount: belowThreshold })).toBe(
+      tokenValues[premiumKey].prompt,
+    );
+    expect(
+      getMultiplier({ valueKey, tokenType: 'completion', inputTokenCount: belowThreshold }),
+    ).toBe(tokenValues[premiumKey].completion);
+  });
 });

 describe('Grok Model Tests - Pricing', () => {
@ -1689,6 +2215,201 @@ describe('Claude Model Tests', () => {
      );
    });
  });
+
+  it('should return correct prompt and completion rates for Claude Opus 4.6', () => {
+    // Each token type must map to the matching field of the model's tokenValues entry.
+    const opusModel = 'claude-opus-4-6';
+    ['prompt', 'completion'].forEach((tokenType) => {
+      const multiplier = getMultiplier({ model: opusModel, tokenType });
+      expect(multiplier).toBe(tokenValues[opusModel][tokenType]);
+    });
+  });
+
+  it('should handle Claude Opus 4.6 model name variations', () => {
+    // Dated, aliased and provider-qualified spellings must all resolve to the same key.
+    const expectedKey = 'claude-opus-4-6';
+    const aliases = [
+      'claude-opus-4-6',
+      'claude-opus-4-6-20250801',
+      'claude-opus-4-6-latest',
+      'anthropic/claude-opus-4-6',
+      'claude-opus-4-6/anthropic',
+      'claude-opus-4-6-preview',
+    ];
+
+    for (const alias of aliases) {
+      expect(getValueKey(alias)).toBe(expectedKey);
+
+      const promptRate = getMultiplier({ model: alias, tokenType: 'prompt' });
+      expect(promptRate).toBe(tokenValues[expectedKey].prompt);
+
+      const completionRate = getMultiplier({ model: alias, tokenType: 'completion' });
+      expect(completionRate).toBe(tokenValues[expectedKey].completion);
+    }
+  });
+
+  it('should return correct cache rates for Claude Opus 4.6', () => {
+    // Write and read multipliers must match the model's cacheTokenValues entry.
+    const opusModel = 'claude-opus-4-6';
+    for (const cacheType of ['write', 'read']) {
+      const rate = getCacheMultiplier({ model: opusModel, cacheType });
+      expect(rate).toBe(cacheTokenValues[opusModel][cacheType]);
+    }
+  });
+
+  it('should handle Claude Opus 4.6 cache rates with model name variations', () => {
+    // Every spelling below is expected to use the 'claude-opus-4-6' cache rates.
+    const expectedKey = 'claude-opus-4-6';
+    const aliases = [
+      'claude-opus-4-6',
+      'claude-opus-4-6-20250801',
+      'claude-opus-4-6-latest',
+      'anthropic/claude-opus-4-6',
+      'claude-opus-4-6/anthropic',
+      'claude-opus-4-6-preview',
+    ];
+
+    for (const alias of aliases) {
+      const writeRate = getCacheMultiplier({ model: alias, cacheType: 'write' });
+      expect(writeRate).toBe(cacheTokenValues[expectedKey].write);
+
+      const readRate = getCacheMultiplier({ model: alias, cacheType: 'read' });
+      expect(readRate).toBe(cacheTokenValues[expectedKey].read);
+    }
+  });
+});
+
+describe('Premium Token Pricing', () => {
+  // Shared fixtures: the premium-priced model and input sizes on either side of its threshold.
+  const premiumModel = 'claude-opus-4-6';
+  const premiumEntry = premiumTokenValues[premiumModel];
+  const { threshold } = premiumEntry;
+  const belowThreshold = threshold - 1;
+  const aboveThreshold = threshold + 1;
+  const wellAboveThreshold = threshold * 2;
+  const tokenTypes = ['prompt', 'completion'];
+
+  /** Multiplier looked up by model name for the given input size. */
+  const byModel = (model, tokenType, inputTokenCount) =>
+    getMultiplier({ model, tokenType, inputTokenCount });
+
+  /** Multiplier looked up through a pre-resolved value key instead of a model name. */
+  const byValueKey = (valueKey, tokenType, inputTokenCount) =>
+    getMultiplier({ valueKey, tokenType, inputTokenCount });
+
+  it('should have premium pricing defined for claude-opus-4-6', () => {
+    expect(premiumEntry).toBeDefined();
+    for (const field of ['threshold', 'prompt', 'completion']) {
+      expect(premiumEntry[field]).toBeDefined();
+    }
+    // Premium rates must be strictly higher than the standard ones.
+    for (const tokenType of tokenTypes) {
+      expect(premiumEntry[tokenType]).toBeGreaterThan(tokenValues[premiumModel][tokenType]);
+    }
+  });
+
+  it('should return null from getPremiumRate when inputTokenCount is below threshold', () => {
+    // The exact threshold value is included: it must not trigger premium pricing either.
+    const nonPremiumCases = [
+      ['prompt', belowThreshold],
+      ['completion', belowThreshold],
+      ['prompt', threshold],
+    ];
+    for (const [tokenType, inputTokenCount] of nonPremiumCases) {
+      expect(getPremiumRate(premiumModel, tokenType, inputTokenCount)).toBeNull();
+    }
+  });
+
+  it('should return premium rate from getPremiumRate when inputTokenCount exceeds threshold', () => {
+    const premiumCases = [
+      ['prompt', aboveThreshold],
+      ['completion', aboveThreshold],
+      ['prompt', wellAboveThreshold],
+    ];
+    for (const [tokenType, inputTokenCount] of premiumCases) {
+      const rate = getPremiumRate(premiumModel, tokenType, inputTokenCount);
+      expect(rate).toBe(premiumEntry[tokenType]);
+    }
+  });
+
+  it('should return null from getPremiumRate when inputTokenCount is undefined or null', () => {
+    for (const missingCount of [undefined, null]) {
+      expect(getPremiumRate(premiumModel, 'prompt', missingCount)).toBeNull();
+    }
+  });
+
+  it('should return null from getPremiumRate for models without premium pricing', () => {
+    for (const model of ['claude-opus-4-5', 'claude-sonnet-4', 'gpt-4o']) {
+      expect(getPremiumRate(model, 'prompt', wellAboveThreshold)).toBeNull();
+    }
+  });
+
+  it('should return standard rate from getMultiplier when inputTokenCount is below threshold', () => {
+    for (const tokenType of tokenTypes) {
+      const rate = byModel(premiumModel, tokenType, belowThreshold);
+      expect(rate).toBe(tokenValues[premiumModel][tokenType]);
+    }
+  });
+
+  it('should return premium rate from getMultiplier when inputTokenCount exceeds threshold', () => {
+    for (const tokenType of tokenTypes) {
+      const rate = byModel(premiumModel, tokenType, aboveThreshold);
+      expect(rate).toBe(premiumEntry[tokenType]);
+    }
+  });
+
+  it('should return standard rate from getMultiplier when inputTokenCount is exactly at threshold', () => {
+    const rateAtThreshold = byModel(premiumModel, 'prompt', threshold);
+    expect(rateAtThreshold).toBe(tokenValues[premiumModel].prompt);
+  });
+
+  it('should return premium rate from getMultiplier when inputTokenCount is one above threshold', () => {
+    const rateJustAbove = byModel(premiumModel, 'prompt', aboveThreshold);
+    expect(rateJustAbove).toBe(premiumEntry.prompt);
+  });
+
+  it('should not apply premium pricing to models without premium entries', () => {
+    for (const model of ['claude-opus-4-5', 'claude-sonnet-4']) {
+      const rate = byModel(model, 'prompt', wellAboveThreshold);
+      expect(rate).toBe(tokenValues[model].prompt);
+    }
+  });
+
+  it('should use standard rate when inputTokenCount is not provided', () => {
+    // Called directly so that the inputTokenCount key is absent from the argument object.
+    for (const tokenType of tokenTypes) {
+      const rate = getMultiplier({ model: premiumModel, tokenType });
+      expect(rate).toBe(tokenValues[premiumModel][tokenType]);
+    }
+  });
+
+  it('should apply premium pricing through getMultiplier with valueKey path', () => {
+    const resolvedKey = getValueKey(premiumModel);
+    for (const tokenType of tokenTypes) {
+      const rate = byValueKey(resolvedKey, tokenType, aboveThreshold);
+      expect(rate).toBe(premiumEntry[tokenType]);
+    }
+  });
+
+  it('should apply standard pricing through getMultiplier with valueKey path when below threshold', () => {
+    const resolvedKey = getValueKey(premiumModel);
+    for (const tokenType of tokenTypes) {
+      const rate = byValueKey(resolvedKey, tokenType, belowThreshold);
+      expect(rate).toBe(tokenValues[premiumModel][tokenType]);
+    }
+  });
 });

 describe('tokens.ts and tx.js sync validation', () => {
--- a/api/package.json
+++ b/api/package.json
@ -1,6 +1,6 @@
 {
  "name": "@librechat/backend",
-  "version": "v0.8.2-rc2",
+  "version": "v0.8.3",
  "description": "",
  "scripts": {
    "start": "echo 'please run this from the root directory'",
@ -34,26 +34,25 @@
  },
  "homepage": "https://librechat.ai",
  "dependencies": {
-    "@anthropic-ai/sdk": "^0.71.0",
-    "@anthropic-ai/vertex-sdk": "^0.14.0",
-    "@aws-sdk/client-bedrock-runtime": "^3.941.0",
-    "@aws-sdk/client-s3": "^3.758.0",
+    "@anthropic-ai/vertex-sdk": "^0.14.3",
+    "@aws-sdk/client-bedrock-runtime": "^3.980.0",
+    "@aws-sdk/client-s3": "^3.980.0",
    "@aws-sdk/s3-request-presigner": "^3.758.0",
    "@azure/identity": "^4.7.0",
    "@azure/search-documents": "^12.0.0",
-    "@azure/storage-blob": "^12.27.0",
+    "@azure/storage-blob": "^12.30.0",
    "@google/genai": "^1.19.0",
-    "@googleapis/youtube": "^20.0.0",
    "@keyv/redis": "^4.3.3",
    "@langchain/core": "^0.3.80",
-    "@librechat/agents": "^3.0.66",
+    "@librechat/agents": "^3.1.55",
    "@librechat/api": "*",
    "@librechat/data-schemas": "*",
    "@microsoft/microsoft-graph-client": "^3.0.7",
-    "@modelcontextprotocol/sdk": "^1.25.2",
+    "@modelcontextprotocol/sdk": "^1.27.1",
    "@node-saml/passport-saml": "^5.1.0",
    "@smithy/node-http-handler": "^4.4.5",
-    "axios": "^1.12.1",
+    "ai-tokenizer": "^1.0.6",
+    "axios": "^1.13.5",
    "bcryptjs": "^2.4.3",
    "compression": "^1.8.1",
    "connect-redis": "^8.1.0",
@ -65,10 +64,10 @@
    "eventsource": "^3.0.2",
    "express": "^5.2.1",
    "express-mongo-sanitize": "^2.2.0",
-    "express-rate-limit": "^8.2.1",
+    "express-rate-limit": "^8.3.0",
    "express-session": "^1.18.2",
    "express-static-gzip": "^2.2.0",
-    "file-type": "^18.7.0",
+    "file-type": "^21.3.2",
    "firebase": "^11.0.2",
    "form-data": "^4.0.4",
    "handlebars": "^4.7.7",
@ -81,14 +80,15 @@
    "keyv-file": "^5.1.2",
    "klona": "^2.0.6",
    "librechat-data-provider": "*",
-    "lodash": "^4.17.21",
+    "lodash": "^4.17.23",
+    "mammoth": "^1.11.0",
    "mathjs": "^15.1.0",
    "meilisearch": "^0.38.0",
    "memorystore": "^1.6.7",
    "mime": "^3.0.0",
    "module-alias": "^2.2.3",
    "mongoose": "^8.12.1",
-    "multer": "^2.0.2",
+    "multer": "^2.1.1",
    "nanoid": "^3.3.7",
    "node-fetch": "^2.7.0",
    "nodemailer": "^7.0.11",
@ -104,15 +104,15 @@
    "passport-jwt": "^4.0.1",
    "passport-ldapauth": "^3.0.1",
    "passport-local": "^1.0.0",
+    "pdfjs-dist": "^5.4.624",
    "rate-limit-redis": "^4.2.0",
    "sharp": "^0.33.5",
-    "tiktoken": "^1.0.15",
    "traverse": "^0.6.7",
    "ua-parser-js": "^1.0.36",
-    "undici": "^7.10.0",
+    "undici": "^7.24.1",
    "winston": "^3.11.0",
    "winston-daily-rotate-file": "^5.0.0",
-    "youtube-transcript": "^1.2.1",
+    "xlsx": "https://cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz",
    "zod": "^3.22.4"
  },
  "devDependencies": {
--- a/api/server/cleanup.js
+++ b/api/server/cleanup.js
@ -35,7 +35,6 @@ const graphPropsToClean = [
  'tools',
  'signal',
  'config',
-  'agentContexts',
  'messages',
  'contentData',
  'stepKeyIds',
@ -277,7 +276,16 @@ function disposeClient(client) {

    if (client.run) {
      if (client.run.Graph) {
-        client.run.Graph.resetValues();
+        if (typeof client.run.Graph.clearHeavyState === 'function') {
+          client.run.Graph.clearHeavyState();
+        } else {
+          client.run.Graph.resetValues();
+        }
+
+        if (client.run.Graph.agentContexts) {
+          client.run.Graph.agentContexts.clear();
+          client.run.Graph.agentContexts = null;
+        }

        graphPropsToClean.forEach((prop) => {
          if (client.run.Graph[prop] !== undefined) {
--- a/api/server/controllers/AuthController.js
+++ b/api/server/controllers/AuthController.js
@ -18,8 +18,7 @@ const {
  findUser,
 } = require('~/models');
 const { getGraphApiToken } = require('~/server/services/GraphTokenService');
-const { getOAuthReconnectionManager } = require('~/config');
-const { getOpenIdConfig } = require('~/strategies');
+const { getOpenIdConfig, getOpenIdEmail } = require('~/strategies');

 const registrationController = async (req, res) => {
  try {
@ -79,11 +78,16 @@ const refreshController = async (req, res) => {

    try {
      const openIdConfig = getOpenIdConfig();
-      const tokenset = await openIdClient.refreshTokenGrant(openIdConfig, refreshToken);
+      const refreshParams = process.env.OPENID_SCOPE ? { scope: process.env.OPENID_SCOPE } : {};
+      const tokenset = await openIdClient.refreshTokenGrant(
+        openIdConfig,
+        refreshToken,
+        refreshParams,
+      );
      const claims = tokenset.claims();
      const { user, error, migration } = await findOpenIDUser({
        findUser,
-        email: claims.email,
+        email: getOpenIdEmail(claims),
        openidId: claims.sub,
        idOnTheSource: claims.oid,
        strategyName: 'refreshController',
@ -161,17 +165,6 @@ const refreshController = async (req, res) => {
    if (session && session.expiration > new Date()) {
      const token = await setAuthTokens(userId, res, session);

-      // trigger OAuth MCP server reconnection asynchronously (best effort)
-      try {
-        void getOAuthReconnectionManager()
-          .reconnectServers(userId)
-          .catch((err) => {
-            logger.error('[refreshController] Error reconnecting OAuth MCP servers:', err);
-          });
-      } catch (err) {
-        logger.warn(`[refreshController] Cannot attempt OAuth MCP servers reconnection:`, err);
-      }
-
      res.status(200).send({ token, user });
    } else if (req?.query?.retry) {
      // Retrying from a refresh token request that failed (401)
@ -203,15 +196,6 @@ const graphTokenController = async (req, res) => {
      });
    }

-    // Extract access token from Authorization header
-    const authHeader = req.headers.authorization;
-    if (!authHeader || !authHeader.startsWith('Bearer ')) {
-      return res.status(401).json({
-        message: 'Valid authorization token required',
-      });
-    }
-
-    // Get scopes from query parameters
    const scopes = req.query.scopes;
    if (!scopes) {
      return res.status(400).json({
@ -219,7 +203,13 @@ const graphTokenController = async (req, res) => {
      });
    }

-    const accessToken = authHeader.substring(7); // Remove 'Bearer ' prefix
+    const accessToken = req.user.federatedTokens?.access_token;
+    if (!accessToken) {
+      return res.status(401).json({
+        message: 'No federated access token available for token exchange',
+      });
+    }
+
    const tokenResponse = await getGraphApiToken(req.user, accessToken, scopes);

    res.json(tokenResponse);
--- a/api/server/controllers/AuthController.spec.js
+++ b/api/server/controllers/AuthController.spec.js
@ -0,0 +1,302 @@
+// Module mocks: each module listed here is replaced with jest.fn() stubs that the
+// individual tests configure. Factories are kept self-contained (no outer references).
+jest.mock('@librechat/data-schemas', () => ({
+  logger: {
+    error: jest.fn(),
+    debug: jest.fn(),
+    warn: jest.fn(),
+    info: jest.fn(),
+  },
+}));
+jest.mock('~/server/services/GraphTokenService', () => ({ getGraphApiToken: jest.fn() }));
+jest.mock('~/server/services/AuthService', () => ({
+  registerUser: jest.fn(),
+  requestPasswordReset: jest.fn(),
+  resetPassword: jest.fn(),
+  setAuthTokens: jest.fn(),
+  setOpenIDAuthTokens: jest.fn(),
+}));
+jest.mock('~/strategies', () => ({
+  getOpenIdConfig: jest.fn(),
+  getOpenIdEmail: jest.fn(),
+}));
+jest.mock('openid-client', () => ({
+  refreshTokenGrant: jest.fn(),
+}));
+jest.mock('~/models', () => ({
+  deleteAllUserSessions: jest.fn(),
+  findSession: jest.fn(),
+  findUser: jest.fn(),
+  getUserById: jest.fn(),
+  updateUser: jest.fn(),
+}));
+jest.mock('@librechat/api', () => ({ isEnabled: jest.fn(), findOpenIDUser: jest.fn() }));
+
+const openIdClient = require('openid-client');
+const { isEnabled, findOpenIDUser } = require('@librechat/api');
+const { graphTokenController, refreshController } = require('./AuthController');
+const { getGraphApiToken } = require('~/server/services/GraphTokenService');
+const { setOpenIDAuthTokens } = require('~/server/services/AuthService');
+const { getOpenIdConfig, getOpenIdEmail } = require('~/strategies');
+const { updateUser } = require('~/models');
+
+describe('graphTokenController', () => {
+  const graphScope = 'https://graph.microsoft.com/.default';
+  const federatedAccessToken = 'federated-access-token';
+
+  /** Fresh copy of the payload the mocked Graph token exchange resolves with. */
+  const makeGraphTokenResponse = () => ({
+    access_token: 'graph-access-token',
+    token_type: 'Bearer',
+    expires_in: 3600,
+  });
+
+  let req;
+  let res;
+
+  /** Asserts the controller answered with `statusCode` and never attempted a token exchange. */
+  const expectRejection = (statusCode) => {
+    expect(res.status).toHaveBeenCalledWith(statusCode);
+    expect(getGraphApiToken).not.toHaveBeenCalled();
+  };
+
+  beforeEach(() => {
+    jest.clearAllMocks();
+    isEnabled.mockReturnValue(true);
+    getGraphApiToken.mockResolvedValue(makeGraphTokenResponse());
+
+    // Happy-path request; individual tests knock out one precondition at a time.
+    req = {
+      user: {
+        openidId: 'oid-123',
+        provider: 'openid',
+        federatedTokens: {
+          access_token: federatedAccessToken,
+          id_token: 'federated-id-token',
+        },
+      },
+      headers: { authorization: 'Bearer app-jwt-which-is-id-token' },
+      query: { scopes: graphScope },
+    };
+
+    res = {
+      status: jest.fn().mockReturnThis(),
+      json: jest.fn(),
+    };
+  });
+
+  it('should pass federatedTokens.access_token as OBO assertion, not the auth header bearer token', async () => {
+    await graphTokenController(req, res);
+
+    expect(getGraphApiToken).toHaveBeenCalledWith(req.user, federatedAccessToken, graphScope);
+    // The bearer token from the Authorization header must never be used as the assertion.
+    expect(getGraphApiToken).not.toHaveBeenCalledWith(
+      expect.anything(),
+      'app-jwt-which-is-id-token',
+      expect.anything(),
+    );
+  });
+
+  it('should return the graph token response on success', async () => {
+    await graphTokenController(req, res);
+
+    expect(res.json).toHaveBeenCalledWith(makeGraphTokenResponse());
+  });
+
+  it('should return 403 when user is not authenticated via Entra ID', async () => {
+    req.user.provider = 'google';
+    req.user.openidId = undefined;
+
+    await graphTokenController(req, res);
+
+    expectRejection(403);
+  });
+
+  it('should return 403 when OPENID_REUSE_TOKENS is not enabled', async () => {
+    isEnabled.mockReturnValue(false);
+
+    await graphTokenController(req, res);
+
+    expectRejection(403);
+  });
+
+  it('should return 400 when scopes query param is missing', async () => {
+    req.query.scopes = undefined;
+
+    await graphTokenController(req, res);
+
+    expectRejection(400);
+  });
+
+  it('should return 401 when federatedTokens.access_token is missing', async () => {
+    req.user.federatedTokens = {};
+
+    await graphTokenController(req, res);
+
+    expectRejection(401);
+  });
+
+  it('should return 401 when federatedTokens is absent entirely', async () => {
+    req.user.federatedTokens = undefined;
+
+    await graphTokenController(req, res);
+
+    expectRejection(401);
+  });
+
+  it('should return 500 when getGraphApiToken throws', async () => {
+    const exchangeFailure = new Error('OBO exchange failed');
+    getGraphApiToken.mockRejectedValue(exchangeFailure);
+
+    await graphTokenController(req, res);
+
+    expect(res.status).toHaveBeenCalledWith(500);
+    expect(res.json).toHaveBeenCalledWith({
+      message: 'Failed to obtain Microsoft Graph token',
+    });
+  });
+});
+
+describe('refreshController – OpenID path', () => {
+  // Claims returned by the mocked tokenset unless a test overrides them.
+  const baseClaims = {
+    sub: 'oidc-sub-123',
+    oid: 'oid-456',
+    email: 'user@example.com',
+    exp: 9999999999,
+  };
+
+  const mockTokenset = {
+    claims: jest.fn(),
+    access_token: 'new-access',
+    id_token: 'new-id',
+    refresh_token: 'new-refresh',
+  };
+
+  let req;
+  let res;
+
+  /** Builds the user record handed back by the mocked lookup, with optional field overrides. */
+  const makeUser = (overrides = {}) => ({
+    _id: 'user-db-id',
+    email: baseClaims.email,
+    openidId: baseClaims.sub,
+    ...overrides,
+  });
+
+  /** Makes findOpenIDUser resolve with the given lookup outcome. */
+  const stubUserLookup = ({ user = null, error = null, migration = false } = {}) => {
+    findOpenIDUser.mockResolvedValue({ user, error, migration });
+  };
+
+  /** Asserts the controller answered 401 and redirected to the login page. */
+  const expectLoginRedirect = () => {
+    expect(res.status).toHaveBeenCalledWith(401);
+    expect(res.redirect).toHaveBeenCalledWith('/login');
+  };
+
+  beforeEach(() => {
+    jest.clearAllMocks();
+
+    isEnabled.mockReturnValue(true);
+    getOpenIdConfig.mockReturnValue({ some: 'config' });
+    openIdClient.refreshTokenGrant.mockResolvedValue(mockTokenset);
+    mockTokenset.claims.mockReturnValue(baseClaims);
+    getOpenIdEmail.mockReturnValue(baseClaims.email);
+    setOpenIDAuthTokens.mockReturnValue('new-app-token');
+    updateUser.mockResolvedValue({});
+
+    req = {
+      headers: { cookie: 'token_provider=openid; refreshToken=stored-refresh' },
+      session: {},
+    };
+
+    res = {
+      status: jest.fn().mockReturnThis(),
+      send: jest.fn().mockReturnThis(),
+      redirect: jest.fn(),
+    };
+  });
+
+  it('should call getOpenIdEmail with token claims and use result for findOpenIDUser', async () => {
+    stubUserLookup({ user: makeUser() });
+
+    await refreshController(req, res);
+
+    expect(getOpenIdEmail).toHaveBeenCalledWith(baseClaims);
+    expect(findOpenIDUser).toHaveBeenCalledWith(
+      expect.objectContaining({ email: baseClaims.email }),
+    );
+    expect(res.status).toHaveBeenCalledWith(200);
+  });
+
+  it('should use OPENID_EMAIL_CLAIM-resolved value when claim is present in token', async () => {
+    const resolvedEmail = 'user@corp.example.com';
+    const claimsWithUpn = { ...baseClaims, upn: resolvedEmail };
+    mockTokenset.claims.mockReturnValue(claimsWithUpn);
+    getOpenIdEmail.mockReturnValue(resolvedEmail);
+    stubUserLookup({ user: makeUser({ email: resolvedEmail }) });
+
+    await refreshController(req, res);
+
+    expect(getOpenIdEmail).toHaveBeenCalledWith(claimsWithUpn);
+    expect(findOpenIDUser).toHaveBeenCalledWith(expect.objectContaining({ email: resolvedEmail }));
+    expect(res.status).toHaveBeenCalledWith(200);
+  });
+
+  it('should fall back to claims.email when configured claim is absent from token claims', async () => {
+    getOpenIdEmail.mockReturnValue(baseClaims.email);
+    stubUserLookup({ user: makeUser() });
+
+    await refreshController(req, res);
+
+    expect(findOpenIDUser).toHaveBeenCalledWith(
+      expect.objectContaining({ email: baseClaims.email }),
+    );
+  });
+
+  it('should update openidId when migration is triggered on refresh', async () => {
+    // The stored user has no openidId yet, and the lookup flags a migration.
+    stubUserLookup({ user: makeUser({ openidId: null }), migration: true });
+
+    await refreshController(req, res);
+
+    expect(updateUser).toHaveBeenCalledWith(
+      'user-db-id',
+      expect.objectContaining({ provider: 'openid', openidId: baseClaims.sub }),
+    );
+    expect(res.status).toHaveBeenCalledWith(200);
+  });
+
+  it('should return 401 and redirect to /login when findOpenIDUser returns no user', async () => {
+    stubUserLookup();
+
+    await refreshController(req, res);
+
+    expectLoginRedirect();
+  });
+
+  it('should return 401 and redirect when findOpenIDUser returns an error', async () => {
+    stubUserLookup({ error: 'AUTH_FAILED' });
+
+    await refreshController(req, res);
+
+    expectLoginRedirect();
+  });
+
+  it('should skip OpenID path when token_provider is not openid', async () => {
+    req.headers.cookie = 'token_provider=local; refreshToken=some-token';
+
+    await refreshController(req, res);
+
+    expect(openIdClient.refreshTokenGrant).not.toHaveBeenCalled();
+  });
+
+  it('should skip OpenID path when OPENID_REUSE_TOKENS is disabled', async () => {
+    isEnabled.mockReturnValue(false);
+
+    await refreshController(req, res);
+
+    expect(openIdClient.refreshTokenGrant).not.toHaveBeenCalled();
+  });
+
+  it('should return 200 with token not provided when refresh token is absent', async () => {
+    // The cookie carries the provider marker but no refreshToken entry.
+    req.headers.cookie = 'token_provider=openid';
+    req.session = {};
+
+    await refreshController(req, res);
+
+    expect(res.status).toHaveBeenCalledWith(200);
+    expect(res.send).toHaveBeenCalledWith('Refresh token not provided');
+  });
+});
--- a/api/server/controllers/PermissionsController.js
+++ b/api/server/controllers/PermissionsController.js
@ -5,6 +5,7 @@
 const mongoose = require('mongoose');
 const { logger } = require('@librechat/data-schemas');
 const { ResourceType, PrincipalType, PermissionBits } = require('librechat-data-provider');
+const { enrichRemoteAgentPrincipals, backfillRemoteAgentPermissions } = require('@librechat/api');
 const {
  bulkUpdateResourcePermissions,
  ensureGroupPrincipalExists,
@ -14,7 +15,6 @@ const {
  findAccessibleResources,
  getResourcePermissionsMap,
 } = require('~/server/services/PermissionService');
-const { AclEntry } = require('~/db/models');
 const {
  searchPrincipals: searchLocalPrincipals,
  sortPrincipalsByRelevance,
@ -24,6 +24,7 @@ const {
  entraIdPrincipalFeatureEnabled,
  searchEntraIdPrincipals,
 } = require('~/server/services/GraphApiService');
+const { AclEntry, AccessRole } = require('~/db/models');

 /**
 * Generic controller for resource permission endpoints
@ -234,7 +235,7 @@ const getResourcePermissions = async (req, res) => {
      },
    ]);

-    const principals = [];
+    let principals = [];
    let publicPermission = null;

    // Process aggregation results
@ -280,6 +281,13 @@ const getResourcePermissions = async (req, res) => {
      }
    }

+    if (resourceType === ResourceType.REMOTE_AGENT) {
+      const enricherDeps = { AclEntry, AccessRole, logger };
+      const enrichResult = await enrichRemoteAgentPrincipals(enricherDeps, resourceId, principals);
+      principals = enrichResult.principals;
+      backfillRemoteAgentPermissions(enricherDeps, resourceId, enrichResult.entriesToBackfill);
+    }
+
    // Return response in format expected by frontend
    const response = {
      resourceType,
--- a/api/server/controllers/PluginController.js
+++ b/api/server/controllers/PluginController.js
@ -8,7 +8,7 @@ const { getLogStores } = require('~/cache');

 const getAvailablePluginsController = async (req, res) => {
  try {
-    const cache = getLogStores(CacheKeys.CONFIG_STORE);
+    const cache = getLogStores(CacheKeys.TOOL_CACHE);
    const cachedPlugins = await cache.get(CacheKeys.PLUGINS);
    if (cachedPlugins) {
      res.status(200).json(cachedPlugins);
@ -63,7 +63,7 @@ const getAvailableTools = async (req, res) => {
      logger.warn('[getAvailableTools] User ID not found in request');
      return res.status(401).json({ message: 'Unauthorized' });
    }
-    const cache = getLogStores(CacheKeys.CONFIG_STORE);
+    const cache = getLogStores(CacheKeys.TOOL_CACHE);
    const cachedToolsArray = await cache.get(CacheKeys.TOOLS);

    const appConfig = req.config ?? (await getAppConfig({ role: req.user?.role }));
--- a/api/server/controllers/PluginController.spec.js
+++ b/api/server/controllers/PluginController.spec.js
@ -1,3 +1,4 @@
+const { CacheKeys } = require('librechat-data-provider');
 const { getCachedTools, getAppConfig } = require('~/server/services/Config');
 const { getLogStores } = require('~/cache');

@ -63,6 +64,28 @@ describe('PluginController', () => {
    });
  });

+  describe('cache namespace', () => {
+    /** Makes the mocked cache resolve with an empty array for every lookup. */
+    const primeEmptyCache = () => mockCache.get.mockResolvedValue([]);
+
+    it('getAvailablePluginsController should use TOOL_CACHE namespace', async () => {
+      primeEmptyCache();
+
+      await getAvailablePluginsController(mockReq, mockRes);
+
+      expect(getLogStores).toHaveBeenCalledWith(CacheKeys.TOOL_CACHE);
+    });
+
+    it('getAvailableTools should use TOOL_CACHE namespace', async () => {
+      primeEmptyCache();
+
+      await getAvailableTools(mockReq, mockRes);
+
+      expect(getLogStores).toHaveBeenCalledWith(CacheKeys.TOOL_CACHE);
+    });
+
+    it('should NOT use CONFIG_STORE namespace for tool/plugin operations', async () => {
+      primeEmptyCache();
+
+      await getAvailablePluginsController(mockReq, mockRes);
+      await getAvailableTools(mockReq, mockRes);
+
+      // Flatten the recorded argument lists so every requested namespace can be inspected.
+      const requestedNamespaces = getLogStores.mock.calls.flat();
+      expect(requestedNamespaces).not.toContain(CacheKeys.CONFIG_STORE);
+    });
+  });
+
  describe('getAvailablePluginsController', () => {
    it('should use filterUniquePlugins to remove duplicate plugins', async () => {
      // Add plugins with duplicates to availableTools
--- a/api/server/controllers/TwoFactorController.js
+++ b/api/server/controllers/TwoFactorController.js
@ -1,5 +1,6 @@
 const { encryptV3, logger } = require('@librechat/data-schemas');
 const {
+  verifyOTPOrBackupCode,
  generateBackupCodes,
  generateTOTPSecret,
  verifyBackupCode,
@ -13,24 +14,42 @@ const safeAppTitle = (process.env.APP_TITLE || 'LibreChat').replace(/\s+/g, '');
 /**
 * Enable 2FA for the user by generating a new TOTP secret and backup codes.
 * The secret is encrypted and stored, and 2FA is marked as disabled until confirmed.
+ * If 2FA is already enabled, requires OTP or backup code verification to re-enroll.
 */
 const enable2FA = async (req, res) => {
  try {
    const userId = req.user.id;
+    const existingUser = await getUserById(
+      userId,
+      '+totpSecret +backupCodes _id twoFactorEnabled email',
+    );
+
+    if (existingUser && existingUser.twoFactorEnabled) {
+      const { token, backupCode } = req.body;
+      const result = await verifyOTPOrBackupCode({
+        user: existingUser,
+        token,
+        backupCode,
+        persistBackupUse: false,
+      });
+
+      if (!result.verified) {
+        const msg = result.message ?? 'TOTP token or backup code is required to re-enroll 2FA';
+        return res.status(result.status ?? 400).json({ message: msg });
+      }
+    }
+
    const secret = generateTOTPSecret();
    const { plainCodes, codeObjects } = await generateBackupCodes();
-
-    // Encrypt the secret with v3 encryption before saving.
    const encryptedSecret = encryptV3(secret);

-    // Update the user record: store the secret & backup codes and set twoFactorEnabled to false.
    const user = await updateUser(userId, {
-      totpSecret: encryptedSecret,
-      backupCodes: codeObjects,
-      twoFactorEnabled: false,
+      pendingTotpSecret: encryptedSecret,
+      pendingBackupCodes: codeObjects,
    });

-    const otpauthUrl = `otpauth://totp/${safeAppTitle}:${user.email}?secret=${secret}&issuer=${safeAppTitle}`;
+    const email = user.email || (existingUser && existingUser.email) || '';
+    const otpauthUrl = `otpauth://totp/${safeAppTitle}:${email}?secret=${secret}&issuer=${safeAppTitle}`;

    return res.status(200).json({ otpauthUrl, backupCodes: plainCodes });
  } catch (err) {
@ -46,13 +65,14 @@ const verify2FA = async (req, res) => {
  try {
    const userId = req.user.id;
    const { token, backupCode } = req.body;
-    const user = await getUserById(userId, '_id totpSecret backupCodes');
+    const user = await getUserById(userId, '+totpSecret +pendingTotpSecret +backupCodes _id');
+    const secretSource = user?.pendingTotpSecret ?? user?.totpSecret;

-    if (!user || !user.totpSecret) {
+    if (!user || !secretSource) {
      return res.status(400).json({ message: '2FA not initiated' });
    }

-    const secret = await getTOTPSecret(user.totpSecret);
+    const secret = await getTOTPSecret(secretSource);
    let isVerified = false;

    if (token) {
@ -78,15 +98,28 @@ const confirm2FA = async (req, res) => {
  try {
    const userId = req.user.id;
    const { token } = req.body;
-    const user = await getUserById(userId, '_id totpSecret');
+    const user = await getUserById(
+      userId,
+      '+totpSecret +pendingTotpSecret +pendingBackupCodes _id',
+    );
+    const secretSource = user?.pendingTotpSecret ?? user?.totpSecret;

-    if (!user || !user.totpSecret) {
+    if (!user || !secretSource) {
      return res.status(400).json({ message: '2FA not initiated' });
    }

-    const secret = await getTOTPSecret(user.totpSecret);
+    const secret = await getTOTPSecret(secretSource);
    if (await verifyTOTP(secret, token)) {
-      await updateUser(userId, { twoFactorEnabled: true });
+      const update = {
+        totpSecret: user.pendingTotpSecret ?? user.totpSecret,
+        twoFactorEnabled: true,
+        pendingTotpSecret: null,
+        pendingBackupCodes: [],
+      };
+      if (user.pendingBackupCodes?.length) {
+        update.backupCodes = user.pendingBackupCodes;
+      }
+      await updateUser(userId, update);
      return res.status(200).json();
    }
    return res.status(400).json({ message: 'Invalid token.' });
@ -104,31 +137,27 @@ const disable2FA = async (req, res) => {
  try {
    const userId = req.user.id;
    const { token, backupCode } = req.body;
-    const user = await getUserById(userId, '_id totpSecret backupCodes');
+    const user = await getUserById(userId, '+totpSecret +backupCodes _id twoFactorEnabled');

    if (!user || !user.totpSecret) {
      return res.status(400).json({ message: '2FA is not setup for this user' });
    }

    if (user.twoFactorEnabled) {
-      const secret = await getTOTPSecret(user.totpSecret);
-      let isVerified = false;
+      const result = await verifyOTPOrBackupCode({ user, token, backupCode });

-      if (token) {
-        isVerified = await verifyTOTP(secret, token);
-      } else if (backupCode) {
-        isVerified = await verifyBackupCode({ user, backupCode });
-      } else {
-        return res
-          .status(400)
-          .json({ message: 'Either token or backup code is required to disable 2FA' });
-      }
-
-      if (!isVerified) {
-        return res.status(401).json({ message: 'Invalid token or backup code' });
+      if (!result.verified) {
+        const msg = result.message ?? 'Either token or backup code is required to disable 2FA';
+        return res.status(result.status ?? 400).json({ message: msg });
      }
    }
-    await updateUser(userId, { totpSecret: null, backupCodes: [], twoFactorEnabled: false });
+    await updateUser(userId, {
+      totpSecret: null,
+      backupCodes: [],
+      twoFactorEnabled: false,
+      pendingTotpSecret: null,
+      pendingBackupCodes: [],
+    });
    return res.status(200).json();
  } catch (err) {
    logger.error('[disable2FA]', err);
@ -138,10 +167,28 @@ const disable2FA = async (req, res) => {

 /**
 * Regenerate backup codes for the user.
+ * Requires OTP or backup code verification if 2FA is already enabled.
 */
 const regenerateBackupCodes = async (req, res) => {
  try {
    const userId = req.user.id;
+    const user = await getUserById(userId, '+totpSecret +backupCodes _id twoFactorEnabled');
+
+    if (!user) {
+      return res.status(404).json({ message: 'User not found' });
+    }
+
+    if (user.twoFactorEnabled) {
+      const { token, backupCode } = req.body;
+      const result = await verifyOTPOrBackupCode({ user, token, backupCode });
+
+      if (!result.verified) {
+        const msg =
+          result.message ?? 'TOTP token or backup code is required to regenerate backup codes';
+        return res.status(result.status ?? 400).json({ message: msg });
+      }
+    }
+
    const { plainCodes, codeObjects } = await generateBackupCodes();
    await updateUser(userId, { backupCodes: codeObjects });
    return res.status(200).json({
--- a/api/server/controllers/UserController.js
+++ b/api/server/controllers/UserController.js
@ -14,6 +14,7 @@ const {
  deleteMessages,
  deletePresets,
  deleteUserKey,
+  getUserById,
  deleteConvos,
  deleteFiles,
  updateUser,
@ -22,6 +23,7 @@ const {
 } = require('~/models');
 const {
  ConversationTag,
+  AgentApiKey,
  Transaction,
  MemoryEntry,
  Assistant,
@ -33,8 +35,10 @@ const {
  User,
 } = require('~/db/models');
 const { updateUserPluginAuth, deleteUserPluginAuth } = require('~/server/services/PluginService');
+const { verifyOTPOrBackupCode } = require('~/server/services/twoFactorService');
 const { verifyEmail, resendVerificationEmail } = require('~/server/services/AuthService');
 const { getMCPManager, getFlowStateManager, getMCPServersRegistry } = require('~/config');
+const { invalidateCachedTools } = require('~/server/services/Config/getCachedTools');
 const { needsRefresh, getNewS3URL } = require('~/server/services/Files/S3/crud');
 const { processDeleteRequest } = require('~/server/services/Files/process');
 const { getAppConfig } = require('~/server/services/Config');
@ -214,6 +218,7 @@ const updateUserPluginsController = async (req, res) => {
              `[updateUserPluginsController] Attempting disconnect of MCP server "${serverName}" for user ${user.id} after plugin auth update.`,
            );
            await mcpManager.disconnectUserConnection(user.id, serverName);
+            await invalidateCachedTools({ userId: user.id, serverName });
          }
        } catch (disconnectError) {
          logger.error(
@ -238,6 +243,22 @@ const deleteUserController = async (req, res) => {
  const { user } = req;

  try {
+    const existingUser = await getUserById(
+      user.id,
+      '+totpSecret +backupCodes _id twoFactorEnabled',
+    );
+    if (existingUser && existingUser.twoFactorEnabled) {
+      const { token, backupCode } = req.body;
+      const result = await verifyOTPOrBackupCode({ user: existingUser, token, backupCode });
+
+      if (!result.verified) {
+        const msg =
+          result.message ??
+          'TOTP token or backup code is required to delete account with 2FA enabled';
+        return res.status(result.status ?? 400).json({ message: msg });
+      }
+    }
+
    await deleteMessages({ user: user.id }); // delete user messages
    await deleteAllUserSessions({ userId: user.id }); // delete user sessions
    await Transaction.deleteMany({ user: user.id }); // delete user transactions
@ -256,6 +277,7 @@ const deleteUserController = async (req, res) => {
    await deleteFiles(null, user.id); // delete database files in case of orphaned files from previous steps
    await deleteToolCalls(user.id); // delete user tool calls
    await deleteUserAgents(user.id); // delete user agents
+    await AgentApiKey.deleteMany({ user: user._id }); // delete user agent API keys
    await Assistant.deleteMany({ user: user.id }); // delete user assistants
    await ConversationTag.deleteMany({ user: user.id }); // delete user conversation tags
    await MemoryEntry.deleteMany({ userId: user.id }); // delete user memory entries
--- a/api/server/controllers/tests/TwoFactorController.spec.js
+++ b/api/server/controllers/tests/TwoFactorController.spec.js
@ -0,0 +1,264 @@
+const mockGetUserById = jest.fn(); // backs getUserById in the ~/models mock below
+const mockUpdateUser = jest.fn(); // backs updateUser in the ~/models mock below
+const mockVerifyOTPOrBackupCode = jest.fn(); // backs verifyOTPOrBackupCode in the twoFactorService mock below
+const mockGenerateTOTPSecret = jest.fn();
+const mockGenerateBackupCodes = jest.fn();
+const mockEncryptV3 = jest.fn();
+
+jest.mock('@librechat/data-schemas', () => ({
+  encryptV3: (...args) => mockEncryptV3(...args), // arrow wrapper looks the mock up at call time rather than when the factory runs
+  logger: { error: jest.fn() },
+}));
+
+jest.mock('~/server/services/twoFactorService', () => ({
+  verifyOTPOrBackupCode: (...args) => mockVerifyOTPOrBackupCode(...args),
+  generateBackupCodes: (...args) => mockGenerateBackupCodes(...args),
+  generateTOTPSecret: (...args) => mockGenerateTOTPSecret(...args),
+  verifyBackupCode: jest.fn(), // the three stubs below are never configured by this spec; presumably present only because the controller imports them — confirm
+  getTOTPSecret: jest.fn(),
+  verifyTOTP: jest.fn(),
+}));
+
+jest.mock('~/models', () => ({
+  getUserById: (...args) => mockGetUserById(...args),
+  updateUser: (...args) => mockUpdateUser(...args),
+}));
+
+const { enable2FA, regenerateBackupCodes } = require('~/server/controllers/TwoFactorController'); // controller under test, loaded with the module mocks above in effect
+
+function createRes() { // builds a response double exposing only status() and json()
+  const res = {};
+  res.status = jest.fn().mockReturnValue(res); // returns res so res.status(code).json(body) chains
+  res.json = jest.fn().mockReturnValue(res);
+  return res;
+}
+
+const PLAIN_CODES = ['code1', 'code2', 'code3']; // plainCodes half of the mocked generateBackupCodes result
+const CODE_OBJECTS = [ // codeObjects half of the mocked generateBackupCodes result
+  { codeHash: 'h1', used: false, usedAt: null },
+  { codeHash: 'h2', used: false, usedAt: null },
+  { codeHash: 'h3', used: false, usedAt: null },
+];
+
+beforeEach(() => { // wipe recorded calls, then reinstall the default generator/encryption stubs
+  jest.clearAllMocks(); // clears call history only; stubbed return values set in earlier tests are not reset
+  mockGenerateTOTPSecret.mockReturnValue('NEWSECRET');
+  mockGenerateBackupCodes.mockResolvedValue({ plainCodes: PLAIN_CODES, codeObjects: CODE_OBJECTS });
+  mockEncryptV3.mockReturnValue('encrypted-secret'); // sentinel the tests look for in the updateUser payload
+});
+
+describe('enable2FA', () => { // enable2FA must stage new credentials in pending* fields and gate re-enrollment behind verification
+  it('allows first-time setup without token — writes to pending fields', async () => {
+    const req = { user: { id: 'user1' }, body: {} };
+    const res = createRes();
+    mockGetUserById.mockResolvedValue({ _id: 'user1', twoFactorEnabled: false, email: 'a@b.com' });
+    mockUpdateUser.mockResolvedValue({ email: 'a@b.com' });
+
+    await enable2FA(req, res);
+
+    expect(res.status).toHaveBeenCalledWith(200);
+    expect(res.json).toHaveBeenCalledWith(
+      expect.objectContaining({ otpauthUrl: expect.any(String), backupCodes: PLAIN_CODES }),
+    );
+    expect(mockVerifyOTPOrBackupCode).not.toHaveBeenCalled(); // no verification gate while 2FA is off
+    const updateCall = mockUpdateUser.mock.calls[0][1]; // second argument of the first updateUser call, i.e. the update payload
+    expect(updateCall).toHaveProperty('pendingTotpSecret', 'encrypted-secret');
+    expect(updateCall).toHaveProperty('pendingBackupCodes', CODE_OBJECTS);
+    expect(updateCall).not.toHaveProperty('twoFactorEnabled'); // live fields must be absent from the payload
+    expect(updateCall).not.toHaveProperty('totpSecret');
+    expect(updateCall).not.toHaveProperty('backupCodes');
+  });
+
+  it('re-enrollment writes to pending fields, leaving live 2FA intact', async () => {
+    const req = { user: { id: 'user1' }, body: { token: '123456' } };
+    const res = createRes();
+    const existingUser = {
+      _id: 'user1',
+      twoFactorEnabled: true,
+      totpSecret: 'enc-secret',
+      email: 'a@b.com',
+    };
+    mockGetUserById.mockResolvedValue(existingUser);
+    mockVerifyOTPOrBackupCode.mockResolvedValue({ verified: true });
+    mockUpdateUser.mockResolvedValue({ email: 'a@b.com' });
+
+    await enable2FA(req, res);
+
+    expect(mockVerifyOTPOrBackupCode).toHaveBeenCalledWith({ // exact argument shape the controller passes to the service
+      user: existingUser,
+      token: '123456',
+      backupCode: undefined,
+      persistBackupUse: false, // NOTE(review): name suggests a backup code is not marked used here — confirm in twoFactorService
+    });
+    expect(res.status).toHaveBeenCalledWith(200);
+    const updateCall = mockUpdateUser.mock.calls[0][1];
+    expect(updateCall).toHaveProperty('pendingTotpSecret', 'encrypted-secret');
+    expect(updateCall).toHaveProperty('pendingBackupCodes', CODE_OBJECTS);
+    expect(updateCall).not.toHaveProperty('twoFactorEnabled');
+    expect(updateCall).not.toHaveProperty('totpSecret');
+  });
+
+  it('allows re-enrollment with valid backup code (persistBackupUse: false)', async () => {
+    const req = { user: { id: 'user1' }, body: { backupCode: 'backup123' } };
+    const res = createRes();
+    const existingUser = {
+      _id: 'user1',
+      twoFactorEnabled: true,
+      totpSecret: 'enc-secret',
+      email: 'a@b.com',
+    };
+    mockGetUserById.mockResolvedValue(existingUser);
+    mockVerifyOTPOrBackupCode.mockResolvedValue({ verified: true });
+    mockUpdateUser.mockResolvedValue({ email: 'a@b.com' });
+
+    await enable2FA(req, res);
+
+    expect(mockVerifyOTPOrBackupCode).toHaveBeenCalledWith(
+      expect.objectContaining({ persistBackupUse: false }), // only the flag is pinned in this case
+    );
+    expect(res.status).toHaveBeenCalledWith(200);
+  });
+
+  it('returns error when no token provided and 2FA is enabled', async () => {
+    const req = { user: { id: 'user1' }, body: {} };
+    const res = createRes();
+    mockGetUserById.mockResolvedValue({
+      _id: 'user1',
+      twoFactorEnabled: true,
+      totpSecret: 'enc-secret',
+    });
+    mockVerifyOTPOrBackupCode.mockResolvedValue({ verified: false, status: 400 }); // no message supplied, so the controller uses its own fallback text
+
+    await enable2FA(req, res);
+
+    expect(res.status).toHaveBeenCalledWith(400);
+    expect(mockUpdateUser).not.toHaveBeenCalled(); // nothing is staged when verification fails
+  });
+
+  it('returns 401 when invalid token provided and 2FA is enabled', async () => {
+    const req = { user: { id: 'user1' }, body: { token: 'wrong' } };
+    const res = createRes();
+    mockGetUserById.mockResolvedValue({
+      _id: 'user1',
+      twoFactorEnabled: true,
+      totpSecret: 'enc-secret',
+    });
+    mockVerifyOTPOrBackupCode.mockResolvedValue({
+      verified: false,
+      status: 401,
+      message: 'Invalid token or backup code',
+    });
+
+    await enable2FA(req, res);
+
+    expect(res.status).toHaveBeenCalledWith(401); // status and message from the service result are relayed as-is
+    expect(res.json).toHaveBeenCalledWith({ message: 'Invalid token or backup code' });
+    expect(mockUpdateUser).not.toHaveBeenCalled();
+  });
+});
+
+describe('regenerateBackupCodes', () => {
+  it('returns 404 when user not found', async () => {
+    const req = { user: { id: 'user1' }, body: {} };
+    const res = createRes();
+    mockGetUserById.mockResolvedValue(null);
+
+    await regenerateBackupCodes(req, res);
+
+    expect(res.status).toHaveBeenCalledWith(404);
+    expect(res.json).toHaveBeenCalledWith({ message: 'User not found' });
+  });
+
+  it('requires OTP when 2FA is enabled', async () => {
+    const req = { user: { id: 'user1' }, body: { token: '123456' } };
+    const res = createRes();
+    mockGetUserById.mockResolvedValue({
+      _id: 'user1',
+      twoFactorEnabled: true,
+      totpSecret: 'enc-secret',
+    });
+    mockVerifyOTPOrBackupCode.mockResolvedValue({ verified: true });
+    mockUpdateUser.mockResolvedValue({});
+
+    await regenerateBackupCodes(req, res);
+
+    expect(mockVerifyOTPOrBackupCode).toHaveBeenCalled();
+    expect(res.status).toHaveBeenCalledWith(200);
+    expect(res.json).toHaveBeenCalledWith({
+      backupCodes: PLAIN_CODES,
+      backupCodesHash: CODE_OBJECTS,
+    });
+  });
+
+  it('returns error when no token provided and 2FA is enabled', async () => {
+    const req = { user: { id: 'user1' }, body: {} };
+    const res = createRes();
+    mockGetUserById.mockResolvedValue({
+      _id: 'user1',
+      twoFactorEnabled: true,
+      totpSecret: 'enc-secret',
+    });
+    mockVerifyOTPOrBackupCode.mockResolvedValue({ verified: false, status: 400 });
+
+    await regenerateBackupCodes(req, res);
+
+    expect(res.status).toHaveBeenCalledWith(400);
+  });
+
+  it('returns 401 when invalid token provided and 2FA is enabled', async () => {
+    const req = { user: { id: 'user1' }, body: { token: 'wrong' } };
+    const res = createRes();
+    mockGetUserById.mockResolvedValue({
+      _id: 'user1',
+      twoFactorEnabled: true,
+      totpSecret: 'enc-secret',
+    });
+    mockVerifyOTPOrBackupCode.mockResolvedValue({
+      verified: false,
+      status: 401,
+      message: 'Invalid token or backup code',
+    });
+
+    await regenerateBackupCodes(req, res);
+
+    expect(res.status).toHaveBeenCalledWith(401);
+    expect(res.json).toHaveBeenCalledWith({ message: 'Invalid token or backup code' });
+  });
+
+  it('includes backupCodesHash in response', async () => {
+    const req = { user: { id: 'user1' }, body: { token: '123456' } };
+    const res = createRes();
+    mockGetUserById.mockResolvedValue({
+      _id: 'user1',
+      twoFactorEnabled: true,
+      totpSecret: 'enc-secret',
+    });
+    mockVerifyOTPOrBackupCode.mockResolvedValue({ verified: true });
+    mockUpdateUser.mockResolvedValue({});
+
+    await regenerateBackupCodes(req, res);
+
+    const responseBody = res.json.mock.calls[0][0];
+    expect(responseBody).toHaveProperty('backupCodesHash', CODE_OBJECTS);
+    expect(responseBody).toHaveProperty('backupCodes', PLAIN_CODES);
+  });
+
+  it('allows regeneration without token when 2FA is not enabled', async () => {
+    const req = { user: { id: 'user1' }, body: {} };
+    const res = createRes();
+    mockGetUserById.mockResolvedValue({
+      _id: 'user1',
+      twoFactorEnabled: false,
+    });
+    mockUpdateUser.mockResolvedValue({});
+
+    await regenerateBackupCodes(req, res);
+
+    expect(mockVerifyOTPOrBackupCode).not.toHaveBeenCalled();
+    expect(res.status).toHaveBeenCalledWith(200);
+    expect(res.json).toHaveBeenCalledWith({
+      backupCodes: PLAIN_CODES,
+      backupCodesHash: CODE_OBJECTS,
+    });
+  });
+});
--- a/api/server/controllers/tests/deleteUser.spec.js
+++ b/api/server/controllers/tests/deleteUser.spec.js
@ -0,0 +1,302 @@
+const mockGetUserById = jest.fn(); // handles in this list are wired into the jest.mock factories further down
+const mockDeleteMessages = jest.fn(); // also used by the suite as the "deletion started" probe
+const mockDeleteAllUserSessions = jest.fn();
+const mockDeleteUserById = jest.fn();
+const mockDeleteAllSharedLinks = jest.fn();
+const mockDeletePresets = jest.fn();
+const mockDeleteUserKey = jest.fn();
+const mockDeleteConvos = jest.fn();
+const mockDeleteFiles = jest.fn();
+const mockGetFiles = jest.fn();
+const mockUpdateUserPlugins = jest.fn();
+const mockUpdateUser = jest.fn();
+const mockFindToken = jest.fn();
+const mockVerifyOTPOrBackupCode = jest.fn(); // the 2FA gate whose result each test scripts
+const mockDeleteUserPluginAuth = jest.fn();
+const mockProcessDeleteRequest = jest.fn();
+const mockDeleteToolCalls = jest.fn();
+const mockDeleteUserAgents = jest.fn();
+const mockDeleteUserPrompts = jest.fn();
+
+jest.mock('@librechat/data-schemas', () => ({ // each factory below replaces one module with a hand-written stub
+  logger: { error: jest.fn(), info: jest.fn() },
+  webSearchKeys: [],
+}));
+
+jest.mock('librechat-data-provider', () => ({
+  Tools: {},
+  CacheKeys: {},
+  Constants: { mcp_delimiter: '::', mcp_prefix: 'mcp_' },
+  FileSources: {},
+}));
+
+jest.mock('@librechat/api', () => ({
+  MCPOAuthHandler: {},
+  MCPTokenStorage: {},
+  normalizeHttpError: jest.fn(),
+  extractWebSearchEnvVars: jest.fn(),
+}));
+
+jest.mock('~/models', () => ({ // arrow wrappers look each mock handle up at call time
+  deleteAllUserSessions: (...args) => mockDeleteAllUserSessions(...args),
+  deleteAllSharedLinks: (...args) => mockDeleteAllSharedLinks(...args),
+  updateUserPlugins: (...args) => mockUpdateUserPlugins(...args),
+  deleteUserById: (...args) => mockDeleteUserById(...args),
+  deleteMessages: (...args) => mockDeleteMessages(...args),
+  deletePresets: (...args) => mockDeletePresets(...args),
+  deleteUserKey: (...args) => mockDeleteUserKey(...args),
+  getUserById: (...args) => mockGetUserById(...args),
+  deleteConvos: (...args) => mockDeleteConvos(...args),
+  deleteFiles: (...args) => mockDeleteFiles(...args),
+  updateUser: (...args) => mockUpdateUser(...args),
+  findToken: (...args) => mockFindToken(...args),
+  getFiles: (...args) => mockGetFiles(...args),
+}));
+
+jest.mock('~/db/models', () => ({ // model stubs expose only the single bulk method listed for each
+  ConversationTag: { deleteMany: jest.fn() },
+  AgentApiKey: { deleteMany: jest.fn() },
+  Transaction: { deleteMany: jest.fn() },
+  MemoryEntry: { deleteMany: jest.fn() },
+  Assistant: { deleteMany: jest.fn() },
+  AclEntry: { deleteMany: jest.fn() },
+  Balance: { deleteMany: jest.fn() },
+  Action: { deleteMany: jest.fn() },
+  Group: { updateMany: jest.fn() },
+  Token: { deleteMany: jest.fn() },
+  User: {},
+}));
+
+jest.mock('~/server/services/PluginService', () => ({
+  updateUserPluginAuth: jest.fn(),
+  deleteUserPluginAuth: (...args) => mockDeleteUserPluginAuth(...args),
+}));
+
+jest.mock('~/server/services/twoFactorService', () => ({
+  verifyOTPOrBackupCode: (...args) => mockVerifyOTPOrBackupCode(...args),
+}));
+
+jest.mock('~/server/services/AuthService', () => ({
+  verifyEmail: jest.fn(),
+  resendVerificationEmail: jest.fn(),
+}));
+
+jest.mock('~/config', () => ({
+  getMCPManager: jest.fn(),
+  getFlowStateManager: jest.fn(),
+  getMCPServersRegistry: jest.fn(),
+}));
+
+jest.mock('~/server/services/Config/getCachedTools', () => ({
+  invalidateCachedTools: jest.fn(),
+}));
+
+jest.mock('~/server/services/Files/S3/crud', () => ({
+  needsRefresh: jest.fn(),
+  getNewS3URL: jest.fn(),
+}));
+
+jest.mock('~/server/services/Files/process', () => ({
+  processDeleteRequest: (...args) => mockProcessDeleteRequest(...args),
+}));
+
+jest.mock('~/server/services/Config', () => ({
+  getAppConfig: jest.fn(),
+}));
+
+jest.mock('~/models/ToolCall', () => ({
+  deleteToolCalls: (...args) => mockDeleteToolCalls(...args),
+}));
+
+jest.mock('~/models/Prompt', () => ({
+  deleteUserPrompts: (...args) => mockDeleteUserPrompts(...args),
+}));
+
+jest.mock('~/models/Agent', () => ({
+  deleteUserAgents: (...args) => mockDeleteUserAgents(...args),
+}));
+
+jest.mock('~/cache', () => ({
+  getLogStores: jest.fn(),
+}));
+
+const { deleteUserController } = require('~/server/controllers/UserController'); // controller under test, loaded with the module mocks above in effect
+
+function createRes() { // builds a response double exposing status(), json() and send()
+  const res = {};
+  res.status = jest.fn().mockReturnValue(res); // every method returns res so calls can be chained
+  res.json = jest.fn().mockReturnValue(res);
+  res.send = jest.fn().mockReturnValue(res); // the success assertions in this spec read res.send
+  return res;
+}
+
+function stubDeletionMocks() { // gives every deletion-step mock a resolved promise so awaiting it does not throw
+  mockDeleteMessages.mockResolvedValue();
+  mockDeleteAllUserSessions.mockResolvedValue();
+  mockDeleteUserKey.mockResolvedValue();
+  mockDeletePresets.mockResolvedValue();
+  mockDeleteConvos.mockResolvedValue();
+  mockDeleteUserPluginAuth.mockResolvedValue();
+  mockDeleteUserById.mockResolvedValue();
+  mockDeleteAllSharedLinks.mockResolvedValue();
+  mockGetFiles.mockResolvedValue([]); // the only stub with a payload: an empty file list
+  mockProcessDeleteRequest.mockResolvedValue();
+  mockDeleteFiles.mockResolvedValue();
+  mockDeleteToolCalls.mockResolvedValue();
+  mockDeleteUserAgents.mockResolvedValue();
+  mockDeleteUserPrompts.mockResolvedValue();
+}
+
+beforeEach(() => { // reset recorded calls, then reinstall the resolved deletion stubs
+  jest.clearAllMocks(); // clears call history only; stubbed return values from earlier tests are not reset
+  stubDeletionMocks();
+});
+
+describe('deleteUserController - 2FA enforcement', () => { // deletion must be gated by verifyOTPOrBackupCode whenever twoFactorEnabled is true
+  it('proceeds with deletion when 2FA is not enabled', async () => {
+    const req = { user: { id: 'user1', _id: 'user1', email: 'a@b.com' }, body: {} };
+    const res = createRes();
+    mockGetUserById.mockResolvedValue({ _id: 'user1', twoFactorEnabled: false });
+
+    await deleteUserController(req, res);
+
+    expect(res.status).toHaveBeenCalledWith(200);
+    expect(res.send).toHaveBeenCalledWith({ message: 'User deleted' });
+    expect(mockDeleteMessages).toHaveBeenCalled(); // deleteMessages is the controller's first deletion step, so it marks "deletion started"
+    expect(mockVerifyOTPOrBackupCode).not.toHaveBeenCalled();
+  });
+
+  it('proceeds with deletion when user has no 2FA record', async () => {
+    const req = { user: { id: 'user1', _id: 'user1', email: 'a@b.com' }, body: {} };
+    const res = createRes();
+    mockGetUserById.mockResolvedValue(null); // lookup finds nothing; the controller's gate only applies to a found user with twoFactorEnabled
+
+    await deleteUserController(req, res);
+
+    expect(res.status).toHaveBeenCalledWith(200);
+    expect(res.send).toHaveBeenCalledWith({ message: 'User deleted' });
+  });
+
+  it('returns error when 2FA is enabled and verification fails with 400', async () => {
+    const req = { user: { id: 'user1', _id: 'user1' }, body: {} };
+    const res = createRes();
+    mockGetUserById.mockResolvedValue({
+      _id: 'user1',
+      twoFactorEnabled: true,
+      totpSecret: 'enc-secret',
+    });
+    mockVerifyOTPOrBackupCode.mockResolvedValue({ verified: false, status: 400 }); // no message supplied, so the controller uses its own fallback text
+
+    await deleteUserController(req, res);
+
+    expect(res.status).toHaveBeenCalledWith(400);
+    expect(mockDeleteMessages).not.toHaveBeenCalled(); // nothing may be deleted after a failed check
+  });
+
+  it('returns 401 when 2FA is enabled and invalid TOTP token provided', async () => {
+    const existingUser = {
+      _id: 'user1',
+      twoFactorEnabled: true,
+      totpSecret: 'enc-secret',
+    };
+    const req = { user: { id: 'user1', _id: 'user1' }, body: { token: 'wrong' } };
+    const res = createRes();
+    mockGetUserById.mockResolvedValue(existingUser);
+    mockVerifyOTPOrBackupCode.mockResolvedValue({
+      verified: false,
+      status: 401,
+      message: 'Invalid token or backup code',
+    });
+
+    await deleteUserController(req, res);
+
+    expect(mockVerifyOTPOrBackupCode).toHaveBeenCalledWith({ // the looked-up user record is passed, not req.user
+      user: existingUser,
+      token: 'wrong',
+      backupCode: undefined,
+    });
+    expect(res.status).toHaveBeenCalledWith(401);
+    expect(res.json).toHaveBeenCalledWith({ message: 'Invalid token or backup code' });
+    expect(mockDeleteMessages).not.toHaveBeenCalled();
+  });
+
+  it('returns 401 when 2FA is enabled and invalid backup code provided', async () => {
+    const existingUser = {
+      _id: 'user1',
+      twoFactorEnabled: true,
+      totpSecret: 'enc-secret',
+      backupCodes: [],
+    };
+    const req = { user: { id: 'user1', _id: 'user1' }, body: { backupCode: 'bad-code' } };
+    const res = createRes();
+    mockGetUserById.mockResolvedValue(existingUser);
+    mockVerifyOTPOrBackupCode.mockResolvedValue({
+      verified: false,
+      status: 401,
+      message: 'Invalid token or backup code',
+    });
+
+    await deleteUserController(req, res);
+
+    expect(mockVerifyOTPOrBackupCode).toHaveBeenCalledWith({
+      user: existingUser,
+      token: undefined,
+      backupCode: 'bad-code',
+    });
+    expect(res.status).toHaveBeenCalledWith(401);
+    expect(mockDeleteMessages).not.toHaveBeenCalled();
+  });
+
+  it('deletes account when valid TOTP token provided with 2FA enabled', async () => {
+    const existingUser = {
+      _id: 'user1',
+      twoFactorEnabled: true,
+      totpSecret: 'enc-secret',
+    };
+    const req = {
+      user: { id: 'user1', _id: 'user1', email: 'a@b.com' },
+      body: { token: '123456' },
+    };
+    const res = createRes();
+    mockGetUserById.mockResolvedValue(existingUser);
+    mockVerifyOTPOrBackupCode.mockResolvedValue({ verified: true }); // the service is mocked, so the token value itself is never checked here
+
+    await deleteUserController(req, res);
+
+    expect(mockVerifyOTPOrBackupCode).toHaveBeenCalledWith({
+      user: existingUser,
+      token: '123456',
+      backupCode: undefined,
+    });
+    expect(res.status).toHaveBeenCalledWith(200);
+    expect(res.send).toHaveBeenCalledWith({ message: 'User deleted' });
+    expect(mockDeleteMessages).toHaveBeenCalled();
+  });
+
+  it('deletes account when valid backup code provided with 2FA enabled', async () => {
+    const existingUser = {
+      _id: 'user1',
+      twoFactorEnabled: true,
+      totpSecret: 'enc-secret',
+      backupCodes: [{ codeHash: 'h1', used: false }],
+    };
+    const req = {
+      user: { id: 'user1', _id: 'user1', email: 'a@b.com' },
+      body: { backupCode: 'valid-code' },
+    };
+    const res = createRes();
+    mockGetUserById.mockResolvedValue(existingUser);
+    mockVerifyOTPOrBackupCode.mockResolvedValue({ verified: true });
+
+    await deleteUserController(req, res);
+
+    expect(mockVerifyOTPOrBackupCode).toHaveBeenCalledWith({
+      user: existingUser,
+      token: undefined,
+      backupCode: 'valid-code',
+    });
+    expect(res.status).toHaveBeenCalledWith(200);
+    expect(res.send).toHaveBeenCalledWith({ message: 'User deleted' });
+    expect(mockDeleteMessages).toHaveBeenCalled();
+  });
+});
--- a/api/server/controllers/agents/tests/callbacks.spec.js
+++ b/api/server/controllers/agents/tests/callbacks.spec.js
@ -16,13 +16,10 @@ jest.mock('@librechat/data-schemas', () => ({
 }));

 jest.mock('@librechat/agents', () => ({
-  EnvVar: { CODE_API_KEY: 'CODE_API_KEY' },
-  Providers: { GOOGLE: 'google' },
-  GraphEvents: {},
+  ...jest.requireActual('@librechat/agents'),
  getMessageId: jest.fn(),
  ToolEndHandler: jest.fn(),
  handleToolCalls: jest.fn(),
-  ChatModelStreamHandler: jest.fn(),
 }));

 jest.mock('~/server/services/Files/Citations', () => ({
--- a/api/server/controllers/agents/tests/jobReplacement.spec.js
+++ b/api/server/controllers/agents/tests/jobReplacement.spec.js
@ -0,0 +1,281 @@
+/**
+ * Tests for job replacement detection in ResumableAgentController
+ *
+ * Tests the following fixes from PR #11462:
+ * 1. Job creation timestamp tracking
+ * 2. Stale job detection and event skipping
+ * 3. Response message saving before final event emission
+ */
+
+// Shared logger stub. It is exposed through the '@librechat/data-schemas' mock below and
+// asserted on directly by the tests (e.g. the `debug` call made when a FINAL emit is skipped).
+// Names in this preamble start with `mock` so the jest.mock() factories may reference them.
+const mockLogger = {
+  debug: jest.fn(),
+  warn: jest.fn(),
+  error: jest.fn(),
+  info: jest.fn(),
+};
+
+// Stand-in for GenerationJobManager. Each test configures only the methods it needs
+// (mostly `getJob`); recorded calls are cleared by the suite's `beforeEach`.
+const mockGenerationJobManager = {
+  createJob: jest.fn(),
+  getJob: jest.fn(),
+  emitDone: jest.fn(),
+  emitChunk: jest.fn(),
+  completeJob: jest.fn(),
+  updateMetadata: jest.fn(),
+  setContentParts: jest.fn(),
+  subscribe: jest.fn(),
+};
+
+// Standalone mock functions that the tests call and assert on directly.
+const mockSaveMessage = jest.fn();
+const mockDecrementPendingRequest = jest.fn();
+
+// NOTE: no test in this file requires the modules mocked below; the suites re-create the
+// relevant controller logic inline and drive the mock objects above directly.
+jest.mock('@librechat/data-schemas', () => ({
+  logger: mockLogger,
+}));
+
+jest.mock('@librechat/api', () => ({
+  isEnabled: jest.fn().mockReturnValue(false),
+  GenerationJobManager: mockGenerationJobManager,
+  checkAndIncrementPendingRequest: jest.fn().mockResolvedValue({ allowed: true }),
+  // Arrow wrapper forwards every call to the standalone mock declared above.
+  decrementPendingRequest: (...args) => mockDecrementPendingRequest(...args),
+  getViolationInfo: jest.fn(),
+  // Both sanitizers are identity functions here: they return their argument unchanged.
+  sanitizeMessageForTransmit: jest.fn((msg) => msg),
+  sanitizeFileForTransmit: jest.fn((file) => file),
+  Constants: { NO_PARENT: '00000000-0000-0000-0000-000000000000' },
+}));
+
+jest.mock('~/models', () => ({
+  // Arrow wrapper forwards every call to the standalone mock declared above.
+  saveMessage: (...args) => mockSaveMessage(...args),
+}));
+
+describe('Job Replacement Detection', () => {
+  beforeEach(() => {
+    jest.clearAllMocks();
+  });
+
+  describe('Job Creation Timestamp Tracking', () => {
+    it('should capture createdAt when job is created', async () => {
+      const streamId = 'test-stream-123';
+      const timestamp = Date.now();
+      // Job object the mocked manager resolves with.
+      const stubbedJob = {
+        createdAt: timestamp,
+        readyPromise: Promise.resolve(),
+        abortController: new AbortController(),
+        emitter: { on: jest.fn() },
+      };
+      mockGenerationJobManager.createJob.mockResolvedValue(stubbedJob);
+
+      const { createdAt: reportedCreatedAt } = await mockGenerationJobManager.createJob(
+        streamId,
+        'user-123',
+        streamId,
+      );
+
+      expect(reportedCreatedAt).toBe(timestamp);
+    });
+  });
+
+  describe('Job Replacement Detection Logic', () => {
+    const STREAM_ID = 'test-stream-123';
+    const ORIGINAL_CREATED_AT = 1000;
+
+    /**
+     * Simulates the job replacement detection logic from request.js.
+     * A job counts as replaced when the manager no longer returns it, or when the
+     * returned job carries a different createdAt than the one captured earlier.
+     */
+    const detectJobReplacement = async (streamId, originalCreatedAt) => {
+      const job = await mockGenerationJobManager.getJob(streamId);
+      if (!job) {
+        return true;
+      }
+      return job.createdAt !== originalCreatedAt;
+    };
+
+    it('should detect when job was replaced (different createdAt)', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue({ createdAt: 2000 });
+
+      await expect(detectJobReplacement(STREAM_ID, ORIGINAL_CREATED_AT)).resolves.toBe(true);
+    });
+
+    it('should detect when job was deleted', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue(null);
+
+      await expect(detectJobReplacement(STREAM_ID, ORIGINAL_CREATED_AT)).resolves.toBe(true);
+    });
+
+    it('should not detect replacement when same job (same createdAt)', async () => {
+      mockGenerationJobManager.getJob.mockResolvedValue({ createdAt: ORIGINAL_CREATED_AT });
+
+      await expect(detectJobReplacement(STREAM_ID, ORIGINAL_CREATED_AT)).resolves.toBe(false);
+    });
+  });
+
+  describe('Event Emission Behavior', () => {
+    const STREAM_ID = 'test-stream-123';
+    const USER_ID = 'user-123';
+    const ORIGINAL_CREATED_AT = 1000;
+
+    /**
+     * Simulates the final event emission logic from request.js.
+     * Emits and completes only when the stored job is still the one identified by
+     * `originalCreatedAt`; the pending-request counter is decremented on both paths.
+     * @returns {Promise<boolean>} true when the final event was emitted, false when skipped.
+     */
+    const emitFinalEventIfNotReplaced = async ({
+      streamId,
+      originalCreatedAt,
+      finalEvent,
+      userId,
+    }) => {
+      const storedJob = await mockGenerationJobManager.getJob(streamId);
+      const isSameJob = Boolean(storedJob) && storedJob.createdAt === originalCreatedAt;
+
+      if (isSameJob) {
+        mockGenerationJobManager.emitDone(streamId, finalEvent);
+        mockGenerationJobManager.completeJob(streamId);
+        await mockDecrementPendingRequest(userId);
+        return true;
+      }
+
+      mockLogger.debug('Skipping FINAL emit - job was replaced', {
+        streamId,
+        originalCreatedAt,
+        currentCreatedAt: storedJob?.createdAt,
+      });
+      await mockDecrementPendingRequest(userId);
+      return false;
+    };
+
+    it('should skip emitting when job was replaced', async () => {
+      const replacementCreatedAt = 2000;
+      mockGenerationJobManager.getJob.mockResolvedValue({ createdAt: replacementCreatedAt });
+
+      const emitted = await emitFinalEventIfNotReplaced({
+        streamId: STREAM_ID,
+        originalCreatedAt: ORIGINAL_CREATED_AT,
+        finalEvent: { final: true },
+        userId: USER_ID,
+      });
+
+      expect(emitted).toBe(false);
+      // Nothing is emitted or completed for a stale job...
+      expect(mockGenerationJobManager.emitDone).not.toHaveBeenCalled();
+      expect(mockGenerationJobManager.completeJob).not.toHaveBeenCalled();
+      // ...but the pending-request counter is still released and the skip is logged.
+      expect(mockDecrementPendingRequest).toHaveBeenCalledWith(USER_ID);
+      expect(mockLogger.debug).toHaveBeenCalledWith(
+        'Skipping FINAL emit - job was replaced',
+        expect.objectContaining({
+          streamId: STREAM_ID,
+          originalCreatedAt: ORIGINAL_CREATED_AT,
+          currentCreatedAt: replacementCreatedAt,
+        }),
+      );
+    });
+
+    it('should emit when job was not replaced', async () => {
+      const finalEvent = { final: true, conversation: { conversationId: STREAM_ID } };
+      mockGenerationJobManager.getJob.mockResolvedValue({ createdAt: ORIGINAL_CREATED_AT });
+
+      const emitted = await emitFinalEventIfNotReplaced({
+        streamId: STREAM_ID,
+        originalCreatedAt: ORIGINAL_CREATED_AT,
+        finalEvent,
+        userId: USER_ID,
+      });
+
+      expect(emitted).toBe(true);
+      expect(mockGenerationJobManager.emitDone).toHaveBeenCalledWith(STREAM_ID, finalEvent);
+      expect(mockGenerationJobManager.completeJob).toHaveBeenCalledWith(STREAM_ID);
+      expect(mockDecrementPendingRequest).toHaveBeenCalledWith(USER_ID);
+    });
+  });
+
+  describe('Response Message Saving Order', () => {
+    /**
+     * Tests that response messages are saved BEFORE final events are emitted
+     * This prevents race conditions where clients send follow-up messages
+     * before the response is in the database
+     */
+    it('should save message before emitting final event', async () => {
+      const callOrder = [];
+
+      // One-shot implementations: jest.clearAllMocks() clears recorded calls but keeps
+      // implementations, so a persistent mockImplementation() would keep pushing into
+      // this test's array from any later test that touches the same mocks.
+      mockSaveMessage.mockImplementationOnce(async () => {
+        callOrder.push('saveMessage');
+      });
+
+      mockGenerationJobManager.emitDone.mockImplementationOnce(() => {
+        callOrder.push('emitDone');
+      });
+
+      // getJob is consulted exactly once below, so a one-shot resolved value is enough.
+      mockGenerationJobManager.getJob.mockResolvedValueOnce({
+        createdAt: 1000,
+      });
+
+      // Simulate the order of operations from request.js
+      const streamId = 'test-stream-123';
+      const originalCreatedAt = 1000;
+      const response = { messageId: 'response-123' };
+      const userId = 'user-123';
+
+      // Step 1: Save message
+      await mockSaveMessage({}, { ...response, user: userId }, { context: 'test' });
+
+      // Step 2: Check for replacement
+      const currentJob = await mockGenerationJobManager.getJob(streamId);
+      const jobWasReplaced = !currentJob || currentJob.createdAt !== originalCreatedAt;
+
+      // Step 3: Emit if not replaced
+      if (!jobWasReplaced) {
+        mockGenerationJobManager.emitDone(streamId, { final: true });
+      }
+
+      expect(callOrder).toEqual(['saveMessage', 'emitDone']);
+    });
+  });
+
+  describe('Aborted Request Handling', () => {
+    /** Builds a fresh minimal response payload for each scenario. */
+    const buildResponse = () => ({ messageId: 'response-123', content: [] });
+
+    it('should use unfinished: true instead of error: true for aborted requests', () => {
+      // The new format for aborted responses marks the message as unfinished.
+      const abortedResponse = { ...buildResponse(), unfinished: true };
+
+      expect(abortedResponse.unfinished).toBe(true);
+      expect(abortedResponse.error).toBeUndefined();
+    });
+
+    it('should include unfinished flag in final event for aborted requests', () => {
+      const response = buildResponse();
+
+      // Old format (deprecated), kept only to document the shape being replaced.
+      const _legacyFinalEvent = {
+        final: true,
+        responseMessage: { ...response, error: true },
+        error: { message: 'Request was aborted' },
+      };
+
+      // New format (PR #11462): no top-level error, message flagged as unfinished.
+      const newFinalEvent = {
+        final: true,
+        responseMessage: { ...response, unfinished: true },
+      };
+      const { responseMessage } = newFinalEvent;
+
+      expect(responseMessage.unfinished).toBe(true);
+      expect(newFinalEvent.error).toBeUndefined();
+      expect(responseMessage.error).toBeUndefined();
+    });
+  });
+});
--- a/api/server/controllers/agents/tests/openai.spec.js
+++ b/api/server/controllers/agents/tests/openai.spec.js
@ -0,0 +1,229 @@
+/**
+ * Unit tests for OpenAI-compatible API controller
+ * Tests that recordCollectedUsage is called correctly for token spending
+ */
+
+// Top-level mock functions. They are referenced from the jest.mock() factories below and
+// asserted on by identity in the tests, which is why they are named constants rather than
+// inline jest.fn() calls.
+const mockSpendTokens = jest.fn().mockResolvedValue({});
+const mockSpendStructuredTokens = jest.fn().mockResolvedValue({});
+const mockRecordCollectedUsage = jest
+  .fn()
+  .mockResolvedValue({ input_tokens: 100, output_tokens: 50 });
+// Default config values; individual tests may override them with mockReturnValue().
+const mockGetBalanceConfig = jest.fn().mockReturnValue({ enabled: true });
+const mockGetTransactionsConfig = jest.fn().mockReturnValue({ enabled: true });
+
+// Fixed id so generated identifiers are deterministic.
+jest.mock('nanoid', () => ({
+  nanoid: jest.fn(() => 'mock-nanoid-123'),
+}));
+
+jest.mock('@librechat/data-schemas', () => ({
+  logger: {
+    debug: jest.fn(),
+    error: jest.fn(),
+    warn: jest.fn(),
+  },
+}));
+
+// Minimal stand-in: only the three names listed here exist on the mocked module.
+jest.mock('@librechat/agents', () => ({
+  Callback: { TOOL_ERROR: 'TOOL_ERROR' },
+  ToolEndHandler: jest.fn(),
+  formatAgentMessages: jest.fn().mockReturnValue({
+    messages: [],
+    indexTokenCountMap: {},
+  }),
+}));
+
+// Stubbed '@librechat/api' surface. validateRequest yields a non-streaming request for
+// 'agent-123' and initializeAgent reports model 'gpt-4'.
+// NOTE(review): presumably initializeAgent is the source of the `model: 'gpt-4'` value the
+// suite asserts on — confirm against the controller.
+jest.mock('@librechat/api', () => ({
+  writeSSE: jest.fn(),
+  createRun: jest.fn().mockResolvedValue({
+    processStream: jest.fn().mockResolvedValue(undefined),
+  }),
+  createChunk: jest.fn().mockReturnValue({}),
+  buildToolSet: jest.fn().mockReturnValue(new Set()),
+  sendFinalChunk: jest.fn(),
+  createSafeUser: jest.fn().mockReturnValue({ id: 'user-123' }),
+  validateRequest: jest
+    .fn()
+    .mockReturnValue({ request: { model: 'agent-123', messages: [], stream: false } }),
+  initializeAgent: jest.fn().mockResolvedValue({
+    model: 'gpt-4',
+    model_parameters: {},
+    toolRegistry: {},
+  }),
+  getBalanceConfig: mockGetBalanceConfig,
+  createErrorResponse: jest.fn(),
+  getTransactionsConfig: mockGetTransactionsConfig,
+  recordCollectedUsage: mockRecordCollectedUsage,
+  buildNonStreamingResponse: jest.fn().mockReturnValue({ id: 'resp-123' }),
+  createOpenAIStreamTracker: jest.fn().mockReturnValue({
+    addText: jest.fn(),
+    addReasoning: jest.fn(),
+    toolCalls: new Map(),
+    usage: { promptTokens: 0, completionTokens: 0, reasoningTokens: 0 },
+  }),
+  createOpenAIContentAggregator: jest.fn().mockReturnValue({
+    addText: jest.fn(),
+    addReasoning: jest.fn(),
+    getText: jest.fn().mockReturnValue(''),
+    getReasoning: jest.fn().mockReturnValue(''),
+    toolCalls: new Map(),
+    usage: { promptTokens: 100, completionTokens: 50, reasoningTokens: 0 },
+  }),
+  createToolExecuteHandler: jest.fn().mockReturnValue({ handle: jest.fn() }),
+  isChatCompletionValidationFailure: jest.fn().mockReturnValue(false),
+}));
+
+jest.mock('~/server/services/ToolService', () => ({
+  loadAgentTools: jest.fn().mockResolvedValue([]),
+  loadToolsForExecution: jest.fn().mockResolvedValue([]),
+}));
+
+jest.mock('~/models/spendTokens', () => ({
+  spendTokens: mockSpendTokens,
+  spendStructuredTokens: mockSpendStructuredTokens,
+}));
+
+// Pricing helpers; the tests expect these exact functions under `pricing` in the
+// dependency bundle passed to recordCollectedUsage.
+const mockGetMultiplier = jest.fn().mockReturnValue(1);
+const mockGetCacheMultiplier = jest.fn().mockReturnValue(null);
+jest.mock('~/models/tx', () => ({
+  getMultiplier: mockGetMultiplier,
+  getCacheMultiplier: mockGetCacheMultiplier,
+}));
+
+jest.mock('~/server/controllers/agents/callbacks', () => ({
+  createToolEndCallback: jest.fn().mockReturnValue(jest.fn()),
+}));
+
+jest.mock('~/server/services/PermissionService', () => ({
+  findAccessibleResources: jest.fn().mockResolvedValue([]),
+}));
+
+jest.mock('~/models/Conversation', () => ({
+  getConvoFiles: jest.fn().mockResolvedValue([]),
+}));
+
+jest.mock('~/models/Agent', () => ({
+  getAgent: jest.fn().mockResolvedValue({
+    id: 'agent-123',
+    provider: 'openAI',
+    model_parameters: { model: 'gpt-4' },
+  }),
+  getAgents: jest.fn().mockResolvedValue([]),
+}));
+
+// Bulk-write helpers; the tests expect these exact functions under `bulkWriteOps`
+// (`updateBalance` and `insertMany` respectively).
+const mockUpdateBalance = jest.fn().mockResolvedValue({});
+const mockBulkInsertTransactions = jest.fn().mockResolvedValue(undefined);
+jest.mock('~/models', () => ({
+  getFiles: jest.fn(),
+  getUserKey: jest.fn(),
+  getMessages: jest.fn(),
+  updateFilesUsage: jest.fn(),
+  getUserKeyValues: jest.fn(),
+  getUserCodeFiles: jest.fn(),
+  getToolFilesByIds: jest.fn(),
+  getCodeGeneratedFiles: jest.fn(),
+  updateBalance: mockUpdateBalance,
+  bulkInsertTransactions: mockBulkInsertTransactions,
+}));
+
+describe('OpenAIChatCompletionController', () => {
+  let OpenAIChatCompletionController;
+  let req, res;
+
+  /**
+   * Resets mock state and rebuilds the request/response fixtures before every test.
+   */
+  beforeEach(() => {
+    jest.clearAllMocks();
+
+    // clearAllMocks() only wipes recorded calls/results; values installed through
+    // mockReturnValue() survive it. Re-apply the default configs so a test that overrides
+    // them cannot leak its values into whichever test runs next.
+    mockGetBalanceConfig.mockReturnValue({ enabled: true });
+    mockGetTransactionsConfig.mockReturnValue({ enabled: true });
+
+    // Required here (not at file top) so the module is loaded after the jest.mock() setup.
+    const controller = require('../openai');
+    OpenAIChatCompletionController = controller.OpenAIChatCompletionController;
+
+    // Non-streaming chat-completion request for agent 'agent-123'.
+    req = {
+      body: {
+        model: 'agent-123',
+        messages: [{ role: 'user', content: 'Hello' }],
+        stream: false,
+      },
+      user: { id: 'user-123' },
+      config: {
+        endpoints: {
+          agents: { allowedProviders: ['openAI'] },
+        },
+      },
+      on: jest.fn(),
+    };
+
+    // Response double; `status` returns the same object so calls can be chained.
+    res = {
+      status: jest.fn().mockReturnThis(),
+      json: jest.fn(),
+      setHeader: jest.fn(),
+      flushHeaders: jest.fn(),
+      end: jest.fn(),
+      write: jest.fn(),
+    };
+  });
+
+  describe('token usage recording', () => {
+    /** Dependency bundle the controller is expected to pass to recordCollectedUsage. */
+    const expectedDeps = {
+      spendTokens: mockSpendTokens,
+      spendStructuredTokens: mockSpendStructuredTokens,
+      pricing: { getMultiplier: mockGetMultiplier, getCacheMultiplier: mockGetCacheMultiplier },
+      bulkWriteOps: {
+        insertMany: mockBulkInsertTransactions,
+        updateBalance: mockUpdateBalance,
+      },
+    };
+
+    /** Invokes the controller once with the fixtures prepared by the outer beforeEach. */
+    const runController = () => OpenAIChatCompletionController(req, res);
+
+    it('should call recordCollectedUsage after successful non-streaming completion', async () => {
+      await runController();
+
+      const expectedParams = expect.objectContaining({
+        user: 'user-123',
+        conversationId: expect.any(String),
+        collectedUsage: expect.any(Array),
+        context: 'message',
+        balance: { enabled: true },
+        transactions: { enabled: true },
+      });
+      expect(mockRecordCollectedUsage).toHaveBeenCalledTimes(1);
+      expect(mockRecordCollectedUsage).toHaveBeenCalledWith(expectedDeps, expectedParams);
+    });
+
+    it('should pass balance and transactions config to recordCollectedUsage', async () => {
+      const balance = { enabled: true, startBalance: 1000 };
+      const transactions = { enabled: true, rateLimit: 100 };
+      mockGetBalanceConfig.mockReturnValue(balance);
+      mockGetTransactionsConfig.mockReturnValue(transactions);
+
+      await runController();
+
+      expect(mockRecordCollectedUsage).toHaveBeenCalledWith(
+        expect.any(Object),
+        expect.objectContaining({
+          balance: { enabled: true, startBalance: 1000 },
+          transactions: { enabled: true, rateLimit: 100 },
+        }),
+      );
+    });
+
+    it('should pass spendTokens, spendStructuredTokens, pricing, and bulkWriteOps as dependencies', async () => {
+      await runController();
+
+      // First positional argument of the first recorded call.
+      const [[deps]] = mockRecordCollectedUsage.mock.calls;
+
+      expect(deps).toHaveProperty('spendTokens', mockSpendTokens);
+      expect(deps).toHaveProperty('spendStructuredTokens', mockSpendStructuredTokens);
+
+      expect(deps).toHaveProperty('pricing');
+      const { pricing } = deps;
+      expect(pricing).toHaveProperty('getMultiplier', mockGetMultiplier);
+      expect(pricing).toHaveProperty('getCacheMultiplier', mockGetCacheMultiplier);
+
+      expect(deps).toHaveProperty('bulkWriteOps');
+      const { bulkWriteOps } = deps;
+      expect(bulkWriteOps).toHaveProperty('insertMany', mockBulkInsertTransactions);
+      expect(bulkWriteOps).toHaveProperty('updateBalance', mockUpdateBalance);
+    });
+
+    it('should include model from primaryConfig in recordCollectedUsage params', async () => {
+      await runController();
+
+      const paramsWithModel = expect.objectContaining({
+        model: 'gpt-4',
+      });
+      expect(mockRecordCollectedUsage).toHaveBeenCalledWith(expect.any(Object), paramsWithModel);
+    });
+  });
+});
--- a/api/server/controllers/agents/tests/responses.unit.spec.js
+++ b/api/server/controllers/agents/tests/responses.unit.spec.js
@ -0,0 +1,345 @@
+/**
+ * Unit tests for Open Responses API controller
+ * Tests that recordCollectedUsage is called correctly for token spending
+ */
+
+// Top-level mock functions. They are referenced from the jest.mock() factories below and
+// asserted on by identity in the tests, which is why they are named constants rather than
+// inline jest.fn() calls.
+const mockSpendTokens = jest.fn().mockResolvedValue({});
+const mockSpendStructuredTokens = jest.fn().mockResolvedValue({});
+const mockRecordCollectedUsage = jest
+  .fn()
+  .mockResolvedValue({ input_tokens: 100, output_tokens: 50 });
+// Default config values; individual tests may override them with mockReturnValue().
+const mockGetBalanceConfig = jest.fn().mockReturnValue({ enabled: true });
+const mockGetTransactionsConfig = jest.fn().mockReturnValue({ enabled: true });
+
+// Fixed ids so generated identifiers are deterministic.
+jest.mock('nanoid', () => ({
+  nanoid: jest.fn(() => 'mock-nanoid-123'),
+}));
+
+jest.mock('uuid', () => ({
+  v4: jest.fn(() => 'mock-uuid-456'),
+}));
+
+jest.mock('@librechat/data-schemas', () => ({
+  logger: {
+    debug: jest.fn(),
+    error: jest.fn(),
+    warn: jest.fn(),
+  },
+}));
+
+// Minimal stand-in: only the three names listed here exist on the mocked module.
+jest.mock('@librechat/agents', () => ({
+  Callback: { TOOL_ERROR: 'TOOL_ERROR' },
+  ToolEndHandler: jest.fn(),
+  formatAgentMessages: jest.fn().mockReturnValue({
+    messages: [],
+    indexTokenCountMap: {},
+  }),
+}));
+
+// Stubbed '@librechat/api' surface. validateResponseRequest yields a non-streaming request
+// by default; the streaming suite overrides it. initializeAgent reports model 'claude-3'.
+jest.mock('@librechat/api', () => ({
+  createRun: jest.fn().mockResolvedValue({
+    processStream: jest.fn().mockResolvedValue(undefined),
+  }),
+  buildToolSet: jest.fn().mockReturnValue(new Set()),
+  createSafeUser: jest.fn().mockReturnValue({ id: 'user-123' }),
+  initializeAgent: jest.fn().mockResolvedValue({
+    model: 'claude-3',
+    model_parameters: {},
+    toolRegistry: {},
+  }),
+  getBalanceConfig: mockGetBalanceConfig,
+  getTransactionsConfig: mockGetTransactionsConfig,
+  recordCollectedUsage: mockRecordCollectedUsage,
+  createToolExecuteHandler: jest.fn().mockReturnValue({ handle: jest.fn() }),
+  // Responses API
+  writeDone: jest.fn(),
+  buildResponse: jest.fn().mockReturnValue({ id: 'resp_123', output: [] }),
+  generateResponseId: jest.fn().mockReturnValue('resp_mock-123'),
+  isValidationFailure: jest.fn().mockReturnValue(false),
+  emitResponseCreated: jest.fn(),
+  createResponseContext: jest.fn().mockReturnValue({ responseId: 'resp_123' }),
+  createResponseTracker: jest.fn().mockReturnValue({
+    usage: { promptTokens: 100, completionTokens: 50 },
+  }),
+  setupStreamingResponse: jest.fn(),
+  emitResponseInProgress: jest.fn(),
+  convertInputToMessages: jest.fn().mockReturnValue([]),
+  validateResponseRequest: jest.fn().mockReturnValue({
+    request: { model: 'agent-123', input: 'Hello', stream: false },
+  }),
+  buildAggregatedResponse: jest.fn().mockReturnValue({
+    id: 'resp_123',
+    status: 'completed',
+    output: [],
+    usage: { input_tokens: 100, output_tokens: 50, total_tokens: 150 },
+  }),
+  createResponseAggregator: jest.fn().mockReturnValue({
+    usage: { promptTokens: 100, completionTokens: 50 },
+  }),
+  sendResponsesErrorResponse: jest.fn(),
+  // Streaming handlers: a `handlers` map plus a `finalizeStream` callback.
+  createResponsesEventHandlers: jest.fn().mockReturnValue({
+    handlers: {
+      on_message_delta: { handle: jest.fn() },
+      on_reasoning_delta: { handle: jest.fn() },
+      on_run_step: { handle: jest.fn() },
+      on_run_step_delta: { handle: jest.fn() },
+      on_chat_model_end: { handle: jest.fn() },
+    },
+    finalizeStream: jest.fn(),
+  }),
+  // Aggregator handlers: the same five event keys, returned as a flat map.
+  createAggregatorEventHandlers: jest.fn().mockReturnValue({
+    on_message_delta: { handle: jest.fn() },
+    on_reasoning_delta: { handle: jest.fn() },
+    on_run_step: { handle: jest.fn() },
+    on_run_step_delta: { handle: jest.fn() },
+    on_chat_model_end: { handle: jest.fn() },
+  }),
+}));
+
+jest.mock('~/server/services/ToolService', () => ({
+  loadAgentTools: jest.fn().mockResolvedValue([]),
+  loadToolsForExecution: jest.fn().mockResolvedValue([]),
+}));
+
+jest.mock('~/models/spendTokens', () => ({
+  spendTokens: mockSpendTokens,
+  spendStructuredTokens: mockSpendStructuredTokens,
+}));
+
+// Pricing helpers; the tests expect these exact functions under `pricing` in the
+// dependency bundle passed to recordCollectedUsage.
+const mockGetMultiplier = jest.fn().mockReturnValue(1);
+const mockGetCacheMultiplier = jest.fn().mockReturnValue(null);
+jest.mock('~/models/tx', () => ({
+  getMultiplier: mockGetMultiplier,
+  getCacheMultiplier: mockGetCacheMultiplier,
+}));
+
+jest.mock('~/server/controllers/agents/callbacks', () => ({
+  createToolEndCallback: jest.fn().mockReturnValue(jest.fn()),
+  createResponsesToolEndCallback: jest.fn().mockReturnValue(jest.fn()),
+}));
+
+jest.mock('~/server/services/PermissionService', () => ({
+  findAccessibleResources: jest.fn().mockResolvedValue([]),
+}));
+
+jest.mock('~/models/Conversation', () => ({
+  getConvoFiles: jest.fn().mockResolvedValue([]),
+  saveConvo: jest.fn().mockResolvedValue({}),
+  getConvo: jest.fn().mockResolvedValue(null),
+}));
+
+jest.mock('~/models/Agent', () => ({
+  getAgent: jest.fn().mockResolvedValue({
+    id: 'agent-123',
+    name: 'Test Agent',
+    provider: 'anthropic',
+    model_parameters: { model: 'claude-3' },
+  }),
+  getAgents: jest.fn().mockResolvedValue([]),
+}));
+
+// Bulk-write helpers; the tests expect these exact functions under `bulkWriteOps`
+// (`updateBalance` and `insertMany` respectively).
+const mockUpdateBalance = jest.fn().mockResolvedValue({});
+const mockBulkInsertTransactions = jest.fn().mockResolvedValue(undefined);
+jest.mock('~/models', () => ({
+  getFiles: jest.fn(),
+  getUserKey: jest.fn(),
+  getMessages: jest.fn().mockResolvedValue([]),
+  saveMessage: jest.fn().mockResolvedValue({}),
+  updateFilesUsage: jest.fn(),
+  getUserKeyValues: jest.fn(),
+  getUserCodeFiles: jest.fn(),
+  getToolFilesByIds: jest.fn(),
+  getCodeGeneratedFiles: jest.fn(),
+  updateBalance: mockUpdateBalance,
+  bulkInsertTransactions: mockBulkInsertTransactions,
+}));
+
+describe('createResponse controller', () => {
+  let createResponse;
+  let req, res;
+
+  /**
+   * Resets mock state and rebuilds the request/response fixtures before every test.
+   */
+  beforeEach(() => {
+    jest.clearAllMocks();
+
+    // clearAllMocks() only wipes recorded calls/results; values installed through
+    // mockReturnValue() survive it. Re-apply the default configs so a test that overrides
+    // them cannot leak its values into whichever test runs next.
+    mockGetBalanceConfig.mockReturnValue({ enabled: true });
+    mockGetTransactionsConfig.mockReturnValue({ enabled: true });
+
+    // Required here (not at file top) so the module is loaded after the jest.mock() setup.
+    const controller = require('../responses');
+    createResponse = controller.createResponse;
+
+    // Non-streaming Responses API request for agent 'agent-123'.
+    req = {
+      body: {
+        model: 'agent-123',
+        input: 'Hello',
+        stream: false,
+      },
+      user: { id: 'user-123' },
+      config: {
+        endpoints: {
+          agents: { allowedProviders: ['anthropic'] },
+        },
+      },
+      on: jest.fn(),
+    };
+
+    // Response double; `status` returns the same object so calls can be chained.
+    res = {
+      status: jest.fn().mockReturnThis(),
+      json: jest.fn(),
+      setHeader: jest.fn(),
+      flushHeaders: jest.fn(),
+      end: jest.fn(),
+      write: jest.fn(),
+    };
+  });
+
+  describe('token usage recording - non-streaming', () => {
+    /** Dependency bundle the controller is expected to pass to recordCollectedUsage. */
+    const expectedDeps = {
+      spendTokens: mockSpendTokens,
+      spendStructuredTokens: mockSpendStructuredTokens,
+      pricing: { getMultiplier: mockGetMultiplier, getCacheMultiplier: mockGetCacheMultiplier },
+      bulkWriteOps: {
+        insertMany: mockBulkInsertTransactions,
+        updateBalance: mockUpdateBalance,
+      },
+    };
+
+    /** Invokes the controller once with the fixtures prepared by the outer beforeEach. */
+    const runController = () => createResponse(req, res);
+
+    it('should call recordCollectedUsage after successful non-streaming completion', async () => {
+      await runController();
+
+      const expectedParams = expect.objectContaining({
+        user: 'user-123',
+        conversationId: expect.any(String),
+        collectedUsage: expect.any(Array),
+        context: 'message',
+      });
+      expect(mockRecordCollectedUsage).toHaveBeenCalledTimes(1);
+      expect(mockRecordCollectedUsage).toHaveBeenCalledWith(expectedDeps, expectedParams);
+    });
+
+    it('should pass balance and transactions config to recordCollectedUsage', async () => {
+      const balance = { enabled: true, startBalance: 2000 };
+      const transactions = { enabled: true };
+      mockGetBalanceConfig.mockReturnValue(balance);
+      mockGetTransactionsConfig.mockReturnValue(transactions);
+
+      await runController();
+
+      expect(mockRecordCollectedUsage).toHaveBeenCalledWith(
+        expect.any(Object),
+        expect.objectContaining({
+          balance: { enabled: true, startBalance: 2000 },
+          transactions: { enabled: true },
+        }),
+      );
+    });
+
+    it('should pass spendTokens, spendStructuredTokens, pricing, and bulkWriteOps as dependencies', async () => {
+      await runController();
+
+      // First positional argument of the first recorded call.
+      const [[deps]] = mockRecordCollectedUsage.mock.calls;
+
+      expect(deps).toHaveProperty('spendTokens', mockSpendTokens);
+      expect(deps).toHaveProperty('spendStructuredTokens', mockSpendStructuredTokens);
+
+      expect(deps).toHaveProperty('pricing');
+      const { pricing } = deps;
+      expect(pricing).toHaveProperty('getMultiplier', mockGetMultiplier);
+      expect(pricing).toHaveProperty('getCacheMultiplier', mockGetCacheMultiplier);
+
+      expect(deps).toHaveProperty('bulkWriteOps');
+      const { bulkWriteOps } = deps;
+      expect(bulkWriteOps).toHaveProperty('insertMany', mockBulkInsertTransactions);
+      expect(bulkWriteOps).toHaveProperty('updateBalance', mockUpdateBalance);
+    });
+
+    it('should include model from primaryConfig in recordCollectedUsage params', async () => {
+      await runController();
+
+      const paramsWithModel = expect.objectContaining({
+        model: 'claude-3',
+      });
+      expect(mockRecordCollectedUsage).toHaveBeenCalledWith(expect.any(Object), paramsWithModel);
+    });
+  });
+
+  describe('token usage recording - streaming', () => {
+    // Switches both the raw request body and the validated request returned by the mocked
+    // validator to streaming mode before each test in this suite.
+    // NOTE(review): jest.clearAllMocks() (used by the outer beforeEach) does not undo
+    // mockReturnValue(), so the `stream: true` validated request appears to stay in effect
+    // for suites that run after this one — confirm whether later tests rely on that before
+    // adding a reset.
+    beforeEach(() => {
+      req.body.stream = true;
+
+      const api = require('@librechat/api');
+      api.validateResponseRequest.mockReturnValue({
+        request: { model: 'agent-123', input: 'Hello', stream: true },
+      });
+    });
+
+    it('should call recordCollectedUsage after successful streaming completion', async () => {
+      await createResponse(req, res);
+
+      // Exactly one usage-recording call, carrying the full dependency bundle.
+      expect(mockRecordCollectedUsage).toHaveBeenCalledTimes(1);
+      expect(mockRecordCollectedUsage).toHaveBeenCalledWith(
+        {
+          spendTokens: mockSpendTokens,
+          spendStructuredTokens: mockSpendStructuredTokens,
+          pricing: { getMultiplier: mockGetMultiplier, getCacheMultiplier: mockGetCacheMultiplier },
+          bulkWriteOps: {
+            insertMany: mockBulkInsertTransactions,
+            updateBalance: mockUpdateBalance,
+          },
+        },
+        expect.objectContaining({
+          user: 'user-123',
+          context: 'message',
+        }),
+      );
+    });
+  });
+
+  describe('collectedUsage population', () => {
+    // Drives a fake run that fires one `on_chat_model_end` event with usage metadata and
+    // checks that those token counts reach recordCollectedUsage via `collectedUsage`.
+    it('should collect usage from on_chat_model_end events', async () => {
+      const api = require('@librechat/api');
+
+      // Set once createRun is invoked; until then the aggregator handler below is a no-op.
+      let capturedOnChatModelEnd;
+      // Aggregator handlers whose on_chat_model_end forwards to the captured callback.
+      // NOTE(review): mockImplementation() is not undone by jest.clearAllMocks(), so this
+      // and the createRun override below persist past this test — confirm that is intended.
+      api.createAggregatorEventHandlers.mockImplementation(() => {
+        return {
+          on_message_delta: { handle: jest.fn() },
+          on_reasoning_delta: { handle: jest.fn() },
+          on_run_step: { handle: jest.fn() },
+          on_run_step_delta: { handle: jest.fn() },
+          on_chat_model_end: {
+            handle: jest.fn((event, data) => {
+              if (capturedOnChatModelEnd) {
+                capturedOnChatModelEnd(event, data);
+              }
+            }),
+          },
+        };
+      });
+
+      // Fake run factory: remembers the handlers the controller supplied and returns a run
+      // whose processStream emits a single on_chat_model_end event through them.
+      // NOTE(review): the captured callback forwards to customHandlers.on_chat_model_end,
+      // while the aggregator handler above forwards to the captured callback. If the
+      // controller ever routes customHandlers.on_chat_model_end back into the aggregator
+      // handler, these two would call each other — verify against the controller.
+      api.createRun.mockImplementation(async ({ customHandlers }) => {
+        capturedOnChatModelEnd = (event, data) => {
+          customHandlers.on_chat_model_end.handle(event, data);
+        };
+
+        return {
+          processStream: jest.fn().mockImplementation(async () => {
+            customHandlers.on_chat_model_end.handle('on_chat_model_end', {
+              output: {
+                usage_metadata: {
+                  input_tokens: 150,
+                  output_tokens: 75,
+                  model: 'claude-3',
+                },
+              },
+            });
+          }),
+        };
+      });
+
+      await createResponse(req, res);
+
+      // The emitted token counts must appear as an entry of `collectedUsage`.
+      expect(mockRecordCollectedUsage).toHaveBeenCalledWith(
+        expect.any(Object),
+        expect.objectContaining({
+          collectedUsage: expect.arrayContaining([
+            expect.objectContaining({
+              input_tokens: 150,
+              output_tokens: 75,
+            }),
+          ]),
+        }),
+      );
+    });
+  });
+});
--- a/api/server/controllers/agents/callbacks.js
+++ b/api/server/controllers/agents/callbacks.js
@ -1,16 +1,13 @@
 const { nanoid } = require('nanoid');
-const { sendEvent, GenerationJobManager } = require('@librechat/api');
 const { logger } = require('@librechat/data-schemas');
+const { Constants, EnvVar, GraphEvents, ToolEndHandler } = require('@librechat/agents');
 const { Tools, StepTypes, FileContext, ErrorTypes } = require('librechat-data-provider');
 const {
-  EnvVar,
-  Providers,
-  GraphEvents,
-  getMessageId,
-  ToolEndHandler,
-  handleToolCalls,
-  ChatModelStreamHandler,
-} = require('@librechat/agents');
+  sendEvent,
+  GenerationJobManager,
+  writeAttachmentEvent,
+  createToolExecuteHandler,
+} = require('@librechat/api');
 const { processFileCitations } = require('~/server/services/Files/Citations');
 const { processCodeOutput } = require('~/server/services/Files/Code/process');
 const { loadAuthValues } = require('~/server/services/Tools/credentials');
@ -51,8 +48,6 @@ class ModelEndHandler {
    let errorMessage;
    try {
      const agentContext = graph.getAgentContext(metadata);
-      const isGoogle = agentContext.provider === Providers.GOOGLE;
-      const streamingDisabled = !!agentContext.clientOptions?.disableStreaming;
      if (data?.output?.additional_kwargs?.stop_reason === 'refusal') {
        const info = { ...data.output.additional_kwargs };
        errorMessage = JSON.stringify({
@ -67,21 +62,6 @@ class ModelEndHandler {
        });
      }

-      const toolCalls = data?.output?.tool_calls;
-      let hasUnprocessedToolCalls = false;
-      if (Array.isArray(toolCalls) && toolCalls.length > 0 && graph?.toolCallStepIds?.has) {
-        try {
-          hasUnprocessedToolCalls = toolCalls.some(
-            (tc) => tc?.id && !graph.toolCallStepIds.has(tc.id),
-          );
-        } catch {
-          hasUnprocessedToolCalls = false;
-        }
-      }
-      if (isGoogle || streamingDisabled || hasUnprocessedToolCalls) {
-        await handleToolCalls(toolCalls, metadata, graph);
-      }
-
      const usage = data?.output?.usage_metadata;
      if (!usage) {
        return this.finalize(errorMessage);
@ -92,38 +72,6 @@ class ModelEndHandler {
      }

      this.collectedUsage.push(usage);
-      if (!streamingDisabled) {
-        return this.finalize(errorMessage);
-      }
-      if (!data.output.content) {
-        return this.finalize(errorMessage);
-      }
-      const stepKey = graph.getStepKey(metadata);
-      const message_id = getMessageId(stepKey, graph) ?? '';
-      if (message_id) {
-        await graph.dispatchRunStep(stepKey, {
-          type: StepTypes.MESSAGE_CREATION,
-          message_creation: {
-            message_id,
-          },
-        });
-      }
-      const stepId = graph.getStepIdByKey(stepKey);
-      const content = data.output.content;
-      if (typeof content === 'string') {
-        await graph.dispatchMessageDelta(stepId, {
-          content: [
-            {
-              type: 'text',
-              text: content,
-            },
-          ],
-        });
-      } else if (content.every((c) => c.type?.startsWith('text'))) {
-        await graph.dispatchMessageDelta(stepId, {
-          content,
-        });
-      }
    } catch (error) {
      logger.error('Error handling model end event:', error);
      return this.finalize(errorMessage);
@ -146,18 +94,26 @@ function checkIfLastAgent(last_agent_id, langgraph_node) {

 /**
 * Helper to emit events either to res (standard mode) or to job emitter (resumable mode).
+ * In Redis mode, awaits the emit to guarantee event ordering (critical for streaming deltas).
 * @param {ServerResponse} res - The server response object
 * @param {string | null} streamId - The stream ID for resumable mode, or null for standard mode
 * @param {Object} eventData - The event data to send
+ * @returns {Promise<void>}
 */
-function emitEvent(res, streamId, eventData) {
+async function emitEvent(res, streamId, eventData) {
  if (streamId) {
-    GenerationJobManager.emitChunk(streamId, eventData);
+    await GenerationJobManager.emitChunk(streamId, eventData);
  } else {
    sendEvent(res, eventData);
  }
 }

+/**
+ * @typedef {Object} ToolExecuteOptions
+ * @property {(toolNames: string[]) => Promise<{loadedTools: StructuredTool[]}>} loadTools - Function to load tools by name
+ * @property {Object} configurable - Configurable context for tool invocation
+ */
+
 /**
 * Get default handlers for stream events.
 * @param {Object} options - The options object.
@ -166,6 +122,7 @@ function emitEvent(res, streamId, eventData) {
 * @param {ToolEndCallback} options.toolEndCallback - Callback to use when tool ends.
 * @param {Array<UsageMetadata>} options.collectedUsage - The list of collected usage metadata.
 * @param {string | null} [options.streamId] - The stream ID for resumable mode, or null for standard mode.
+ * @param {ToolExecuteOptions} [options.toolExecuteOptions] - Options for event-driven tool execution.
 * @returns {Record<string, t.EventHandler>} The default handlers.
 * @throws {Error} If the request is not found.
 */
@ -175,6 +132,7 @@ function getDefaultHandlers({
  toolEndCallback,
  collectedUsage,
  streamId = null,
+  toolExecuteOptions = null,
 }) {
  if (!res || !aggregateContent) {
    throw new Error(
@ -184,7 +142,6 @@ function getDefaultHandlers({
  const handlers = {
    [GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(collectedUsage),
    [GraphEvents.TOOL_END]: new ToolEndHandler(toolEndCallback, logger),
-    [GraphEvents.CHAT_MODEL_STREAM]: new ChatModelStreamHandler(),
    [GraphEvents.ON_RUN_STEP]: {
      /**
       * Handle ON_RUN_STEP event.
@ -192,18 +149,19 @@ function getDefaultHandlers({
       * @param {StreamEventData} data - The event data.
       * @param {GraphRunnableConfig['configurable']} [metadata] The runnable metadata.
       */
-      handle: (event, data, metadata) => {
+      handle: async (event, data, metadata) => {
+        aggregateContent({ event, data });
        if (data?.stepDetails.type === StepTypes.TOOL_CALLS) {
-          emitEvent(res, streamId, { event, data });
+          await emitEvent(res, streamId, { event, data });
        } else if (checkIfLastAgent(metadata?.last_agent_id, metadata?.langgraph_node)) {
-          emitEvent(res, streamId, { event, data });
+          await emitEvent(res, streamId, { event, data });
        } else if (!metadata?.hide_sequential_outputs) {
-          emitEvent(res, streamId, { event, data });
+          await emitEvent(res, streamId, { event, data });
        } else {
          const agentName = metadata?.name ?? 'Agent';
          const isToolCall = data?.stepDetails.type === StepTypes.TOOL_CALLS;
          const action = isToolCall ? 'performing a task...' : 'thinking...';
-          emitEvent(res, streamId, {
+          await emitEvent(res, streamId, {
            event: 'on_agent_update',
            data: {
              runId: metadata?.run_id,
@ -211,7 +169,6 @@ function getDefaultHandlers({
            },
          });
        }
-        aggregateContent({ event, data });
      },
    },
    [GraphEvents.ON_RUN_STEP_DELTA]: {
@ -221,15 +178,15 @@ function getDefaultHandlers({
       * @param {StreamEventData} data - The event data.
       * @param {GraphRunnableConfig['configurable']} [metadata] The runnable metadata.
       */
-      handle: (event, data, metadata) => {
-        if (data?.delta.type === StepTypes.TOOL_CALLS) {
-          emitEvent(res, streamId, { event, data });
-        } else if (checkIfLastAgent(metadata?.last_agent_id, metadata?.langgraph_node)) {
-          emitEvent(res, streamId, { event, data });
-        } else if (!metadata?.hide_sequential_outputs) {
-          emitEvent(res, streamId, { event, data });
-        }
+      handle: async (event, data, metadata) => {
        aggregateContent({ event, data });
+        if (data?.delta.type === StepTypes.TOOL_CALLS) {
+          await emitEvent(res, streamId, { event, data });
+        } else if (checkIfLastAgent(metadata?.last_agent_id, metadata?.langgraph_node)) {
+          await emitEvent(res, streamId, { event, data });
+        } else if (!metadata?.hide_sequential_outputs) {
+          await emitEvent(res, streamId, { event, data });
+        }
      },
    },
    [GraphEvents.ON_RUN_STEP_COMPLETED]: {
@ -239,15 +196,15 @@ function getDefaultHandlers({
       * @param {StreamEventData & { result: ToolEndData }} data - The event data.
       * @param {GraphRunnableConfig['configurable']} [metadata] The runnable metadata.
       */
-      handle: (event, data, metadata) => {
-        if (data?.result != null) {
-          emitEvent(res, streamId, { event, data });
-        } else if (checkIfLastAgent(metadata?.last_agent_id, metadata?.langgraph_node)) {
-          emitEvent(res, streamId, { event, data });
-        } else if (!metadata?.hide_sequential_outputs) {
-          emitEvent(res, streamId, { event, data });
-        }
+      handle: async (event, data, metadata) => {
        aggregateContent({ event, data });
+        if (data?.result != null) {
+          await emitEvent(res, streamId, { event, data });
+        } else if (checkIfLastAgent(metadata?.last_agent_id, metadata?.langgraph_node)) {
+          await emitEvent(res, streamId, { event, data });
+        } else if (!metadata?.hide_sequential_outputs) {
+          await emitEvent(res, streamId, { event, data });
+        }
      },
    },
    [GraphEvents.ON_MESSAGE_DELTA]: {
@ -257,13 +214,13 @@ function getDefaultHandlers({
       * @param {StreamEventData} data - The event data.
       * @param {GraphRunnableConfig['configurable']} [metadata] The runnable metadata.
       */
-      handle: (event, data, metadata) => {
-        if (checkIfLastAgent(metadata?.last_agent_id, metadata?.langgraph_node)) {
-          emitEvent(res, streamId, { event, data });
-        } else if (!metadata?.hide_sequential_outputs) {
-          emitEvent(res, streamId, { event, data });
-        }
+      handle: async (event, data, metadata) => {
        aggregateContent({ event, data });
+        if (checkIfLastAgent(metadata?.last_agent_id, metadata?.langgraph_node)) {
+          await emitEvent(res, streamId, { event, data });
+        } else if (!metadata?.hide_sequential_outputs) {
+          await emitEvent(res, streamId, { event, data });
+        }
      },
    },
    [GraphEvents.ON_REASONING_DELTA]: {
@ -273,22 +230,27 @@ function getDefaultHandlers({
       * @param {StreamEventData} data - The event data.
       * @param {GraphRunnableConfig['configurable']} [metadata] The runnable metadata.
       */
-      handle: (event, data, metadata) => {
-        if (checkIfLastAgent(metadata?.last_agent_id, metadata?.langgraph_node)) {
-          emitEvent(res, streamId, { event, data });
-        } else if (!metadata?.hide_sequential_outputs) {
-          emitEvent(res, streamId, { event, data });
-        }
+      handle: async (event, data, metadata) => {
        aggregateContent({ event, data });
+        if (checkIfLastAgent(metadata?.last_agent_id, metadata?.langgraph_node)) {
+          await emitEvent(res, streamId, { event, data });
+        } else if (!metadata?.hide_sequential_outputs) {
+          await emitEvent(res, streamId, { event, data });
+        }
      },
    },
  };

+  if (toolExecuteOptions) {
+    handlers[GraphEvents.ON_TOOL_EXECUTE] = createToolExecuteHandler(toolExecuteOptions);
+  }
+
  return handlers;
 }

 /**
 * Helper to write attachment events either to res or to job emitter.
+ * Note: Attachments are not order-sensitive like deltas, so fire-and-forget is acceptable.
 * @param {ServerResponse} res - The server response object
 * @param {string | null} streamId - The stream ID for resumable mode, or null for standard mode
 * @param {Object} attachment - The attachment data
@ -408,7 +370,7 @@ function createToolEndCallback({ req, res, artifactPromises, streamId = null })
        const { url } = part.image_url;
        artifactPromises.push(
          (async () => {
-            const filename = `${output.name}_${output.tool_call_id}_img_${nanoid()}`;
+            const filename = `${output.name}_img_${nanoid()}`;
            const file_id = output.artifact.file_ids?.[i];
            const file = await saveBase64Image(url, {
              req,
@ -441,10 +403,10 @@ function createToolEndCallback({ req, res, artifactPromises, streamId = null })
      return;
    }

-    {
-      if (output.name !== Tools.execute_code) {
-        return;
-      }
+    const isCodeTool =
+      output.name === Tools.execute_code || output.name === Constants.PROGRAMMATIC_TOOL_CALLING;
+    if (!isCodeTool) {
+      return;
    }

    if (!output.artifact.files) {
@ -488,7 +450,226 @@ function createToolEndCallback({ req, res, artifactPromises, streamId = null })
  };
 }

+/**
+ * Writes a single attachment to the response as an Open Responses
+ * `librechat:attachment` event.
+ * @param {ServerResponse} res - The server response object
+ * @param {Object} tracker - The response tracker; its `nextSequence()` supplies the event's sequence number
+ * @param {Object} attachment - The attachment data
+ * @param {Object} metadata - Run metadata; `run_id` is forwarded as `messageId` and `thread_id` as `conversationId`
+ */
+function writeResponsesAttachment(res, tracker, attachment, metadata) {
+  /** Reserve the sequence number first so it is consumed even if the metadata lookup below throws */
+  const sequenceNumber = tracker.nextSequence();
+  const { run_id: messageId, thread_id: conversationId } = metadata;
+  writeAttachmentEvent(res, sequenceNumber, attachment, { messageId, conversationId });
+}
+
+/**
+ * Creates a tool end callback specifically for the Responses API.
+ * Emits attachments as `librechat:attachment` events per the Open Responses extension spec.
+ *
+ * Each artifact is handled in its own promise that is pushed onto `artifactPromises`.
+ * Those promises never reject: on failure the error is logged and the promise resolves to `null`.
+ *
+ * @param {Object} params
+ * @param {ServerRequest} params.req
+ * @param {ServerResponse} params.res
+ * @param {Object} params.tracker - Response tracker with sequence number
+ * @param {Promise<MongoFile | { filename: string; filepath: string; expires: number;} | null>[]} params.artifactPromises
+ * @returns {ToolEndCallback} The tool end callback.
+ */
+function createResponsesToolEndCallback({ req, res, tracker, artifactPromises }) {
+  /**
+   * Whether an attachment event can still be written: headers have been sent
+   * and the response has not been ended.
+   * @returns {boolean}
+   */
+  const isStreaming = () => res.headersSent && !res.writableEnded;
+
+  /**
+   * Shapes saved file metadata into the attachment payload written to the stream.
+   * @param {Object} fileMetadata - Metadata of the saved file
+   * @param {string} toolCallId - ID of the tool call that produced the file
+   * @returns {Object} The attachment payload
+   */
+  const toFileAttachment = (fileMetadata, toolCallId) => ({
+    file_id: fileMetadata.file_id,
+    filename: fileMetadata.filename,
+    type: fileMetadata.type,
+    url: fileMetadata.filepath,
+    width: fileMetadata.width,
+    height: fileMetadata.height,
+    tool_call_id: toolCallId,
+  });
+
+  /**
+   * @type {ToolEndCallback}
+   */
+  return async (data, metadata) => {
+    const output = data?.output;
+    if (!output?.artifact) {
+      return;
+    }
+
+    if (output.artifact[Tools.file_search]) {
+      artifactPromises.push(
+        (async () => {
+          const user = req.user;
+          const attachment = await processFileCitations({
+            user,
+            metadata,
+            appConfig: req.config,
+            toolArtifact: output.artifact,
+            toolCallId: output.tool_call_id,
+          });
+          if (!attachment) {
+            return null;
+          }
+          /** Emit only while the response stream is still open */
+          if (isStreaming()) {
+            writeResponsesAttachment(res, tracker, attachment, metadata);
+          }
+          return attachment;
+        })().catch((error) => {
+          logger.error('Error processing file citations:', error);
+          return null;
+        }),
+      );
+    }
+
+    if (output.artifact[Tools.ui_resources]) {
+      artifactPromises.push(
+        (async () => {
+          const attachment = {
+            type: Tools.ui_resources,
+            toolCallId: output.tool_call_id,
+            [Tools.ui_resources]: output.artifact[Tools.ui_resources].data,
+          };
+          /** Emit only while the response stream is still open */
+          if (isStreaming()) {
+            writeResponsesAttachment(res, tracker, attachment, metadata);
+          }
+          return attachment;
+        })().catch((error) => {
+          logger.error('Error processing artifact content:', error);
+          return null;
+        }),
+      );
+    }
+
+    if (output.artifact[Tools.web_search]) {
+      artifactPromises.push(
+        (async () => {
+          const attachment = {
+            type: Tools.web_search,
+            toolCallId: output.tool_call_id,
+            [Tools.web_search]: { ...output.artifact[Tools.web_search] },
+          };
+          /** Emit only while the response stream is still open */
+          if (isStreaming()) {
+            writeResponsesAttachment(res, tracker, attachment, metadata);
+          }
+          return attachment;
+        })().catch((error) => {
+          logger.error('Error processing artifact content:', error);
+          return null;
+        }),
+      );
+    }
+
+    if (output.artifact.content) {
+      /** @type {FormattedContent[]} */
+      const content = output.artifact.content;
+      for (let i = 0; i < content.length; i++) {
+        const part = content[i];
+        if (!part) {
+          continue;
+        }
+        if (part.type !== 'image_url') {
+          continue;
+        }
+        const { url } = part.image_url;
+        artifactPromises.push(
+          (async () => {
+            const filename = `${output.name}_img_${nanoid()}`;
+            const file_id = output.artifact.file_ids?.[i];
+            const file = await saveBase64Image(url, {
+              req,
+              file_id,
+              filename,
+              endpoint: metadata.provider,
+              context: FileContext.image_generation,
+            });
+
+            /**
+             * Guard before merging: Object.assign throws on a nullish target and otherwise
+             * returns the target, so a check placed after the merge can never fire.
+             */
+            if (!file) {
+              return null;
+            }
+
+            const fileMetadata = Object.assign(file, {
+              toolCallId: output.tool_call_id,
+            });
+
+            /** Emit only while the response stream is still open */
+            if (isStreaming()) {
+              writeResponsesAttachment(
+                res,
+                tracker,
+                toFileAttachment(fileMetadata, output.tool_call_id),
+                metadata,
+              );
+            }
+
+            return fileMetadata;
+          })().catch((error) => {
+            logger.error('Error processing artifact content:', error);
+            return null;
+          }),
+        );
+      }
+      /** Content artifacts end processing here; code output files below are not considered */
+      return;
+    }
+
+    const isCodeTool =
+      output.name === Tools.execute_code || output.name === Constants.PROGRAMMATIC_TOOL_CALLING;
+    if (!isCodeTool) {
+      return;
+    }
+
+    if (!output.artifact.files) {
+      return;
+    }
+
+    for (const file of output.artifact.files) {
+      const { id, name } = file;
+      artifactPromises.push(
+        (async () => {
+          const result = await loadAuthValues({
+            userId: req.user.id,
+            authFields: [EnvVar.CODE_API_KEY],
+          });
+          const fileMetadata = await processCodeOutput({
+            req,
+            id,
+            name,
+            apiKey: result[EnvVar.CODE_API_KEY],
+            messageId: metadata.run_id,
+            toolCallId: output.tool_call_id,
+            conversationId: metadata.thread_id,
+            session_id: output.artifact.session_id,
+          });
+
+          if (!fileMetadata) {
+            return null;
+          }
+
+          /** Emit only while the response stream is still open */
+          if (isStreaming()) {
+            writeResponsesAttachment(
+              res,
+              tracker,
+              toFileAttachment(fileMetadata, output.tool_call_id),
+              metadata,
+            );
+          }
+
+          return fileMetadata;
+        })().catch((error) => {
+          logger.error('Error processing code output:', error);
+          return null;
+        }),
+      );
+    }
+  };
+}
+
 module.exports = {
  getDefaultHandlers,
  createToolEndCallback,
+  createResponsesToolEndCallback,
 };
--- a/api/server/controllers/agents/client.js
+++ b/api/server/controllers/agents/client.js
@ -1,22 +1,28 @@
 require('events').EventEmitter.defaultMaxListeners = 100;
 const { logger } = require('@librechat/data-schemas');
-const { DynamicStructuredTool } = require('@langchain/core/tools');
 const { getBufferString, HumanMessage } = require('@langchain/core/messages');
 const {
  createRun,
  Tokenizer,
  checkAccess,
-  logAxiosError,
+  buildToolSet,
  sanitizeTitle,
+  logToolError,
+  payloadParser,
  resolveHeaders,
  createSafeUser,
  initializeAgent,
  getBalanceConfig,
+  omitTitleOptions,
  getProviderConfig,
  memoryInstructions,
+  createTokenCounter,
+  applyContextToAgent,
+  recordCollectedUsage,
  GenerationJobManager,
  getTransactionsConfig,
  createMemoryProcessor,
+  createMultiAgentMapper,
  filterMalformedContentParts,
 } = require('@librechat/api');
 const {
@ -24,9 +30,7 @@ const {
  Providers,
  TitleMethod,
  formatMessage,
-  labelContentByAgent,
  formatAgentMessages,
-  getTokenCountForMessage,
  createMetadataAggregator,
 } = require('@librechat/agents');
 const {
@ -38,11 +42,12 @@ const {
  PermissionTypes,
  isAgentsEndpoint,
  isEphemeralAgentId,
-  bedrockInputSchema,
  removeNullishValues,
 } = require('librechat-data-provider');
 const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
 const { encodeAndFormat } = require('~/server/services/Files/images/encode');
+const { updateBalance, bulkInsertTransactions } = require('~/models');
+const { getMultiplier, getCacheMultiplier } = require('~/models/tx');
 const { createContextHandlers } = require('~/app/clients/prompts');
 const { getConvoFiles } = require('~/models/Conversation');
 const BaseClient = require('~/app/clients/BaseClient');
@ -51,183 +56,6 @@ const { loadAgent } = require('~/models/Agent');
 const { getMCPManager } = require('~/config');
 const db = require('~/models');

-const omitTitleOptions = new Set([
-  'stream',
-  'thinking',
-  'streaming',
-  'clientOptions',
-  'thinkingConfig',
-  'thinkingBudget',
-  'includeThoughts',
-  'maxOutputTokens',
-  'additionalModelRequestFields',
-]);
-
-/**
- * @param {ServerRequest} req
- * @param {Agent} agent
- * @param {string} endpoint
- */
-const payloadParser = ({ req, agent, endpoint }) => {
-  if (isAgentsEndpoint(endpoint)) {
-    return { model: undefined };
-  } else if (endpoint === EModelEndpoint.bedrock) {
-    const parsedValues = bedrockInputSchema.parse(agent.model_parameters);
-    if (parsedValues.thinking == null) {
-      parsedValues.thinking = false;
-    }
-    return parsedValues;
-  }
-  return req.body.endpointOption.model_parameters;
-};
-
-function createTokenCounter(encoding) {
-  return function (message) {
-    const countTokens = (text) => Tokenizer.getTokenCount(text, encoding);
-    return getTokenCountForMessage(message, countTokens);
-  };
-}
-
-function logToolError(graph, error, toolId) {
-  logAxiosError({
-    error,
-    message: `[api/server/controllers/agents/client.js #chatCompletion] Tool Error "${toolId}"`,
-  });
-}
-
-/** Regex pattern to match agent ID suffix (____N) */
-const AGENT_SUFFIX_PATTERN = /____(\d+)$/;
-
-/**
- * Finds the primary agent ID within a set of agent IDs.
- * Primary = no suffix (____N) or lowest suffix number.
- * @param {Set<string>} agentIds
- * @returns {string | null}
- */
-function findPrimaryAgentId(agentIds) {
-  let primaryAgentId = null;
-  let lowestSuffixIndex = Infinity;
-
-  for (const agentId of agentIds) {
-    const suffixMatch = agentId.match(AGENT_SUFFIX_PATTERN);
-    if (!suffixMatch) {
-      return agentId;
-    }
-    const suffixIndex = parseInt(suffixMatch[1], 10);
-    if (suffixIndex < lowestSuffixIndex) {
-      lowestSuffixIndex = suffixIndex;
-      primaryAgentId = agentId;
-    }
-  }
-
-  return primaryAgentId;
-}
-
-/**
- * Creates a mapMethod for getMessagesForConversation that processes agent content.
- * - Strips agentId/groupId metadata from all content
- * - For parallel agents (addedConvo with groupId): filters each group to its primary agent
- * - For handoffs (agentId without groupId): keeps all content from all agents
- * - For multi-agent: applies agent labels to content
- *
- * The key distinction:
- * - Parallel execution (addedConvo): Parts have both agentId AND groupId
- * - Handoffs: Parts only have agentId, no groupId
- *
- * @param {Agent} primaryAgent - Primary agent configuration
- * @param {Map<string, Agent>} [agentConfigs] - Additional agent configurations
- * @returns {(message: TMessage) => TMessage} Map method for processing messages
- */
-function createMultiAgentMapper(primaryAgent, agentConfigs) {
-  const hasMultipleAgents = (primaryAgent.edges?.length ?? 0) > 0 || (agentConfigs?.size ?? 0) > 0;
-
-  /** @type {Record<string, string> | null} */
-  let agentNames = null;
-  if (hasMultipleAgents) {
-    agentNames = { [primaryAgent.id]: primaryAgent.name || 'Assistant' };
-    if (agentConfigs) {
-      for (const [agentId, agentConfig] of agentConfigs.entries()) {
-        agentNames[agentId] = agentConfig.name || agentConfig.id;
-      }
-    }
-  }
-
-  return (message) => {
-    if (message.isCreatedByUser || !Array.isArray(message.content)) {
-      return message;
-    }
-
-    // Check for metadata
-    const hasAgentMetadata = message.content.some((part) => part?.agentId || part?.groupId != null);
-    if (!hasAgentMetadata) {
-      return message;
-    }
-
-    try {
-      // Build a map of groupId -> Set of agentIds, to find primary per group
-      /** @type {Map<number, Set<string>>} */
-      const groupAgentMap = new Map();
-
-      for (const part of message.content) {
-        const groupId = part?.groupId;
-        const agentId = part?.agentId;
-        if (groupId != null && agentId) {
-          if (!groupAgentMap.has(groupId)) {
-            groupAgentMap.set(groupId, new Set());
-          }
-          groupAgentMap.get(groupId).add(agentId);
-        }
-      }
-
-      // For each group, find the primary agent
-      /** @type {Map<number, string>} */
-      const groupPrimaryMap = new Map();
-      for (const [groupId, agentIds] of groupAgentMap) {
-        const primary = findPrimaryAgentId(agentIds);
-        if (primary) {
-          groupPrimaryMap.set(groupId, primary);
-        }
-      }
-
-      /** @type {Array<TMessageContentParts>} */
-      const filteredContent = [];
-      /** @type {Record<number, string>} */
-      const agentIdMap = {};
-
-      for (const part of message.content) {
-        const agentId = part?.agentId;
-        const groupId = part?.groupId;
-
-        // Filtering logic:
-        // - No groupId (handoffs): always include
-        // - Has groupId (parallel): only include if it's the primary for that group
-        const isParallelPart = groupId != null;
-        const groupPrimary = isParallelPart ? groupPrimaryMap.get(groupId) : null;
-        const shouldInclude = !isParallelPart || !agentId || agentId === groupPrimary;
-
-        if (shouldInclude) {
-          const newIndex = filteredContent.length;
-          const { agentId: _a, groupId: _g, ...cleanPart } = part;
-          filteredContent.push(cleanPart);
-          if (agentId && hasMultipleAgents) {
-            agentIdMap[newIndex] = agentId;
-          }
-        }
-      }
-
-      const finalContent =
-        Object.keys(agentIdMap).length > 0 && agentNames
-          ? labelContentByAgent(filteredContent, agentIdMap, agentNames)
-          : filteredContent;
-
-      return { ...message, content: finalContent };
-    } catch (error) {
-      logger.error('[AgentClient] Error processing multi-agent message:', error);
-      return message;
-    }
-  };
-}
-
 class AgentClient extends BaseClient {
  constructor(options = {}) {
    super(null, options);
@ -295,14 +123,9 @@ class AgentClient extends BaseClient {
  checkVisionRequest() {}

  getSaveOptions() {
-    // TODO:
-    // would need to be override settings; otherwise, model needs to be undefined
-    // model: this.override.model,
-    // instructions: this.override.instructions,
-    // additional_instructions: this.override.additional_instructions,
    let runOptions = {};
    try {
-      runOptions = payloadParser(this.options);
+      runOptions = payloadParser(this.options) ?? {};
    } catch (error) {
      logger.error(
        '[api/server/controllers/agents/client.js #getSaveOptions] Error parsing options',
@ -313,14 +136,14 @@ class AgentClient extends BaseClient {
    return removeNullishValues(
      Object.assign(
        {
+          spec: this.options.spec,
+          iconURL: this.options.iconURL,
          endpoint: this.options.endpoint,
          agent_id: this.options.agent.id,
          modelLabel: this.options.modelLabel,
-          maxContextTokens: this.options.maxContextTokens,
          resendFiles: this.options.resendFiles,
          imageDetail: this.options.imageDetail,
-          spec: this.options.spec,
-          iconURL: this.options.iconURL,
+          maxContextTokens: this.maxContextTokens,
        },
        // TODO: PARSE OPTIONS BY PROVIDER, MAY CONTAIN SENSITIVE DATA
        runOptions,
@ -328,11 +151,13 @@ class AgentClient extends BaseClient {
    );
  }

+  /**
+   * Returns build message options. For AgentClient, agent-specific instructions
+   * are retrieved directly from agent objects in buildMessages, so this returns empty.
+   * @returns {Object} Empty options object
+   */
  getBuildMessagesOptions() {
-    return {
-      instructions: this.options.agent.instructions,
-      additional_instructions: this.options.agent.additional_instructions,
-    };
+    return {};
  }

  /**
@ -355,12 +180,7 @@ class AgentClient extends BaseClient {
    return files;
  }

-  async buildMessages(
-    messages,
-    parentMessageId,
-    { instructions = null, additional_instructions = null },
-    opts,
-  ) {
+  async buildMessages(messages, parentMessageId, _buildOptions, opts) {
    /** Always pass mapMethod; getMessagesForConversation applies it only to messages with addedConvo flag */
    const orderedMessages = this.constructor.getMessagesForConversation({
      messages,
@ -374,11 +194,29 @@ class AgentClient extends BaseClient {
    /** @type {number | undefined} */
    let promptTokens;

-    /** @type {string} */
-    let systemContent = [instructions ?? '', additional_instructions ?? '']
-      .filter(Boolean)
-      .join('\n')
-      .trim();
+    /**
+     * Extract base instructions for all agents (combines instructions + additional_instructions).
+     * This must be done before applying context to preserve the original agent configuration.
+     */
+    const extractBaseInstructions = (agent) => {
+      const baseInstructions = [agent.instructions ?? '', agent.additional_instructions ?? '']
+        .filter(Boolean)
+        .join('\n')
+        .trim();
+      agent.instructions = baseInstructions;
+      return agent;
+    };
+
+    /** Collect all agents for unified processing, extracting base instructions during collection */
+    const allAgents = [
+      { agent: extractBaseInstructions(this.options.agent), agentId: this.options.agent.id },
+      ...(this.agentConfigs?.size > 0
+        ? Array.from(this.agentConfigs.entries()).map(([agentId, agent]) => ({
+            agent: extractBaseInstructions(agent),
+            agentId,
+          }))
+        : []),
+    ];

    if (this.options.attachments) {
      const attachments = await this.options.attachments;
@ -413,6 +251,7 @@ class AgentClient extends BaseClient {
        assistantName: this.options?.modelLabel,
      });

+      /** For non-latest messages, prepend file context directly to message content */
      if (message.fileContext && i !== orderedMessages.length - 1) {
        if (typeof formattedMessage.content === 'string') {
          formattedMessage.content = message.fileContext + '\n' + formattedMessage.content;
@ -422,8 +261,6 @@ class AgentClient extends BaseClient {
            ? (textPart.text = message.fileContext + '\n' + textPart.text)
            : formattedMessage.content.unshift({ type: 'text', text: message.fileContext });
        }
-      } else if (message.fileContext && i === orderedMessages.length - 1) {
-        systemContent = [systemContent, message.fileContext].join('\n');
      }

      const needsTokenCount =
@ -456,46 +293,35 @@ class AgentClient extends BaseClient {
      return formattedMessage;
    });

+    /**
+     * Build shared run context - applies to ALL agents in the run.
+     * This includes: file context (latest message), augmented prompt (RAG), memory context.
+     */
+    const sharedRunContextParts = [];
+
+    /** File context from the latest message (attachments) */
+    const latestMessage = orderedMessages[orderedMessages.length - 1];
+    if (latestMessage?.fileContext) {
+      sharedRunContextParts.push(latestMessage.fileContext);
+    }
+
+    /** Augmented prompt from RAG/context handlers */
    if (this.contextHandlers) {
      this.augmentedPrompt = await this.contextHandlers.createContext();
-      systemContent = this.augmentedPrompt + systemContent;
-    }
-
-    // Inject MCP server instructions if available
-    const ephemeralAgent = this.options.req.body.ephemeralAgent;
-    let mcpServers = [];
-
-    // Check for ephemeral agent MCP servers
-    if (ephemeralAgent && ephemeralAgent.mcp && ephemeralAgent.mcp.length > 0) {
-      mcpServers = ephemeralAgent.mcp;
-    }
-    // Check for regular agent MCP tools
-    else if (this.options.agent && this.options.agent.tools) {
-      mcpServers = this.options.agent.tools
-        .filter(
-          (tool) =>
-            tool instanceof DynamicStructuredTool && tool.name.includes(Constants.mcp_delimiter),
-        )
-        .map((tool) => tool.name.split(Constants.mcp_delimiter).pop())
-        .filter(Boolean);
-    }
-
-    if (mcpServers.length > 0) {
-      try {
-        const mcpInstructions = await getMCPManager().formatInstructionsForContext(mcpServers);
-        if (mcpInstructions) {
-          systemContent = [systemContent, mcpInstructions].filter(Boolean).join('\n\n');
-          logger.debug('[AgentClient] Injected MCP instructions for servers:', mcpServers);
-        }
-      } catch (error) {
-        logger.error('[AgentClient] Failed to inject MCP instructions:', error);
+      if (this.augmentedPrompt) {
+        sharedRunContextParts.push(this.augmentedPrompt);
      }
    }

-    if (systemContent) {
-      this.options.agent.instructions = systemContent;
+    /** Memory context (user preferences/memories) */
+    const withoutKeys = await this.useMemory();
+    if (withoutKeys) {
+      const memoryContext = `${memoryInstructions}\n\n# Existing memory about the user:\n${withoutKeys}`;
+      sharedRunContextParts.push(memoryContext);
    }

+    const sharedRunContext = sharedRunContextParts.join('\n\n');
+
    /** @type {Record<string, number> | undefined} */
    let tokenCountMap;

@ -521,14 +347,27 @@ class AgentClient extends BaseClient {
      opts.getReqData({ promptTokens });
    }

-    const withoutKeys = await this.useMemory();
-    if (withoutKeys) {
-      systemContent += `${memoryInstructions}\n\n# Existing memory about the user:\n${withoutKeys}`;
-    }
-
-    if (systemContent) {
-      this.options.agent.instructions = systemContent;
-    }
+    /**
+     * Apply context to all agents.
+     * Each agent gets: shared run context + their own base instructions + their own MCP instructions.
+     *
+     * NOTE: This intentionally mutates agent objects in place. The agentConfigs Map
+     * holds references to config objects that will be passed to the graph runtime.
+     */
+    const ephemeralAgent = this.options.req.body.ephemeralAgent;
+    const mcpManager = getMCPManager();
+    await Promise.all(
+      allAgents.map(({ agent, agentId }) =>
+        applyContextToAgent({
+          agent,
+          agentId,
+          logger,
+          mcpManager,
+          sharedRunContext,
+          ephemeralAgent: agentId === this.options.agent.id ? ephemeralAgent : undefined,
+        }),
+      ),
+    );

    return result;
  }
@ -600,6 +439,8 @@ class AgentClient extends BaseClient {
          agent_id: memoryConfig.agent.id,
          endpoint: EModelEndpoint.agents,
        });
+      } else if (memoryConfig.agent?.id != null) {
+        prelimAgent = this.options.agent;
      } else if (
        memoryConfig.agent?.id == null &&
        memoryConfig.agent?.model != null &&
@ -614,6 +455,10 @@ class AgentClient extends BaseClient {
      );
    }

+    if (!prelimAgent) {
+      return;
+    }
+
    const agent = await initializeAgent(
      {
        req: this.options.req,
@ -633,6 +478,7 @@ class AgentClient extends BaseClient {
        updateFilesUsage: db.updateFilesUsage,
        getUserKeyValues: db.getUserKeyValues,
        getToolFilesByIds: db.getToolFilesByIds,
+        getCodeGeneratedFiles: db.getCodeGeneratedFiles,
      },
    );

@ -781,89 +627,29 @@ class AgentClient extends BaseClient {
    context = 'message',
    collectedUsage = this.collectedUsage,
  }) {
-    if (!collectedUsage || !collectedUsage.length) {
-      return;
-    }
-    // Support both OpenAI format (input_token_details) and Anthropic format (cache_*_input_tokens)
-    const firstUsage = collectedUsage[0];
-    const input_tokens =
-      (firstUsage?.input_tokens || 0) +
-      (Number(firstUsage?.input_token_details?.cache_creation) ||
-        Number(firstUsage?.cache_creation_input_tokens) ||
-        0) +
-      (Number(firstUsage?.input_token_details?.cache_read) ||
-        Number(firstUsage?.cache_read_input_tokens) ||
-        0);
-
-    let output_tokens = 0;
-    let previousTokens = input_tokens; // Start with original input
-    for (let i = 0; i < collectedUsage.length; i++) {
-      const usage = collectedUsage[i];
-      if (!usage) {
-        continue;
-      }
-
-      // Support both OpenAI format (input_token_details) and Anthropic format (cache_*_input_tokens)
-      const cache_creation =
-        Number(usage.input_token_details?.cache_creation) ||
-        Number(usage.cache_creation_input_tokens) ||
-        0;
-      const cache_read =
-        Number(usage.input_token_details?.cache_read) || Number(usage.cache_read_input_tokens) || 0;
-
-      const txMetadata = {
+    const result = await recordCollectedUsage(
+      {
+        spendTokens,
+        spendStructuredTokens,
+        pricing: { getMultiplier, getCacheMultiplier },
+        bulkWriteOps: { insertMany: bulkInsertTransactions, updateBalance },
+      },
+      {
+        user: this.user ?? this.options.req.user?.id,
+        conversationId: this.conversationId,
+        collectedUsage,
+        model: model ?? this.model ?? this.options.agent.model_parameters.model,
        context,
+        messageId: this.responseMessageId,
        balance,
        transactions,
-        conversationId: this.conversationId,
-        user: this.user ?? this.options.req.user?.id,
        endpointTokenConfig: this.options.endpointTokenConfig,
-        model: usage.model ?? model ?? this.model ?? this.options.agent.model_parameters.model,
-      };
+      },
+    );

-      if (i > 0) {
-        // Count new tokens generated (input_tokens minus previous accumulated tokens)
-        output_tokens +=
-          (Number(usage.input_tokens) || 0) + cache_creation + cache_read - previousTokens;
-      }
-
-      // Add this message's output tokens
-      output_tokens += Number(usage.output_tokens) || 0;
-
-      // Update previousTokens to include this message's output
-      previousTokens += Number(usage.output_tokens) || 0;
-
-      if (cache_creation > 0 || cache_read > 0) {
-        spendStructuredTokens(txMetadata, {
-          promptTokens: {
-            input: usage.input_tokens,
-            write: cache_creation,
-            read: cache_read,
-          },
-          completionTokens: usage.output_tokens,
-        }).catch((err) => {
-          logger.error(
-            '[api/server/controllers/agents/client.js #recordCollectedUsage] Error spending structured tokens',
-            err,
-          );
-        });
-        continue;
-      }
-      spendTokens(txMetadata, {
-        promptTokens: usage.input_tokens,
-        completionTokens: usage.output_tokens,
-      }).catch((err) => {
-        logger.error(
-          '[api/server/controllers/agents/client.js #recordCollectedUsage] Error spending tokens',
-          err,
-        );
-      });
+    if (result) {
+      this.usage = result;
    }
-
-    this.usage = {
-      input_tokens,
-      output_tokens,
-    };
  }

  /**
@ -952,13 +738,13 @@ class AgentClient extends BaseClient {
          },
          user: createSafeUser(this.options.req.user),
        },
-        recursionLimit: agentsEConfig?.recursionLimit ?? 25,
+        recursionLimit: agentsEConfig?.recursionLimit ?? 50,
        signal: abortController.signal,
        streamMode: 'values',
        version: 'v2',
      };

-      const toolSet = new Set((this.options.agent.tools ?? []).map((tool) => tool && tool.name));
+      const toolSet = buildToolSet(this.options.agent);
      let { messages: initialMessages, indexTokenCountMap } = formatAgentMessages(
        payload,
        this.indexTokenCountMap,
@ -1019,6 +805,7 @@ class AgentClient extends BaseClient {

        run = await createRun({
          agents,
+          messages,
          indexTokenCountMap,
          runId: this.responseMessageId,
          signal: abortController.signal,
@ -1054,9 +841,10 @@ class AgentClient extends BaseClient {
        config.signal = null;
      };

+      const hideSequentialOutputs = config.configurable.hide_sequential_outputs;
      await runAgents(initialMessages);
      /** @deprecated Agent Chain */
-      if (config.configurable.hide_sequential_outputs) {
+      if (hideSequentialOutputs) {
        this.contentParts = this.contentParts.filter((part, index) => {
          // Include parts that are either:
          // 1. At or after the finalContentStart index
@ -1091,11 +879,20 @@ class AgentClient extends BaseClient {
          this.artifactPromises.push(...attachments);
        }

-        await this.recordCollectedUsage({
-          context: 'message',
-          balance: balanceConfig,
-          transactions: transactionsConfig,
-        });
+        /** Skip token spending if aborted - the abort handler (abortMiddleware.js) handles it
+        This prevents double-spending when user aborts via `/api/agents/chat/abort` */
+        const wasAborted = abortController?.signal?.aborted;
+        if (!wasAborted) {
+          await this.recordCollectedUsage({
+            context: 'message',
+            balance: balanceConfig,
+            transactions: transactionsConfig,
+          });
+        } else {
+          logger.debug(
+            '[api/server/controllers/agents/client.js #chatCompletion] Skipping token spending - handled by abort middleware',
+          );
+        }
      } catch (err) {
        logger.error(
          '[api/server/controllers/agents/client.js #chatCompletion] Error in cleanup phase',
@ -1120,6 +917,14 @@ class AgentClient extends BaseClient {
    }
    const { handleLLMEnd, collected: collectedMetadata } = createMetadataAggregator();
    const { req, agent } = this.options;
+
+    if (req?.body?.isTemporary) {
+      logger.debug(
+        `[api/server/controllers/agents/client.js #titleConvo] Skipping title generation for temporary conversation`,
+      );
+      return;
+    }
+
    const appConfig = req.config;
    let endpoint = agent.endpoint;

@ -1293,6 +1098,7 @@ class AgentClient extends BaseClient {
        model: clientOptions.model,
        balance: balanceConfig,
        transactions: transactionsConfig,
+        messageId: this.responseMessageId,
      }).catch((err) => {
        logger.error(
          '[api/server/controllers/agents/client.js #titleConvo] Error recording collected usage',
@ -1331,6 +1137,7 @@ class AgentClient extends BaseClient {
          model,
          context,
          balance,
+          messageId: this.responseMessageId,
          conversationId: this.conversationId,
          user: this.user ?? this.options.req.user?.id,
          endpointTokenConfig: this.options.endpointTokenConfig,
@ -1349,6 +1156,7 @@ class AgentClient extends BaseClient {
            model,
            balance,
            context: 'reasoning',
+            messageId: this.responseMessageId,
            conversationId: this.conversationId,
            user: this.user ?? this.options.req.user?.id,
            endpointTokenConfig: this.options.endpointTokenConfig,
@ -1364,7 +1172,11 @@ class AgentClient extends BaseClient {
    }
  }

+  /**
+   * Selects the tokenizer encoding name for the current model.
+   * Anthropic Claude models use a distinct BPE tokenizer; all others default to o200k_base.
+   * The check is a case-insensitive substring match for "claude" on `this.model`;
+   * when `this.model` is unset (falsy) the default encoding is returned.
+   * @returns {string} `'claude'` when the model name contains "claude", otherwise `'o200k_base'`.
+   */
  getEncoding() {
+    if (this.model && this.model.toLowerCase().includes('claude')) {
+      return 'claude';
+    }
    return 'o200k_base';
  }

--- a/api/server/controllers/agents/client.test.js
+++ b/api/server/controllers/agents/client.test.js
@ -12,6 +12,17 @@ jest.mock('@librechat/agents', () => ({

 jest.mock('@librechat/api', () => ({
  ...jest.requireActual('@librechat/api'),
+  checkAccess: jest.fn(),
+  initializeAgent: jest.fn(),
+  createMemoryProcessor: jest.fn(),
+}));
+
+jest.mock('~/models/Agent', () => ({
+  loadAgent: jest.fn(),
+}));
+
+jest.mock('~/models/Role', () => ({
+  getRoleByName: jest.fn(),
 }));

 // Mock getMCPManager
@ -252,6 +263,7 @@ describe('AgentClient - titleConvo', () => {
        transactions: {
          enabled: true,
        },
+        messageId: 'response-123',
      });
    });

@ -336,6 +348,25 @@ describe('AgentClient - titleConvo', () => {
      expect(client.recordCollectedUsage).not.toHaveBeenCalled();
    });

+    it('should skip title generation for temporary chats', async () => {
+      // Set isTemporary to true
+      mockReq.body.isTemporary = true;
+
+      const text = 'Test temporary chat';
+      const abortController = new AbortController();
+
+      const result = await client.titleConvo({ text, abortController });
+
+      // Should return undefined without generating title
+      expect(result).toBeUndefined();
+
+      // generateTitle should NOT have been called
+      expect(mockRun.generateTitle).not.toHaveBeenCalled();
+
+      // recordCollectedUsage should NOT have been called
+      expect(client.recordCollectedUsage).not.toHaveBeenCalled();
+    });
+
    it('should skip title generation when titleConvo is false in all config', async () => {
      // Set titleConvo to false in "all" config
      mockReq.config = {
@ -1291,8 +1322,8 @@ describe('AgentClient - titleConvo', () => {
      expect(client.options.agent.instructions).toContain('# MCP Server Instructions');
      expect(client.options.agent.instructions).toContain('Use these tools carefully');

-      // Verify the base instructions are also included
-      expect(client.options.agent.instructions).toContain('Base instructions');
+      // Verify the base instructions are also included (from agent config, not buildOptions)
+      expect(client.options.agent.instructions).toContain('Base agent instructions');
    });

    it('should handle MCP instructions with ephemeral agent', async () => {
@ -1354,8 +1385,8 @@ describe('AgentClient - titleConvo', () => {
        additional_instructions: null,
      });

-      // Verify the instructions still work without MCP content
-      expect(client.options.agent.instructions).toBe('Base instructions only');
+      // Verify the instructions still work without MCP content (from agent config, not buildOptions)
+      expect(client.options.agent.instructions).toBe('Base agent instructions');
      expect(client.options.agent.instructions).not.toContain('[object Promise]');
    });

@ -1379,8 +1410,8 @@ describe('AgentClient - titleConvo', () => {
        additional_instructions: null,
      });

-      // Should still have base instructions without MCP content
-      expect(client.options.agent.instructions).toContain('Base instructions');
+      // Should still have base instructions without MCP content (from agent config, not buildOptions)
+      expect(client.options.agent.instructions).toContain('Base agent instructions');
      expect(client.options.agent.instructions).not.toContain('[object Promise]');
    });
  });
@ -1830,4 +1861,400 @@ describe('AgentClient - titleConvo', () => {
      });
    });
  });
+
+  describe('buildMessages - memory context for parallel agents', () => {
+    let client;
+    let mockReq;
+    let mockRes;
+    let mockAgent;
+    let mockOptions;
+
+    beforeEach(() => {
+      jest.clearAllMocks();
+
+      mockAgent = {
+        id: 'primary-agent',
+        name: 'Primary Agent',
+        endpoint: EModelEndpoint.openAI,
+        provider: EModelEndpoint.openAI,
+        instructions: 'Primary agent instructions',
+        model_parameters: {
+          model: 'gpt-4',
+        },
+        tools: [],
+      };
+
+      mockReq = {
+        user: {
+          id: 'user-123',
+          personalization: {
+            memories: true,
+          },
+        },
+        body: {
+          endpoint: EModelEndpoint.openAI,
+        },
+        config: {
+          memory: {
+            disabled: false,
+          },
+        },
+      };
+
+      mockRes = {};
+
+      mockOptions = {
+        req: mockReq,
+        res: mockRes,
+        agent: mockAgent,
+        endpoint: EModelEndpoint.agents,
+      };
+
+      client = new AgentClient(mockOptions);
+      client.conversationId = 'convo-123';
+      client.responseMessageId = 'response-123';
+      client.shouldSummarize = false;
+      client.maxContextTokens = 4096;
+    });
+
+    it('should pass memory context to parallel agents (addedConvo)', async () => {
+      const memoryContent = 'User prefers dark mode. User is a software developer.';
+      client.useMemory = jest.fn().mockResolvedValue(memoryContent);
+
+      const parallelAgent1 = {
+        id: 'parallel-agent-1',
+        name: 'Parallel Agent 1',
+        instructions: 'Parallel agent 1 instructions',
+        provider: EModelEndpoint.openAI,
+      };
+
+      const parallelAgent2 = {
+        id: 'parallel-agent-2',
+        name: 'Parallel Agent 2',
+        instructions: 'Parallel agent 2 instructions',
+        provider: EModelEndpoint.anthropic,
+      };
+
+      client.agentConfigs = new Map([
+        ['parallel-agent-1', parallelAgent1],
+        ['parallel-agent-2', parallelAgent2],
+      ]);
+
+      const messages = [
+        {
+          messageId: 'msg-1',
+          parentMessageId: null,
+          sender: 'User',
+          text: 'Hello',
+          isCreatedByUser: true,
+        },
+      ];
+
+      await client.buildMessages(messages, null, {
+        instructions: 'Base instructions',
+        additional_instructions: null,
+      });
+
+      expect(client.useMemory).toHaveBeenCalled();
+
+      // Verify primary agent has its configured instructions (not from buildOptions) and memory context
+      expect(client.options.agent.instructions).toContain('Primary agent instructions');
+      expect(client.options.agent.instructions).toContain(memoryContent);
+
+      expect(parallelAgent1.instructions).toContain('Parallel agent 1 instructions');
+      expect(parallelAgent1.instructions).toContain(memoryContent);
+
+      expect(parallelAgent2.instructions).toContain('Parallel agent 2 instructions');
+      expect(parallelAgent2.instructions).toContain(memoryContent);
+    });
+
+    it('should not modify parallel agents when no memory context is available', async () => {
+      client.useMemory = jest.fn().mockResolvedValue(undefined);
+
+      const parallelAgent = {
+        id: 'parallel-agent-1',
+        name: 'Parallel Agent 1',
+        instructions: 'Original parallel instructions',
+        provider: EModelEndpoint.openAI,
+      };
+
+      client.agentConfigs = new Map([['parallel-agent-1', parallelAgent]]);
+
+      const messages = [
+        {
+          messageId: 'msg-1',
+          parentMessageId: null,
+          sender: 'User',
+          text: 'Hello',
+          isCreatedByUser: true,
+        },
+      ];
+
+      await client.buildMessages(messages, null, {
+        instructions: 'Base instructions',
+        additional_instructions: null,
+      });
+
+      expect(parallelAgent.instructions).toBe('Original parallel instructions');
+    });
+
+    it('should handle parallel agents without existing instructions', async () => {
+      const memoryContent = 'User is a data scientist.';
+      client.useMemory = jest.fn().mockResolvedValue(memoryContent);
+
+      const parallelAgentNoInstructions = {
+        id: 'parallel-agent-no-instructions',
+        name: 'Parallel Agent No Instructions',
+        provider: EModelEndpoint.openAI,
+      };
+
+      client.agentConfigs = new Map([
+        ['parallel-agent-no-instructions', parallelAgentNoInstructions],
+      ]);
+
+      const messages = [
+        {
+          messageId: 'msg-1',
+          parentMessageId: null,
+          sender: 'User',
+          text: 'Hello',
+          isCreatedByUser: true,
+        },
+      ];
+
+      await client.buildMessages(messages, null, {
+        instructions: null,
+        additional_instructions: null,
+      });
+
+      expect(parallelAgentNoInstructions.instructions).toContain(memoryContent);
+    });
+
+    it('should not modify agentConfigs when none exist', async () => {
+      const memoryContent = 'User prefers concise responses.';
+      client.useMemory = jest.fn().mockResolvedValue(memoryContent);
+
+      client.agentConfigs = null;
+
+      const messages = [
+        {
+          messageId: 'msg-1',
+          parentMessageId: null,
+          sender: 'User',
+          text: 'Hello',
+          isCreatedByUser: true,
+        },
+      ];
+
+      await expect(
+        client.buildMessages(messages, null, {
+          instructions: 'Base instructions',
+          additional_instructions: null,
+        }),
+      ).resolves.not.toThrow();
+
+      expect(client.options.agent.instructions).toContain(memoryContent);
+    });
+
+    it('should handle empty agentConfigs map', async () => {
+      const memoryContent = 'User likes detailed explanations.';
+      client.useMemory = jest.fn().mockResolvedValue(memoryContent);
+
+      client.agentConfigs = new Map();
+
+      const messages = [
+        {
+          messageId: 'msg-1',
+          parentMessageId: null,
+          sender: 'User',
+          text: 'Hello',
+          isCreatedByUser: true,
+        },
+      ];
+
+      await expect(
+        client.buildMessages(messages, null, {
+          instructions: 'Base instructions',
+          additional_instructions: null,
+        }),
+      ).resolves.not.toThrow();
+
+      expect(client.options.agent.instructions).toContain(memoryContent);
+    });
+  });
+
+  describe('useMemory method - prelimAgent assignment', () => {
+    let client;
+    let mockReq;
+    let mockRes;
+    let mockAgent;
+    let mockOptions;
+    let mockCheckAccess;
+    let mockLoadAgent;
+    let mockInitializeAgent;
+    let mockCreateMemoryProcessor;
+
+    beforeEach(() => {
+      jest.clearAllMocks();
+
+      mockAgent = {
+        id: 'agent-123',
+        endpoint: EModelEndpoint.openAI,
+        provider: EModelEndpoint.openAI,
+        instructions: 'Test instructions',
+        model: 'gpt-4',
+        model_parameters: {
+          model: 'gpt-4',
+        },
+      };
+
+      mockReq = {
+        user: {
+          id: 'user-123',
+          personalization: {
+            memories: true,
+          },
+        },
+        config: {
+          memory: {
+            agent: {
+              id: 'agent-123',
+            },
+          },
+          endpoints: {
+            [EModelEndpoint.agents]: {
+              allowedProviders: [EModelEndpoint.openAI],
+            },
+          },
+        },
+      };
+
+      mockRes = {};
+
+      mockOptions = {
+        req: mockReq,
+        res: mockRes,
+        agent: mockAgent,
+      };
+
+      mockCheckAccess = require('@librechat/api').checkAccess;
+      mockLoadAgent = require('~/models/Agent').loadAgent;
+      mockInitializeAgent = require('@librechat/api').initializeAgent;
+      mockCreateMemoryProcessor = require('@librechat/api').createMemoryProcessor;
+    });
+
+    it('should use current agent when memory config agent.id matches current agent id', async () => {
+      mockCheckAccess.mockResolvedValue(true);
+      mockInitializeAgent.mockResolvedValue({
+        ...mockAgent,
+        provider: EModelEndpoint.openAI,
+      });
+      mockCreateMemoryProcessor.mockResolvedValue([undefined, jest.fn()]);
+
+      client = new AgentClient(mockOptions);
+      client.conversationId = 'convo-123';
+      client.responseMessageId = 'response-123';
+
+      await client.useMemory();
+
+      expect(mockLoadAgent).not.toHaveBeenCalled();
+      expect(mockInitializeAgent).toHaveBeenCalledWith(
+        expect.objectContaining({
+          agent: mockAgent,
+        }),
+        expect.any(Object),
+      );
+    });
+
+    it('should load different agent when memory config agent.id differs from current agent id', async () => {
+      const differentAgentId = 'different-agent-456';
+      const differentAgent = {
+        id: differentAgentId,
+        provider: EModelEndpoint.openAI,
+        model: 'gpt-4',
+        instructions: 'Different agent instructions',
+      };
+
+      mockReq.config.memory.agent.id = differentAgentId;
+
+      mockCheckAccess.mockResolvedValue(true);
+      mockLoadAgent.mockResolvedValue(differentAgent);
+      mockInitializeAgent.mockResolvedValue({
+        ...differentAgent,
+        provider: EModelEndpoint.openAI,
+      });
+      mockCreateMemoryProcessor.mockResolvedValue([undefined, jest.fn()]);
+
+      client = new AgentClient(mockOptions);
+      client.conversationId = 'convo-123';
+      client.responseMessageId = 'response-123';
+
+      await client.useMemory();
+
+      expect(mockLoadAgent).toHaveBeenCalledWith(
+        expect.objectContaining({
+          agent_id: differentAgentId,
+        }),
+      );
+      expect(mockInitializeAgent).toHaveBeenCalledWith(
+        expect.objectContaining({
+          agent: differentAgent,
+        }),
+        expect.any(Object),
+      );
+    });
+
+    it('should return early when prelimAgent is undefined (no valid memory agent config)', async () => {
+      mockReq.config.memory = {
+        agent: {},
+      };
+
+      mockCheckAccess.mockResolvedValue(true);
+
+      client = new AgentClient(mockOptions);
+      client.conversationId = 'convo-123';
+      client.responseMessageId = 'response-123';
+
+      const result = await client.useMemory();
+
+      expect(result).toBeUndefined();
+      expect(mockInitializeAgent).not.toHaveBeenCalled();
+      expect(mockCreateMemoryProcessor).not.toHaveBeenCalled();
+    });
+
+    it('should create ephemeral agent when no id but model and provider are specified', async () => {
+      mockReq.config.memory = {
+        agent: {
+          model: 'gpt-4',
+          provider: EModelEndpoint.openAI,
+        },
+      };
+
+      mockCheckAccess.mockResolvedValue(true);
+      mockInitializeAgent.mockResolvedValue({
+        id: Constants.EPHEMERAL_AGENT_ID,
+        model: 'gpt-4',
+        provider: EModelEndpoint.openAI,
+      });
+      mockCreateMemoryProcessor.mockResolvedValue([undefined, jest.fn()]);
+
+      client = new AgentClient(mockOptions);
+      client.conversationId = 'convo-123';
+      client.responseMessageId = 'response-123';
+
+      await client.useMemory();
+
+      expect(mockLoadAgent).not.toHaveBeenCalled();
+      expect(mockInitializeAgent).toHaveBeenCalledWith(
+        expect.objectContaining({
+          agent: expect.objectContaining({
+            id: Constants.EPHEMERAL_AGENT_ID,
+            model: 'gpt-4',
+            provider: EModelEndpoint.openAI,
+          }),
+        }),
+        expect.any(Object),
+      );
+    });
+  });
 });
--- a/api/server/controllers/agents/openai.js
+++ b/api/server/controllers/agents/openai.js
@ -0,0 +1,713 @@
+const { nanoid } = require('nanoid');
+const { logger } = require('@librechat/data-schemas');
+const { Callback, ToolEndHandler, formatAgentMessages } = require('@librechat/agents');
+const { EModelEndpoint, ResourceType, PermissionBits } = require('librechat-data-provider');
+const {
+  writeSSE,
+  createRun,
+  createChunk,
+  buildToolSet,
+  sendFinalChunk,
+  createSafeUser,
+  validateRequest,
+  initializeAgent,
+  getBalanceConfig,
+  createErrorResponse,
+  recordCollectedUsage,
+  getTransactionsConfig,
+  createToolExecuteHandler,
+  buildNonStreamingResponse,
+  createOpenAIStreamTracker,
+  createOpenAIContentAggregator,
+  isChatCompletionValidationFailure,
+} = require('@librechat/api');
+const { loadAgentTools, loadToolsForExecution } = require('~/server/services/ToolService');
+const { createToolEndCallback } = require('~/server/controllers/agents/callbacks');
+const { findAccessibleResources } = require('~/server/services/PermissionService');
+const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
+const { getMultiplier, getCacheMultiplier } = require('~/models/tx');
+const { getConvoFiles } = require('~/models/Conversation');
+const { getAgent, getAgents } = require('~/models/Agent');
+const db = require('~/models');
+
+/**
+ * Creates a tool loader function for the agent.
+ * @param {AbortSignal} signal - The abort signal
+ * @param {boolean} [definitionsOnly=true] - When true, returns only serializable
+ *   tool definitions without creating full tool instances (for event-driven mode)
+ * @returns {Function} Async `loadTools` function. It takes a single object
+ *   (`req`, `res`, `tools`, `model`, `agentId`, `provider`, `tool_options`,
+ *   `tool_resources`), builds a minimal agent object from those fields and resolves
+ *   with the result of `loadAgentTools`. `streamId` is always passed as `null`.
+ *   If `loadAgentTools` throws, the error is logged and the loader resolves to
+ *   `undefined` instead of rejecting, so callers must tolerate a missing result.
+ */
+function createToolLoader(signal, definitionsOnly = true) {
+  return async function loadTools({
+    req,
+    res,
+    tools,
+    model,
+    agentId,
+    provider,
+    tool_options,
+    tool_resources,
+  }) {
+    const agent = { id: agentId, tools, provider, model, tool_options };
+    try {
+      return await loadAgentTools({
+        req,
+        res,
+        agent,
+        signal,
+        tool_resources,
+        definitionsOnly,
+        streamId: null, // No resumable stream for OpenAI compat
+      });
+    } catch (error) {
+      logger.error('Error loading tools for agent ' + agentId, error);
+    }
+  };
+}
+
+/**
+ * Converts a single OpenAI message content part to the internal format.
+ *
+ * `text` and `image_url` parts are rebuilt as new objects carrying only their
+ * `type` and payload field (`text` / `image_url`), so any other properties on
+ * the incoming part are dropped. Parts of any other type are returned as-is
+ * (same object reference, not copied).
+ * @param {Object} part - Content part; must be non-null, since `part.type` is read unguarded
+ * @returns {Object} Converted part
+ */
+function convertContentPart(part) {
+  if (part.type === 'text') {
+    return { type: 'text', text: part.text };
+  }
+  if (part.type === 'image_url') {
+    return { type: 'image_url', image_url: part.image_url };
+  }
+  return part;
+}
+
+/**
+ * Converts OpenAI chat messages to the internal message format.
+ *
+ * Content handling per message:
+ * - string content is passed through unchanged;
+ * - any other truthy content is mapped through `convertContentPart`
+ *   (assumes it is an array of content parts; TODO confirm that request
+ *   validation guarantees this, since a non-array object would throw on `.map`);
+ * - missing/falsy content (e.g. `null` or `undefined`) becomes `''`.
+ *
+ * `role` is always copied. `name`, `tool_calls` and `tool_call_id` are copied
+ * only when truthy; all other message properties are omitted from the result.
+ * @param {Array} messages - OpenAI format messages
+ * @returns {Array} Internal format messages (a new array of new objects; the input messages are not mutated)
+ */
+function convertMessages(messages) {
+  return messages.map((msg) => {
+    let content;
+    if (typeof msg.content === 'string') {
+      content = msg.content;
+    } else if (msg.content) {
+      content = msg.content.map(convertContentPart);
+    } else {
+      content = '';
+    }
+
+    return {
+      role: msg.role,
+      content,
+      ...(msg.name && { name: msg.name }),
+      ...(msg.tool_calls && { tool_calls: msg.tool_calls }),
+      ...(msg.tool_call_id && { tool_call_id: msg.tool_call_id }),
+    };
+  });
+}
+
+/**
+ * Sends an error response in OpenAI format: sets the HTTP status on `res` and
+ * writes the JSON body built by `createErrorResponse(message, type, code)`.
+ * @param {Object} res - Response object exposing chainable `status()` and `json()`
+ *   (presumably the Express response; confirm against the route setup)
+ * @param {number} statusCode - HTTP status code to send
+ * @param {string} message - Error message passed to `createErrorResponse`
+ * @param {string} [type='invalid_request_error'] - Error type passed to `createErrorResponse`
+ * @param {string | null} [code=null] - Optional error code passed to `createErrorResponse`
+ */
+function sendErrorResponse(res, statusCode, message, type = 'invalid_request_error', code = null) {
+  res.status(statusCode).json(createErrorResponse(message, type, code));
+}
+
+/**
+ * OpenAI-compatible chat completions controller for agents.
+ *
+ * POST /v1/chat/completions
+ *
+ * Request format:
+ * {
+ *   "model": "agent_id_here",
+ *   "messages": [{"role": "user", "content": "Hello!"}],
+ *   "stream": true,
+ *   "conversation_id": "optional",
+ *   "parent_message_id": "optional"
+ * }
+ */
+const OpenAIChatCompletionController = async (req, res) => {
+  const appConfig = req.config;
+  const requestStartTime = Date.now();
+
+  const validation = validateRequest(req.body);
+  if (isChatCompletionValidationFailure(validation)) {
+    return sendErrorResponse(res, 400, validation.error);
+  }
+
+  const request = validation.request;
+  const agentId = request.model;
+
+  // Look up the agent
+  const agent = await getAgent({ id: agentId });
+  if (!agent) {
+    return sendErrorResponse(
+      res,
+      404,
+      `Agent not found: ${agentId}`,
+      'invalid_request_error',
+      'model_not_found',
+    );
+  }
+
+  const responseId = `chatcmpl-${nanoid()}`;
+  const conversationId = request.conversation_id ?? nanoid();
+  const parentMessageId = request.parent_message_id ?? null;
+  const created = Math.floor(Date.now() / 1000);
+
+  /** @type {import('@librechat/api').OpenAIResponseContext} — key must be `requestId` to match the type used by createChunk/buildNonStreamingResponse */
+  const context = {
+    created,
+    requestId: responseId,
+    model: agentId,
+  };
+
+  logger.debug(
+    `[OpenAI API] Response ${responseId} started for agent ${agentId}, stream: ${request.stream}`,
+  );
+
+  // Set up abort controller
+  const abortController = new AbortController();
+
+  // Handle client disconnect
+  req.on('close', () => {
+    if (!abortController.signal.aborted) {
+      abortController.abort();
+      logger.debug('[OpenAI API] Client disconnected, aborting');
+    }
+  });
+
+  try {
+    // Build allowed providers set
+    const allowedProviders = new Set(
+      appConfig?.endpoints?.[EModelEndpoint.agents]?.allowedProviders,
+    );
+
+    // Create tool loader
+    const loadTools = createToolLoader(abortController.signal);
+
+    // Initialize the agent first to check for disableStreaming
+    const endpointOption = {
+      endpoint: agent.provider,
+      model_parameters: agent.model_parameters ?? {},
+    };
+
+    const primaryConfig = await initializeAgent(
+      {
+        req,
+        res,
+        loadTools,
+        requestFiles: [],
+        conversationId,
+        parentMessageId,
+        agent,
+        endpointOption,
+        allowedProviders,
+        isInitialAgent: true,
+      },
+      {
+        getConvoFiles,
+        getFiles: db.getFiles,
+        getUserKey: db.getUserKey,
+        getMessages: db.getMessages,
+        updateFilesUsage: db.updateFilesUsage,
+        getUserKeyValues: db.getUserKeyValues,
+        getUserCodeFiles: db.getUserCodeFiles,
+        getToolFilesByIds: db.getToolFilesByIds,
+        getCodeGeneratedFiles: db.getCodeGeneratedFiles,
+      },
+    );
+
+    // Determine if streaming is enabled (check both request and agent config)
+    const streamingDisabled = !!primaryConfig.model_parameters?.disableStreaming;
+    const isStreaming = request.stream === true && !streamingDisabled;
+
+    // Create tracker for streaming or aggregator for non-streaming
+    const tracker = isStreaming ? createOpenAIStreamTracker() : null;
+    const aggregator = isStreaming ? null : createOpenAIContentAggregator();
+
+    // Set up response for streaming
+    if (isStreaming) {
+      res.setHeader('Content-Type', 'text/event-stream');
+      res.setHeader('Cache-Control', 'no-cache');
+      res.setHeader('Connection', 'keep-alive');
+      res.setHeader('X-Accel-Buffering', 'no');
+      res.flushHeaders();
+
+      // Send initial chunk with role
+      const initialChunk = createChunk(context, { role: 'assistant' });
+      writeSSE(res, initialChunk);
+    }
+
+    // Create handler config for OpenAI streaming (only used when streaming)
+    const handlerConfig = isStreaming
+      ? {
+          res,
+          context,
+          tracker,
+        }
+      : null;
+
+    const collectedUsage = [];
+    /** @type {Promise<import('librechat-data-provider').TAttachment | null>[]} */
+    const artifactPromises = [];
+
+    const toolEndCallback = createToolEndCallback({ req, res, artifactPromises, streamId: null });
+
+    const toolExecuteOptions = {
+      loadTools: async (toolNames) => {
+        return loadToolsForExecution({
+          req,
+          res,
+          agent,
+          toolNames,
+          signal: abortController.signal,
+          toolRegistry: primaryConfig.toolRegistry,
+          userMCPAuthMap: primaryConfig.userMCPAuthMap,
+          tool_resources: primaryConfig.tool_resources,
+        });
+      },
+      toolEndCallback,
+    };
+
+    const openaiMessages = convertMessages(request.messages);
+
+    const toolSet = buildToolSet(primaryConfig);
+    const { messages: formattedMessages, indexTokenCountMap } = formatAgentMessages(
+      openaiMessages,
+      {},
+      toolSet,
+    );
+
+    /**
+     * Create a simple handler that processes data
+     */
+    const createHandler = (processor) => ({
+      handle: (_event, data) => {
+        if (processor) {
+          processor(data);
+        }
+      },
+    });
+
+    /**
+     * Stream text content in OpenAI format
+     */
+    const streamText = (text) => {
+      if (!text) {
+        return;
+      }
+      if (isStreaming) {
+        tracker.addText();
+        writeSSE(res, createChunk(context, { content: text }));
+      } else {
+        aggregator.addText(text);
+      }
+    };
+
+    /**
+     * Stream reasoning content in OpenAI format (OpenRouter convention)
+     */
+    const streamReasoning = (text) => {
+      if (!text) {
+        return;
+      }
+      if (isStreaming) {
+        tracker.addReasoning();
+        writeSSE(res, createChunk(context, { reasoning: text }));
+      } else {
+        aggregator.addReasoning(text);
+      }
+    };
+
+    // Event handlers for OpenAI-compatible streaming
+    const handlers = {
+      // Text content streaming
+      on_message_delta: createHandler((data) => {
+        const content = data?.delta?.content;
+        if (Array.isArray(content)) {
+          for (const part of content) {
+            if (part.type === 'text' && part.text) {
+              streamText(part.text);
+            }
+          }
+        }
+      }),
+
+      // Reasoning/thinking content streaming
+      on_reasoning_delta: createHandler((data) => {
+        const content = data?.delta?.content;
+        if (Array.isArray(content)) {
+          for (const part of content) {
+            const text = part.think || part.text;
+            if (text) {
+              streamReasoning(text);
+            }
+          }
+        }
+      }),
+
+      // Tool call initiation - streams id and name (from on_run_step)
+      on_run_step: createHandler((data) => {
+        const stepDetails = data?.stepDetails;
+        if (stepDetails?.type === 'tool_calls' && stepDetails.tool_calls) {
+          for (const tc of stepDetails.tool_calls) {
+            const toolIndex = data.index ?? 0;
+            const toolId = tc.id ?? '';
+            const toolName = tc.name ?? '';
+            const toolCall = {
+              id: toolId,
+              type: 'function',
+              function: { name: toolName, arguments: '' },
+            };
+
+            // Track tool call in tracker or aggregator
+            if (isStreaming) {
+              if (!tracker.toolCalls.has(toolIndex)) {
+                tracker.toolCalls.set(toolIndex, toolCall);
+              }
+              // Stream initial tool call chunk (like OpenAI does)
+              writeSSE(
+                res,
+                createChunk(context, {
+                  tool_calls: [{ index: toolIndex, ...toolCall }],
+                }),
+              );
+            } else {
+              if (!aggregator.toolCalls.has(toolIndex)) {
+                aggregator.toolCalls.set(toolIndex, toolCall);
+              }
+            }
+          }
+        }
+      }),
+
+      // Tool call argument streaming (from on_run_step_delta)
+      on_run_step_delta: createHandler((data) => {
+        const delta = data?.delta;
+        if (delta?.type === 'tool_calls' && delta.tool_calls) {
+          for (const tc of delta.tool_calls) {
+            const args = tc.args ?? '';
+            if (!args) {
+              continue;
+            }
+
+            const toolIndex = tc.index ?? 0;
+
+            // Update tool call arguments
+            const targetMap = isStreaming ? tracker.toolCalls : aggregator.toolCalls;
+            const tracked = targetMap.get(toolIndex);
+            if (tracked) {
+              tracked.function.arguments += args;
+            }
+
+            // Stream argument delta (only for streaming)
+            if (isStreaming) {
+              writeSSE(
+                res,
+                createChunk(context, {
+                  tool_calls: [
+                    {
+                      index: toolIndex,
+                      function: { arguments: args },
+                    },
+                  ],
+                }),
+              );
+            }
+          }
+        }
+      }),
+
+      // Usage tracking
+      on_chat_model_end: createHandler((data) => {
+        const usage = data?.output?.usage_metadata;
+        if (usage) {
+          collectedUsage.push(usage);
+          const target = isStreaming ? tracker : aggregator;
+          target.usage.promptTokens += usage.input_tokens ?? 0;
+          target.usage.completionTokens += usage.output_tokens ?? 0;
+        }
+      }),
+      on_run_step_completed: createHandler(),
+      // Use proper ToolEndHandler for processing artifacts (images, file citations, code output)
+      on_tool_end: new ToolEndHandler(toolEndCallback, logger),
+      on_chain_stream: createHandler(),
+      on_chain_end: createHandler(),
+      on_agent_update: createHandler(),
+      on_custom_event: createHandler(),
+      // Event-driven tool execution handler
+      on_tool_execute: createToolExecuteHandler(toolExecuteOptions),
+    };
+
+    // Create and run the agent
+    const userId = req.user?.id ?? 'api-user';
+
+    // Extract userMCPAuthMap from primaryConfig (needed for MCP tool connections)
+    const userMCPAuthMap = primaryConfig.userMCPAuthMap;
+
+    const run = await createRun({
+      agents: [primaryConfig],
+      messages: formattedMessages,
+      indexTokenCountMap,
+      runId: responseId,
+      signal: abortController.signal,
+      customHandlers: handlers,
+      requestBody: {
+        messageId: responseId,
+        conversationId,
+      },
+      user: { id: userId },
+    });
+
+    if (!run) {
+      throw new Error('Failed to create agent run');
+    }
+
+    // Process the stream
+    const config = {
+      runName: 'AgentRun',
+      configurable: {
+        thread_id: conversationId,
+        user_id: userId,
+        user: createSafeUser(req.user),
+        requestBody: {
+          messageId: responseId,
+          conversationId,
+        },
+        ...(userMCPAuthMap != null && { userMCPAuthMap }),
+      },
+      signal: abortController.signal,
+      streamMode: 'values',
+      version: 'v2',
+    };
+
+    await run.processStream({ messages: formattedMessages }, config, {
+      callbacks: {
+        [Callback.TOOL_ERROR]: (graph, error, toolId) => {
+          logger.error(`[OpenAI API] Tool Error "${toolId}"`, error);
+        },
+      },
+    });
+
+    // Record token usage against balance
+    const balanceConfig = getBalanceConfig(appConfig);
+    const transactionsConfig = getTransactionsConfig(appConfig);
+    recordCollectedUsage(
+      {
+        spendTokens,
+        spendStructuredTokens,
+        pricing: { getMultiplier, getCacheMultiplier },
+        bulkWriteOps: { insertMany: db.bulkInsertTransactions, updateBalance: db.updateBalance },
+      },
+      {
+        user: userId,
+        conversationId,
+        collectedUsage,
+        context: 'message',
+        messageId: responseId,
+        balance: balanceConfig,
+        transactions: transactionsConfig,
+        model: primaryConfig.model || agent.model_parameters?.model,
+      },
+    ).catch((err) => {
+      logger.error('[OpenAI API] Error recording usage:', err);
+    });
+
+    // Finalize response
+    const duration = Date.now() - requestStartTime;
+    if (isStreaming) {
+      sendFinalChunk(handlerConfig);
+      res.end();
+      logger.debug(`[OpenAI API] Response ${responseId} completed in ${duration}ms (streaming)`);
+
+      // Wait for artifact processing after response ends (non-blocking)
+      if (artifactPromises.length > 0) {
+        Promise.all(artifactPromises).catch((artifactError) => {
+          logger.warn('[OpenAI API] Error processing artifacts:', artifactError);
+        });
+      }
+    } else {
+      // For non-streaming, wait for artifacts before sending response
+      if (artifactPromises.length > 0) {
+        try {
+          await Promise.all(artifactPromises);
+        } catch (artifactError) {
+          logger.warn('[OpenAI API] Error processing artifacts:', artifactError);
+        }
+      }
+
+      // Build usage from aggregated data
+      const usage = {
+        prompt_tokens: aggregator.usage.promptTokens,
+        completion_tokens: aggregator.usage.completionTokens,
+        total_tokens: aggregator.usage.promptTokens + aggregator.usage.completionTokens,
+      };
+
+      if (aggregator.usage.reasoningTokens > 0) {
+        usage.completion_tokens_details = {
+          reasoning_tokens: aggregator.usage.reasoningTokens,
+        };
+      }
+
+      const response = buildNonStreamingResponse(
+        context,
+        aggregator.getText(),
+        aggregator.getReasoning(),
+        aggregator.toolCalls,
+        usage,
+      );
+      res.json(response);
+      logger.debug(
+        `[OpenAI API] Response ${responseId} completed in ${duration}ms (non-streaming)`,
+      );
+    }
+  } catch (error) {
+    const errorMessage = error instanceof Error ? error.message : 'An error occurred';
+    logger.error('[OpenAI API] Error:', error);
+
+    // Check if we already started streaming (headers sent)
+    if (res.headersSent) {
+      // Headers already sent, send error in stream
+      const errorChunk = createChunk(context, { content: `\n\nError: ${errorMessage}` }, 'stop');
+      writeSSE(res, errorChunk);
+      writeSSE(res, '[DONE]');
+      res.end();
+    } else {
+      // Forward upstream provider status codes (e.g., Anthropic 400s) instead of masking as 500
+      const statusCode =
+        typeof error?.status === 'number' && error.status >= 400 && error.status < 600
+          ? error.status
+          : 500;
+      const errorType =
+        statusCode >= 400 && statusCode < 500 ? 'invalid_request_error' : 'server_error';
+      sendErrorResponse(res, statusCode, errorMessage, errorType);
+    }
+  }
+};
+
+/**
+ * GET /v1/models
+ *
+ * Responds with an OpenAI-style model list in which every entry is an agent the
+ * authenticated user can reach remotely (VIEW permission on REMOTE_AGENT).
+ *
+ * Replies 401 when the request carries no user id and 500 when anything throws.
+ *
+ * @param {object} req - Express request; `req.user` supplies `id` and `role`
+ * @param {object} res - Express response
+ */
+const ListModelsController = async (req, res) => {
+  try {
+    const { id: userId, role: userRole } = req.user ?? {};
+
+    if (!userId) {
+      return sendErrorResponse(res, 401, 'Authentication required', 'auth_error');
+    }
+
+    // Ids of every agent this user holds remote VIEW access to
+    const permittedIds = await findAccessibleResources({
+      userId,
+      role: userRole,
+      resourceType: ResourceType.REMOTE_AGENT,
+      requiredPermissions: PermissionBits.VIEW,
+    });
+
+    // Skip the agent lookup entirely when nothing is accessible
+    const agents =
+      permittedIds.length > 0 ? await getAgents({ _id: { $in: permittedIds } }) : [];
+
+    /** Shapes one agent record as an OpenAI model object plus LibreChat extras */
+    const toModel = ({ id, createdAt, name, description, provider }) => ({
+      id,
+      object: 'model',
+      // Unix seconds; falls back to "now" when the agent has no createdAt
+      created: Math.floor(new Date(createdAt || Date.now()).getTime() / 1000),
+      owned_by: 'librechat',
+      permission: [],
+      root: id,
+      parent: null,
+      // LibreChat extensions
+      name,
+      description,
+      provider,
+    });
+
+    res.json({
+      object: 'list',
+      data: agents.map((agent) => toModel(agent)),
+    });
+  } catch (error) {
+    let errorMessage = 'Failed to list models';
+    if (error instanceof Error) {
+      errorMessage = error.message;
+    }
+    logger.error('[OpenAI API] Error listing models:', error);
+    sendErrorResponse(res, 500, errorMessage, 'server_error');
+  }
+};
+
+/**
+ * GET /v1/models/:model
+ *
+ * Returns a single agent as an OpenAI-style model object, provided the caller
+ * has remote VIEW access to it.
+ *
+ * Replies 401 without a user id, 404 when no agent matches the id, 403 when the
+ * agent exists but is not in the caller's accessible set, and 500 on any thrown
+ * error.
+ *
+ * @param {object} req - Express request; reads `req.params.model` and `req.user`
+ * @param {object} res - Express response
+ */
+const GetModelController = async (req, res) => {
+  try {
+    const modelId = req.params.model;
+    const { id: userId, role: userRole } = req.user ?? {};
+
+    if (!userId) {
+      return sendErrorResponse(res, 401, 'Authentication required', 'auth_error');
+    }
+
+    const agent = await getAgent({ id: modelId });
+    if (!agent) {
+      return sendErrorResponse(
+        res,
+        404,
+        `Model not found: ${modelId}`,
+        'invalid_request_error',
+        'model_not_found',
+      );
+    }
+
+    // Resolve everything the user may access remotely, then look for this agent in it
+    const permittedIds = await findAccessibleResources({
+      userId,
+      role: userRole,
+      resourceType: ResourceType.REMOTE_AGENT,
+      requiredPermissions: PermissionBits.VIEW,
+    });
+
+    // Ids are compared through their string forms
+    const isPermitted = permittedIds.some(
+      (permittedId) => permittedId.toString() === agent._id.toString(),
+    );
+
+    if (!isPermitted) {
+      return sendErrorResponse(
+        res,
+        403,
+        `No remote access to model: ${modelId}`,
+        'permission_error',
+        'access_denied',
+      );
+    }
+
+    const { id, createdAt, name, description, provider } = agent;
+    res.json({
+      id,
+      object: 'model',
+      // Unix seconds; falls back to "now" when the agent has no createdAt
+      created: Math.floor(new Date(createdAt || Date.now()).getTime() / 1000),
+      owned_by: 'librechat',
+      permission: [],
+      root: id,
+      parent: null,
+      // LibreChat extensions
+      name,
+      description,
+      provider,
+    });
+  } catch (error) {
+    let errorMessage = 'Failed to get model';
+    if (error instanceof Error) {
+      errorMessage = error.message;
+    }
+    logger.error('[OpenAI API] Error getting model:', error);
+    sendErrorResponse(res, 500, errorMessage, 'server_error');
+  }
+};
+
+// Controllers exported by this module: the chat-completion handler plus the two
+// model-listing handlers (GET /v1/models and GET /v1/models/:model).
+module.exports = {
+  OpenAIChatCompletionController,
+  ListModelsController,
+  GetModelController,
+};
--- a/api/server/controllers/agents/recordCollectedUsage.spec.js
+++ b/api/server/controllers/agents/recordCollectedUsage.spec.js
@ -0,0 +1,364 @@
+/**
+ * Tests for AgentClient.recordCollectedUsage
+ *
+ * This is a critical function that handles token spending for agent LLM calls.
+ * The client now delegates to the TS recordCollectedUsage from @librechat/api,
+ * passing pricing and bulkWriteOps deps.
+ */
+
+const { EModelEndpoint } = require('librechat-data-provider');
+
+// Shared test doubles. The `mock` name prefix matters: Jest hoists `jest.mock()`
+// calls above these declarations and only permits factory references to
+// out-of-scope variables whose names start with `mock`.
+const mockSpendTokens = jest.fn().mockResolvedValue();
+const mockSpendStructuredTokens = jest.fn().mockResolvedValue();
+const mockGetMultiplier = jest.fn().mockReturnValue(1);
+const mockGetCacheMultiplier = jest.fn().mockReturnValue(null);
+const mockUpdateBalance = jest.fn().mockResolvedValue({});
+const mockBulkInsertTransactions = jest.fn().mockResolvedValue(undefined);
+// Stand-in for the @librechat/api implementation the client delegates to;
+// individual tests override the resolved value as needed.
+const mockRecordCollectedUsage = jest
+  .fn()
+  .mockResolvedValue({ input_tokens: 100, output_tokens: 50 });
+
+// Wrapper functions forward to the mocks at call time rather than capturing them
+jest.mock('~/models/spendTokens', () => ({
+  spendTokens: (...args) => mockSpendTokens(...args),
+  spendStructuredTokens: (...args) => mockSpendStructuredTokens(...args),
+}));
+
+// Pricing helpers
+jest.mock('~/models/tx', () => ({
+  getMultiplier: mockGetMultiplier,
+  getCacheMultiplier: mockGetCacheMultiplier,
+}));
+
+// Write operations — presumably what the client passes as `bulkWriteOps`; verify against ./client
+jest.mock('~/models', () => ({
+  updateBalance: mockUpdateBalance,
+  bulkInsertTransactions: mockBulkInsertTransactions,
+}));
+
+// Silent logger plus a minimal MCP manager stub
+jest.mock('~/config', () => ({
+  logger: {
+    debug: jest.fn(),
+    error: jest.fn(),
+    warn: jest.fn(),
+    info: jest.fn(),
+  },
+  getMCPManager: jest.fn(() => ({
+    formatInstructionsForContext: jest.fn(),
+  })),
+}));
+
+// Keep the real @librechat/agents exports, replacing only the metadata aggregator
+// with a stub that exposes a no-op handler and an empty `collected` list
+jest.mock('@librechat/agents', () => ({
+  ...jest.requireActual('@librechat/agents'),
+  createMetadataAggregator: () => ({
+    handleLLMEnd: jest.fn(),
+    collected: [],
+  }),
+}));
+
+// Keep the real @librechat/api exports, replacing only recordCollectedUsage so the
+// tests can inspect the arguments it is called with and control its result
+jest.mock('@librechat/api', () => {
+  const actual = jest.requireActual('@librechat/api');
+  return {
+    ...actual,
+    recordCollectedUsage: (...args) => mockRecordCollectedUsage(...args),
+  };
+});
+
+const AgentClient = require('./client');
+
+describe('AgentClient - recordCollectedUsage', () => {
+  let client;
+  let mockAgent;
+  let mockOptions;
+
+  /**
+   * Builds a fresh AgentClient (OpenAI agent, user `user-123`, conversation
+   * `convo-123`) before every test and resets mock state.
+   */
+  beforeEach(() => {
+    // clearAllMocks wipes recorded calls/results but keeps implementations, so a
+    // mockResolvedValue set by one test would otherwise carry over into the next.
+    jest.clearAllMocks();
+    // Re-arm the module-level default so every test starts from the same baseline
+    mockRecordCollectedUsage.mockResolvedValue({ input_tokens: 100, output_tokens: 50 });
+
+    mockAgent = {
+      id: 'agent-123',
+      endpoint: EModelEndpoint.openAI,
+      provider: EModelEndpoint.openAI,
+      model_parameters: {
+        model: 'gpt-4',
+      },
+    };
+
+    mockOptions = {
+      req: {
+        user: { id: 'user-123' },
+        body: { model: 'gpt-4', endpoint: EModelEndpoint.openAI },
+      },
+      res: {},
+      agent: mockAgent,
+      endpointTokenConfig: {},
+    };
+
+    client = new AgentClient(mockOptions);
+    client.conversationId = 'convo-123';
+    client.user = 'user-123';
+  });
+
+  /** Delegation contract: what the client hands to recordCollectedUsage and what it keeps. */
+  describe('basic functionality', () => {
+    /** Fresh billing flags per call so no object is shared between tests */
+    const billingFlags = () => ({
+      balance: { enabled: true },
+      transactions: { enabled: true },
+    });
+
+    it('should delegate to recordCollectedUsage with full deps', async () => {
+      const collectedUsage = [{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' }];
+
+      await client.recordCollectedUsage({ collectedUsage, ...billingFlags() });
+
+      expect(mockRecordCollectedUsage).toHaveBeenCalledTimes(1);
+      const [deps, params] = mockRecordCollectedUsage.mock.calls[0];
+
+      // Top-level entries of the dependency bag
+      for (const key of ['spendTokens', 'spendStructuredTokens', 'pricing', 'bulkWriteOps']) {
+        expect(deps).toHaveProperty(key);
+      }
+      // Nested pricing helpers
+      for (const key of ['getMultiplier', 'getCacheMultiplier']) {
+        expect(deps.pricing).toHaveProperty(key);
+      }
+      // Nested bulk-write operations
+      for (const key of ['insertMany', 'updateBalance']) {
+        expect(deps.bulkWriteOps).toHaveProperty(key);
+      }
+
+      const expectedParams = {
+        user: 'user-123',
+        conversationId: 'convo-123',
+        collectedUsage,
+        context: 'message',
+        ...billingFlags(),
+      };
+      expect(params).toEqual(expect.objectContaining(expectedParams));
+    });
+
+    it('should not set this.usage if collectedUsage is empty (returns undefined)', async () => {
+      mockRecordCollectedUsage.mockResolvedValue(undefined);
+
+      await client.recordCollectedUsage({ collectedUsage: [], ...billingFlags() });
+
+      expect(client.usage).toBeUndefined();
+    });
+
+    it('should not set this.usage if collectedUsage is null (returns undefined)', async () => {
+      mockRecordCollectedUsage.mockResolvedValue(undefined);
+
+      await client.recordCollectedUsage({ collectedUsage: null, ...billingFlags() });
+
+      expect(client.usage).toBeUndefined();
+    });
+
+    it('should set this.usage from recordCollectedUsage result', async () => {
+      const delegateResult = { input_tokens: 200, output_tokens: 75 };
+      mockRecordCollectedUsage.mockResolvedValue(delegateResult);
+
+      await client.recordCollectedUsage({
+        collectedUsage: [{ input_tokens: 200, output_tokens: 75, model: 'gpt-4' }],
+        ...billingFlags(),
+      });
+
+      expect(client.usage).toEqual({ input_tokens: 200, output_tokens: 75 });
+    });
+  });
+
+  /** One agent making several LLM calls in a row (tool-call loop). */
+  describe('sequential execution (single agent with tool calls)', () => {
+    it('should pass all usage entries to recordCollectedUsage', async () => {
+      // Three consecutive calls by the same model
+      const tokenPairs = [
+        [100, 50],
+        [150, 30],
+        [180, 20],
+      ];
+      const collectedUsage = tokenPairs.map(([input_tokens, output_tokens]) => ({
+        input_tokens,
+        output_tokens,
+        model: 'gpt-4',
+      }));
+
+      mockRecordCollectedUsage.mockResolvedValue({ input_tokens: 100, output_tokens: 100 });
+
+      await client.recordCollectedUsage({
+        collectedUsage,
+        balance: { enabled: true },
+        transactions: { enabled: true },
+      });
+
+      expect(mockRecordCollectedUsage).toHaveBeenCalledTimes(1);
+      const forwarded = mockRecordCollectedUsage.mock.calls[0][1];
+      expect(forwarded.collectedUsage).toHaveLength(3);
+      expect(client.usage.output_tokens).toBe(100);
+      expect(client.usage.input_tokens).toBe(100);
+    });
+  });
+
+  /**
+   * Several agents reporting usage for the same response.
+   *
+   * NOTE(review): recordCollectedUsage is mocked in this file, so these tests only
+   * prove the client forwards the entries and surfaces whatever the delegate
+   * returns; the aggregation arithmetic itself is not exercised here.
+   */
+  describe('parallel execution (multiple agents)', () => {
+    it('should pass parallel agent usage to recordCollectedUsage', async () => {
+      const collectedUsage = [
+        { input_tokens: 100, output_tokens: 50, model: 'gpt-4' },
+        { input_tokens: 80, output_tokens: 40, model: 'gpt-4' },
+      ];
+
+      mockRecordCollectedUsage.mockResolvedValue({ input_tokens: 100, output_tokens: 90 });
+
+      await client.recordCollectedUsage({
+        collectedUsage,
+        balance: { enabled: true },
+        transactions: { enabled: true },
+      });
+
+      expect(mockRecordCollectedUsage).toHaveBeenCalledTimes(1);
+      // Verify what the title promises: both agents' entries reach the delegate
+      const [, params] = mockRecordCollectedUsage.mock.calls[0];
+      expect(params.collectedUsage).toEqual(collectedUsage);
+      expect(client.usage.output_tokens).toBe(90);
+    });
+
+    /** Bug regression: parallel agents where second agent has LOWER input tokens produced negative output via incremental calculation. */
+    it('should NOT produce negative output_tokens', async () => {
+      const collectedUsage = [
+        { input_tokens: 200, output_tokens: 100, model: 'gpt-4' },
+        { input_tokens: 50, output_tokens: 30, model: 'gpt-4' },
+      ];
+
+      mockRecordCollectedUsage.mockResolvedValue({ input_tokens: 200, output_tokens: 130 });
+
+      await client.recordCollectedUsage({
+        collectedUsage,
+        balance: { enabled: true },
+        transactions: { enabled: true },
+      });
+
+      expect(client.usage.output_tokens).toBeGreaterThan(0);
+      expect(client.usage.output_tokens).toBe(130);
+    });
+  });
+
+  /** Usage shapes and magnitudes modelled on a long Claude tool-calling session. */
+  describe('real-world scenarios', () => {
+    const OPUS = 'claude-opus-4-5-20251101';
+
+    /** Records usage with billing enabled, mirroring how every test here calls the client */
+    const record = (collectedUsage) =>
+      client.recordCollectedUsage({
+        collectedUsage,
+        balance: { enabled: true },
+        transactions: { enabled: true },
+      });
+
+    it('should correctly handle sequential tool calls with growing context', async () => {
+      // [input_tokens, output_tokens] per call; the input grows as context accumulates
+      const calls = [
+        [31596, 151],
+        [35368, 150],
+        [58362, 295],
+        [112604, 193],
+        [257440, 2217],
+      ];
+      const collectedUsage = calls.map(([input_tokens, output_tokens]) => ({
+        input_tokens,
+        output_tokens,
+        model: OPUS,
+      }));
+
+      mockRecordCollectedUsage.mockResolvedValue({ input_tokens: 31596, output_tokens: 3006 });
+
+      await record(collectedUsage);
+
+      expect(client.usage.input_tokens).toBe(31596);
+      expect(client.usage.output_tokens).toBe(3006);
+    });
+
+    it('should correctly handle cache tokens', async () => {
+      const cachedCall = {
+        input_tokens: 788,
+        output_tokens: 163,
+        input_token_details: { cache_read: 0, cache_creation: 30808 },
+        model: OPUS,
+      };
+
+      mockRecordCollectedUsage.mockResolvedValue({ input_tokens: 31596, output_tokens: 163 });
+
+      await record([cachedCall]);
+
+      expect(client.usage.input_tokens).toBe(31596);
+      expect(client.usage.output_tokens).toBe(163);
+    });
+  });
+
+  /** Which model name ends up in the params handed to recordCollectedUsage. */
+  describe('model fallback', () => {
+    /**
+     * Records one model-less usage entry (optionally with extra call params) and
+     * returns the `model` that was forwarded to the delegate.
+     */
+    const recordAndReadModel = async (extraParams = {}) => {
+      mockRecordCollectedUsage.mockResolvedValue({ input_tokens: 100, output_tokens: 50 });
+
+      await client.recordCollectedUsage({
+        ...extraParams,
+        collectedUsage: [{ input_tokens: 100, output_tokens: 50 }],
+        balance: { enabled: true },
+        transactions: { enabled: true },
+      });
+
+      const forwarded = mockRecordCollectedUsage.mock.calls[0][1];
+      return forwarded.model;
+    };
+
+    it('should use param model when available', async () => {
+      const model = await recordAndReadModel({ model: 'param-model' });
+      expect(model).toBe('param-model');
+    });
+
+    it('should fallback to client.model when param model is missing', async () => {
+      client.model = 'client-model';
+      const model = await recordAndReadModel();
+      expect(model).toBe('client-model');
+    });
+
+    it('should fallback to agent model_parameters.model as last resort', async () => {
+      // No param model and no client.model assigned by the test: the agent's model is expected
+      const model = await recordAndReadModel();
+      expect(model).toBe('gpt-4');
+    });
+  });
+
+  /** getStreamUsage() must expose exactly what recordCollectedUsage stored on the client. */
+  describe('getStreamUsage integration', () => {
+    it('should return the usage object set by recordCollectedUsage', async () => {
+      mockRecordCollectedUsage.mockResolvedValue({ input_tokens: 100, output_tokens: 50 });
+      const collectedUsage = [{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' }];
+
+      await client.recordCollectedUsage({
+        collectedUsage,
+        balance: { enabled: true },
+        transactions: { enabled: true },
+      });
+
+      const usage = client.getStreamUsage();
+      expect(usage).toEqual({ input_tokens: 100, output_tokens: 50 });
+    });
+
+    it('should return undefined before recordCollectedUsage is called', () => {
+      const usage = client.getStreamUsage();
+      expect(usage).toBeUndefined();
+    });
+
+    /** Verifies usage passes the check in BaseClient.sendMessage: if (usage != null && Number(usage[this.outputTokensKey]) > 0) */
+    it('should have output_tokens > 0 for BaseClient.sendMessage check', async () => {
+      mockRecordCollectedUsage.mockResolvedValue({ input_tokens: 200, output_tokens: 130 });
+      const collectedUsage = [
+        { input_tokens: 200, output_tokens: 100, model: 'gpt-4' },
+        { input_tokens: 50, output_tokens: 30, model: 'gpt-4' },
+      ];
+
+      await client.recordCollectedUsage({
+        collectedUsage,
+        balance: { enabled: true },
+        transactions: { enabled: true },
+      });
+
+      const usage = client.getStreamUsage();
+      // `usage != null` rejects both null and undefined; unset usage is undefined,
+      // which `.not.toBeNull()` alone would let through.
+      expect(usage).toBeDefined();
+      expect(usage).not.toBeNull();
+      expect(Number(usage.output_tokens)).toBeGreaterThan(0);
+    });
+  });
+});
--- a/api/server/controllers/agents/request.js
+++ b/api/server/controllers/agents/request.js
@ -3,9 +3,9 @@ const { Constants, ViolationTypes } = require('librechat-data-provider');
 const {
  sendEvent,
  getViolationInfo,
+  buildMessageFiles,
  GenerationJobManager,
  decrementPendingRequest,
-  sanitizeFileForTransmit,
  sanitizeMessageForTransmit,
  checkAndIncrementPendingRequest,
 } = require('@librechat/api');
@ -67,7 +67,15 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
  let client = null;

  try {
+    logger.debug(`[ResumableAgentController] Creating job`, {
+      streamId,
+      conversationId,
+      reqConversationId,
+      userId,
+    });
+
    const job = await GenerationJobManager.createJob(streamId, userId, conversationId);
+    const jobCreatedAt = job.createdAt; // Capture creation time to detect job replacement
    req._resumableStreamId = streamId;

    // Send JSON response IMMEDIATELY so client can connect to SSE stream
@ -244,13 +252,10 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
        conversation.title =
          conversation && !conversation.title ? null : conversation?.title || 'New Chat';

-        if (req.body.files && client.options?.attachments) {
-          userMessage.files = [];
-          const messageFiles = new Set(req.body.files.map((file) => file.file_id));
-          for (const attachment of client.options.attachments) {
-            if (messageFiles.has(attachment.file_id)) {
-              userMessage.files.push(sanitizeFileForTransmit(attachment));
-            }
+        if (req.body.files && Array.isArray(client.options.attachments)) {
+          const files = buildMessageFiles(req.body.files, client.options.attachments);
+          if (files.length > 0) {
+            userMessage.files = files;
          }
          delete userMessage.image_urls;
        }
@ -272,6 +277,33 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
          });
        }

+        // CRITICAL: Save response message BEFORE emitting final event.
+        // This prevents race conditions where the client sends a follow-up message
+        // before the response is saved to the database, causing orphaned parentMessageIds.
+        if (client.savedMessageIds && !client.savedMessageIds.has(messageId)) {
+          await saveMessage(
+            req,
+            { ...response, user: userId, unfinished: wasAbortedBeforeComplete },
+            { context: 'api/server/controllers/agents/request.js - resumable response end' },
+          );
+        }
+
+        // Check if our job was replaced by a new request before emitting
+        // This prevents stale requests from emitting events to newer jobs
+        const currentJob = await GenerationJobManager.getJob(streamId);
+        const jobWasReplaced = !currentJob || currentJob.createdAt !== jobCreatedAt;
+
+        if (jobWasReplaced) {
+          logger.debug(`[ResumableAgentController] Skipping FINAL emit - job was replaced`, {
+            streamId,
+            originalCreatedAt: jobCreatedAt,
+            currentCreatedAt: currentJob?.createdAt,
+          });
+          // Still decrement pending request since we incremented at start
+          await decrementPendingRequest(userId);
+          return;
+        }
+
        if (!wasAbortedBeforeComplete) {
          const finalEvent = {
            final: true,
@ -281,27 +313,35 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
            responseMessage: { ...response },
          };

-          GenerationJobManager.emitDone(streamId, finalEvent);
+          logger.debug(`[ResumableAgentController] Emitting FINAL event`, {
+            streamId,
+            wasAbortedBeforeComplete,
+            userMessageId: userMessage?.messageId,
+            responseMessageId: response?.messageId,
+            conversationId: conversation?.conversationId,
+          });
+
+          await GenerationJobManager.emitDone(streamId, finalEvent);
          GenerationJobManager.completeJob(streamId);
          await decrementPendingRequest(userId);
-
-          if (client.savedMessageIds && !client.savedMessageIds.has(messageId)) {
-            await saveMessage(
-              req,
-              { ...response, user: userId },
-              { context: 'api/server/controllers/agents/request.js - resumable response end' },
-            );
-          }
        } else {
          const finalEvent = {
            final: true,
            conversation,
            title: conversation.title,
            requestMessage: sanitizeMessageForTransmit(userMessage),
-            responseMessage: { ...response, error: true },
-            error: { message: 'Request was aborted' },
+            responseMessage: { ...response, unfinished: true },
          };
-          GenerationJobManager.emitDone(streamId, finalEvent);
+
+          logger.debug(`[ResumableAgentController] Emitting ABORTED FINAL event`, {
+            streamId,
+            wasAbortedBeforeComplete,
+            userMessageId: userMessage?.messageId,
+            responseMessageId: response?.messageId,
+            conversationId: conversation?.conversationId,
+          });
+
+          await GenerationJobManager.emitDone(streamId, finalEvent);
          GenerationJobManager.completeJob(streamId, 'Request aborted');
          await decrementPendingRequest(userId);
        }
@ -334,7 +374,7 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
          // abortJob already handled emitDone and completeJob
        } else {
          logger.error(`[ResumableAgentController] Generation error for ${streamId}:`, error);
-          GenerationJobManager.emitError(streamId, error.message || 'Generation failed');
+          await GenerationJobManager.emitError(streamId, error.message || 'Generation failed');
          GenerationJobManager.completeJob(streamId, error.message);
        }

@ -363,7 +403,7 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
      res.status(500).json({ error: error.message || 'Failed to start generation' });
    } else {
      // JSON already sent, emit error to stream so client can receive it
-      GenerationJobManager.emitError(streamId, error.message || 'Failed to start generation');
+      await GenerationJobManager.emitError(streamId, error.message || 'Failed to start generation');
    }
    GenerationJobManager.completeJob(streamId, error.message);
    await decrementPendingRequest(userId);
@ -596,14 +636,10 @@ const _LegacyAgentController = async (req, res, next, initializeClient, addTitle
    conversation.title =
      conversation && !conversation.title ? null : conversation?.title || 'New Chat';

-    // Process files if needed (sanitize to remove large text fields before transmission)
-    if (req.body.files && client.options?.attachments) {
-      userMessage.files = [];
-      const messageFiles = new Set(req.body.files.map((file) => file.file_id));
-      for (const attachment of client.options.attachments) {
-        if (messageFiles.has(attachment.file_id)) {
-          userMessage.files.push(sanitizeFileForTransmit(attachment));
-        }
+    if (req.body.files && Array.isArray(client.options.attachments)) {
+      const files = buildMessageFiles(req.body.files, client.options.attachments);
+      if (files.length > 0) {
+        userMessage.files = files;
      }
      delete userMessage.image_urls;
    }
--- a/api/server/controllers/agents/responses.js
+++ b/api/server/controllers/agents/responses.js
@ -0,0 +1,910 @@
+const { nanoid } = require('nanoid');
+const { v4: uuidv4 } = require('uuid');
+const { logger } = require('@librechat/data-schemas');
+const { Callback, ToolEndHandler, formatAgentMessages } = require('@librechat/agents');
+const { EModelEndpoint, ResourceType, PermissionBits } = require('librechat-data-provider');
+const {
+  createRun,
+  buildToolSet,
+  createSafeUser,
+  initializeAgent,
+  getBalanceConfig,
+  recordCollectedUsage,
+  getTransactionsConfig,
+  createToolExecuteHandler,
+  // Responses API
+  writeDone,
+  buildResponse,
+  generateResponseId,
+  isValidationFailure,
+  emitResponseCreated,
+  createResponseContext,
+  createResponseTracker,
+  setupStreamingResponse,
+  emitResponseInProgress,
+  convertInputToMessages,
+  validateResponseRequest,
+  buildAggregatedResponse,
+  createResponseAggregator,
+  sendResponsesErrorResponse,
+  createResponsesEventHandlers,
+  createAggregatorEventHandlers,
+} = require('@librechat/api');
+const {
+  createResponsesToolEndCallback,
+  createToolEndCallback,
+} = require('~/server/controllers/agents/callbacks');
+const { loadAgentTools, loadToolsForExecution } = require('~/server/services/ToolService');
+const { findAccessibleResources } = require('~/server/services/PermissionService');
+const { getConvoFiles, saveConvo, getConvo } = require('~/models/Conversation');
+const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
+const { getMultiplier, getCacheMultiplier } = require('~/models/tx');
+const { getAgent, getAgents } = require('~/models/Agent');
+const db = require('~/models');
+
+/**
+ * Module-level app config; stays `null` until `setAppConfig` is called.
+ * `createResponse` reads it through optional chaining to build the allowed providers set.
+ * @type {import('@librechat/api').AppConfig | null}
+ */
+let appConfig = null;
+
+/**
+ * Set the app config for the controller by storing it in module scope.
+ * @param {import('@librechat/api').AppConfig} config - Config object to retain
+ */
+function setAppConfig(config) {
+  appConfig = config;
+}
+
+/**
+ * Builds the `loadTools` callback handed to agent initialization.
+ *
+ * The returned function assembles a minimal agent descriptor from its arguments and
+ * delegates to `loadAgentTools` with `streamId: null`. A failure is logged and swallowed,
+ * so the callback resolves to `undefined` when loading throws.
+ *
+ * @param {AbortSignal} signal - The abort signal forwarded to `loadAgentTools`
+ * @param {boolean} [definitionsOnly=true] - When true, returns only serializable
+ *   tool definitions without creating full tool instances (for event-driven mode)
+ * @returns {Function} Async tool loader
+ */
+function createToolLoader(signal, definitionsOnly = true) {
+  const loadTools = async (params) => {
+    const { req, res, tools, model, agentId, provider, tool_options, tool_resources } = params;
+    const agent = { id: agentId, tools, provider, model, tool_options };
+    const loadOptions = {
+      req,
+      res,
+      agent,
+      signal,
+      tool_resources,
+      definitionsOnly,
+      streamId: null,
+    };
+
+    try {
+      const loaded = await loadAgentTools(loadOptions);
+      return loaded;
+    } catch (error) {
+      logger.error('Error loading tools for agent ' + agentId, error);
+    }
+  };
+
+  return loadTools;
+}
+
+/**
+ * Convert Open Responses input items to internal messages.
+ * Thin wrapper: passes `input` unchanged to `convertInputToMessages` and returns its result.
+ * NOTE(review): the caller in this file passes `request.input`, which may also be a plain
+ * string — confirm `convertInputToMessages` accepts both forms.
+ * @param {import('@librechat/api').InputItem[]} input
+ * @returns {Array} Internal messages
+ */
+function convertToInternalMessages(input) {
+  return convertInputToMessages(input);
+}
+
+/**
+ * Picks the content to replay for one stored message.
+ * A non-empty string `text` wins; an empty `text` falls back to populated `content` parts.
+ * @param {{ text?: unknown, content?: unknown }} msg - Stored message
+ * @returns {string | Array} Text or content parts; `''` when neither is usable
+ */
+function resolveStoredContent(msg) {
+  const hasParts = Array.isArray(msg.content);
+
+  if (typeof msg.text === 'string') {
+    // An empty `text` must not shadow populated content parts.
+    if (msg.text.length === 0 && hasParts && msg.content.length > 0) {
+      return msg.content;
+    }
+    return msg.text;
+  }
+
+  if (hasParts) {
+    return msg.content;
+  }
+
+  return msg.text ? String(msg.text) : '';
+}
+
+/**
+ * Load messages from a previous response/conversation
+ *
+ * Fetches the stored messages via `db.getMessages` and maps each one to the internal
+ * `{ role, content, messageId }` shape. Any failure is logged and yields an empty array,
+ * so the caller proceeds without history instead of failing the request.
+ *
+ * @param {string} conversationId - The conversation/response ID
+ * @param {string} userId - The user ID
+ * @returns {Promise<Array>} Messages from the conversation (empty when none or on error)
+ */
+async function loadPreviousMessages(conversationId, userId) {
+  try {
+    const messages = await db.getMessages({ conversationId, user: userId });
+    if (!messages || messages.length === 0) {
+      return [];
+    }
+
+    // Convert stored messages to internal format
+    return messages.map((msg) => ({
+      role: msg.isCreatedByUser ? 'user' : 'assistant',
+      content: resolveStoredContent(msg),
+      messageId: msg.messageId,
+    }));
+  } catch (error) {
+    logger.error('[Responses API] Error loading previous messages:', error);
+    return [];
+  }
+}
+
+/**
+ * Persists the user-authored input messages of a request.
+ *
+ * Only entries whose `role` is `'user'` are written; they are saved one at a time, in
+ * input order. Non-string content is stored as its JSON serialization, and a message
+ * without a `messageId` gets a freshly generated one.
+ *
+ * @param {import('express').Request} req
+ * @param {string} conversationId
+ * @param {Array} inputMessages - Internal format messages
+ * @param {string} agentId
+ * @returns {Promise<void>}
+ */
+async function saveInputMessages(req, conversationId, inputMessages, agentId) {
+  for (const msg of inputMessages) {
+    if (msg.role !== 'user') {
+      continue;
+    }
+
+    const isPlainText = typeof msg.content === 'string';
+    const record = {
+      messageId: msg.messageId || nanoid(),
+      conversationId,
+      parentMessageId: null,
+      isCreatedByUser: true,
+      text: isPlainText ? msg.content : JSON.stringify(msg.content),
+      sender: 'User',
+      endpoint: EModelEndpoint.agents,
+      model: agentId,
+    };
+
+    await db.saveMessage(req, record, { context: 'Responses API - save user input' });
+  }
+}
+
+/**
+ * Concatenates the text of every `output_text` part found in `message` output items.
+ * @param {Array} outputItems - Output items of a response
+ * @returns {string} Joined text, `''` when there is none
+ */
+function extractOutputText(outputItems) {
+  let combined = '';
+
+  for (const item of outputItems) {
+    if (item.type !== 'message' || !item.content) {
+      continue;
+    }
+
+    for (const part of item.content) {
+      const isText = part.type === 'output_text';
+      if (isText && part.text) {
+        combined += part.text;
+      }
+    }
+  }
+
+  return combined;
+}
+
+/**
+ * Persists the assistant side of a response as a single message.
+ *
+ * The message id is the response id, the text is the concatenation of all `output_text`
+ * parts, and `finish_reason` is `'stop'` for a completed response or the raw status
+ * otherwise.
+ *
+ * @param {import('express').Request} req
+ * @param {string} conversationId
+ * @param {string} responseId
+ * @param {import('@librechat/api').Response} response
+ * @param {string} agentId
+ * @returns {Promise<void>}
+ */
+async function saveResponseOutput(req, conversationId, responseId, response, agentId) {
+  const responseText = extractOutputText(response.output);
+
+  let finishReason = response.status;
+  if (response.status === 'completed') {
+    finishReason = 'stop';
+  }
+
+  const assistantMessage = {
+    messageId: responseId,
+    conversationId,
+    parentMessageId: null,
+    isCreatedByUser: false,
+    text: responseText,
+    sender: 'Agent',
+    endpoint: EModelEndpoint.agents,
+    model: agentId,
+    finish_reason: finishReason,
+    tokenCount: response.usage?.output_tokens,
+  };
+
+  await db.saveMessage(req, assistantMessage, {
+    context: 'Responses API - save assistant response',
+  });
+}
+
+/**
+ * Creates or updates the conversation record backing a stored response.
+ *
+ * The title falls back to `'Open Responses Conversation'` when the agent has no name.
+ *
+ * @param {import('express').Request} req
+ * @param {string} conversationId
+ * @param {string} agentId
+ * @param {object} agent
+ * @returns {Promise<void>}
+ */
+async function saveConversation(req, conversationId, agentId, agent) {
+  const title = agent?.name || 'Open Responses Conversation';
+  const convo = {
+    conversationId,
+    endpoint: EModelEndpoint.agents,
+    agentId,
+    title,
+    model: agent?.model,
+  };
+  const metadata = { context: 'Responses API - save conversation' };
+
+  await saveConvo(req, convo, metadata);
+}
+
+/**
+ * Maps stored messages to Open Responses output items.
+ *
+ * User-created messages are dropped; every remaining message becomes a completed
+ * assistant `message` item carrying a single `output_text` part.
+ *
+ * @param {Array} messages - Stored messages
+ * @returns {Array} Output items
+ */
+function convertMessagesToOutputItems(messages) {
+  const toOutputItem = (stored) => ({
+    type: 'message',
+    id: stored.messageId,
+    role: 'assistant',
+    status: 'completed',
+    content: [{ type: 'output_text', text: stored.text || '', annotations: [] }],
+  });
+
+  return messages.filter((stored) => !stored.isCreatedByUser).map(toOutputItem);
+}
+
+/**
+ * Create Response - POST /v1/responses
+ *
+ * Creates a model response following the Open Responses API specification.
+ * Supports both streaming and non-streaming responses.
+ *
+ * Flow: validate the body, resolve the agent named by `request.model`, initialize it,
+ * merge stored messages for `previous_response_id` with the new input, run the agent,
+ * record token usage (fire-and-forget) and, when `store === true`, persist the
+ * conversation and messages. Streaming is used only when the request asks for it and the
+ * initialized agent's `model_parameters.disableStreaming` is not set.
+ *
+ * Errors raised during the agent lookup or the run are logged and answered with a
+ * Responses API error payload; if headers were already sent, the response is closed
+ * after `writeDone` instead.
+ *
+ * @param {import('express').Request} req
+ * @param {import('express').Response} res
+ */
+const createResponse = async (req, res) => {
+  const requestStartTime = Date.now();
+
+  // Validate request
+  const validation = validateResponseRequest(req.body);
+  if (isValidationFailure(validation)) {
+    return sendResponsesErrorResponse(res, 400, validation.error);
+  }
+
+  const request = validation.request;
+  const agentId = request.model;
+  const isStreaming = request.stream === true;
+  // Requests without a user on `req` are attributed to the 'api-user' placeholder
+  const userId = req.user?.id ?? 'api-user';
+
+  // Look up the agent. The lookup runs before the main try block, so it is guarded here:
+  // a rejected lookup would otherwise leave the handler without a Responses API error.
+  let agent;
+  try {
+    agent = await getAgent({ id: agentId });
+  } catch (error) {
+    logger.error(`[Responses API] Error looking up agent ${agentId}:`, error);
+    return sendResponsesErrorResponse(
+      res,
+      500,
+      error instanceof Error ? error.message : 'Failed to look up agent',
+      'server_error',
+    );
+  }
+  if (!agent) {
+    return sendResponsesErrorResponse(
+      res,
+      404,
+      `Agent not found: ${agentId}`,
+      'not_found',
+      'model_not_found',
+    );
+  }
+
+  // Generate IDs
+  const responseId = generateResponseId();
+  // A follow-up request reuses the previous response ID as its conversation ID
+  const conversationId = request.previous_response_id ?? uuidv4();
+  const parentMessageId = null;
+
+  // Create response context
+  const context = createResponseContext(request, responseId);
+
+  logger.debug(
+    `[Responses API] Request ${responseId} started for agent ${agentId}, stream: ${isStreaming}`,
+  );
+
+  // Set up abort controller
+  const abortController = new AbortController();
+
+  // Handle client disconnect
+  req.on('close', () => {
+    if (!abortController.signal.aborted) {
+      abortController.abort();
+      logger.debug('[Responses API] Client disconnected, aborting');
+    }
+  });
+
+  try {
+    // Build allowed providers set
+    const allowedProviders = new Set(
+      appConfig?.endpoints?.[EModelEndpoint.agents]?.allowedProviders,
+    );
+
+    // Create tool loader
+    const loadTools = createToolLoader(abortController.signal);
+
+    // Initialize the agent first to check for disableStreaming
+    const endpointOption = {
+      endpoint: agent.provider,
+      model_parameters: agent.model_parameters ?? {},
+    };
+
+    const primaryConfig = await initializeAgent(
+      {
+        req,
+        res,
+        loadTools,
+        requestFiles: [],
+        conversationId,
+        parentMessageId,
+        agent,
+        endpointOption,
+        allowedProviders,
+        isInitialAgent: true,
+      },
+      {
+        getConvoFiles,
+        getFiles: db.getFiles,
+        getUserKey: db.getUserKey,
+        getMessages: db.getMessages,
+        updateFilesUsage: db.updateFilesUsage,
+        getUserKeyValues: db.getUserKeyValues,
+        getUserCodeFiles: db.getUserCodeFiles,
+        getToolFilesByIds: db.getToolFilesByIds,
+        getCodeGeneratedFiles: db.getCodeGeneratedFiles,
+      },
+    );
+
+    // Determine if streaming is enabled (check both request and agent config)
+    const streamingDisabled = !!primaryConfig.model_parameters?.disableStreaming;
+    const actuallyStreaming = isStreaming && !streamingDisabled;
+
+    // Load previous messages if previous_response_id is provided
+    let previousMessages = [];
+    if (request.previous_response_id) {
+      previousMessages = await loadPreviousMessages(request.previous_response_id, userId);
+    }
+
+    // Convert input to internal messages
+    const inputMessages = convertToInternalMessages(request.input);
+
+    // Merge previous messages with new input
+    const allMessages = [...previousMessages, ...inputMessages];
+
+    const toolSet = buildToolSet(primaryConfig);
+    const { messages: formattedMessages, indexTokenCountMap } = formatAgentMessages(
+      allMessages,
+      {},
+      toolSet,
+    );
+
+    // Create tracker for streaming or aggregator for non-streaming
+    const tracker = actuallyStreaming ? createResponseTracker() : null;
+    const aggregator = actuallyStreaming ? null : createResponseAggregator();
+
+    // Set up response for streaming
+    if (actuallyStreaming) {
+      setupStreamingResponse(res);
+
+      // Create handler config
+      const handlerConfig = {
+        res,
+        context,
+        tracker,
+      };
+
+      // Emit response.created then response.in_progress per Open Responses spec
+      emitResponseCreated(handlerConfig);
+      emitResponseInProgress(handlerConfig);
+
+      // Create event handlers
+      const { handlers: responsesHandlers, finalizeStream } =
+        createResponsesEventHandlers(handlerConfig);
+
+      // Collect usage for balance tracking
+      const collectedUsage = [];
+
+      // Artifact promises for processing tool outputs
+      /** @type {Promise<import('librechat-data-provider').TAttachment | null>[]} */
+      const artifactPromises = [];
+      // Use Responses API-specific callback that emits librechat:attachment events
+      const toolEndCallback = createResponsesToolEndCallback({
+        req,
+        res,
+        tracker,
+        artifactPromises,
+      });
+
+      // Create tool execute options for event-driven tool execution
+      const toolExecuteOptions = {
+        loadTools: async (toolNames) => {
+          return loadToolsForExecution({
+            req,
+            res,
+            agent,
+            toolNames,
+            signal: abortController.signal,
+            toolRegistry: primaryConfig.toolRegistry,
+            userMCPAuthMap: primaryConfig.userMCPAuthMap,
+            tool_resources: primaryConfig.tool_resources,
+          });
+        },
+        toolEndCallback,
+      };
+
+      // Combine handlers
+      const handlers = {
+        on_message_delta: responsesHandlers.on_message_delta,
+        on_reasoning_delta: responsesHandlers.on_reasoning_delta,
+        on_run_step: responsesHandlers.on_run_step,
+        on_run_step_delta: responsesHandlers.on_run_step_delta,
+        on_chat_model_end: {
+          handle: (event, data) => {
+            responsesHandlers.on_chat_model_end.handle(event, data);
+            const usage = data?.output?.usage_metadata;
+            if (usage) {
+              collectedUsage.push(usage);
+            }
+          },
+        },
+        on_tool_end: new ToolEndHandler(toolEndCallback, logger),
+        on_run_step_completed: { handle: () => {} },
+        on_chain_stream: { handle: () => {} },
+        on_chain_end: { handle: () => {} },
+        on_agent_update: { handle: () => {} },
+        on_custom_event: { handle: () => {} },
+        on_tool_execute: createToolExecuteHandler(toolExecuteOptions),
+      };
+
+      // Create and run the agent
+      const userMCPAuthMap = primaryConfig.userMCPAuthMap;
+
+      const run = await createRun({
+        agents: [primaryConfig],
+        messages: formattedMessages,
+        indexTokenCountMap,
+        runId: responseId,
+        signal: abortController.signal,
+        customHandlers: handlers,
+        requestBody: {
+          messageId: responseId,
+          conversationId,
+        },
+        user: { id: userId },
+      });
+
+      if (!run) {
+        throw new Error('Failed to create agent run');
+      }
+
+      // Process the stream
+      const config = {
+        runName: 'AgentRun',
+        configurable: {
+          thread_id: conversationId,
+          user_id: userId,
+          user: createSafeUser(req.user),
+          requestBody: {
+            messageId: responseId,
+            conversationId,
+          },
+          ...(userMCPAuthMap != null && { userMCPAuthMap }),
+        },
+        signal: abortController.signal,
+        streamMode: 'values',
+        version: 'v2',
+      };
+
+      await run.processStream({ messages: formattedMessages }, config, {
+        callbacks: {
+          [Callback.TOOL_ERROR]: (graph, error, toolId) => {
+            logger.error(`[Responses API] Tool Error "${toolId}"`, error);
+          },
+        },
+      });
+
+      // Record token usage against balance (not awaited; failures are only logged)
+      const balanceConfig = getBalanceConfig(req.config);
+      const transactionsConfig = getTransactionsConfig(req.config);
+      recordCollectedUsage(
+        {
+          spendTokens,
+          spendStructuredTokens,
+          pricing: { getMultiplier, getCacheMultiplier },
+          bulkWriteOps: { insertMany: db.bulkInsertTransactions, updateBalance: db.updateBalance },
+        },
+        {
+          user: userId,
+          conversationId,
+          collectedUsage,
+          context: 'message',
+          messageId: responseId,
+          balance: balanceConfig,
+          transactions: transactionsConfig,
+          model: primaryConfig.model || agent.model_parameters?.model,
+        },
+      ).catch((err) => {
+        logger.error('[Responses API] Error recording usage:', err);
+      });
+
+      // Finalize the stream
+      finalizeStream();
+      res.end();
+
+      const duration = Date.now() - requestStartTime;
+      logger.debug(`[Responses API] Request ${responseId} completed in ${duration}ms (streaming)`);
+
+      // Save to database if store: true (runs after the response has already ended)
+      if (request.store === true) {
+        try {
+          // Save conversation
+          await saveConversation(req, conversationId, agentId, agent);
+
+          // Save input messages
+          await saveInputMessages(req, conversationId, inputMessages, agentId);
+
+          // Build response for saving (use tracker with buildResponse for streaming)
+          const finalResponse = buildResponse(context, tracker, 'completed');
+          await saveResponseOutput(req, conversationId, responseId, finalResponse, agentId);
+
+          logger.debug(
+            `[Responses API] Stored response ${responseId} in conversation ${conversationId}`,
+          );
+        } catch (saveError) {
+          logger.error('[Responses API] Error saving response:', saveError);
+          // Don't fail the request if saving fails
+        }
+      }
+
+      // Wait for artifact processing after response ends (non-blocking)
+      if (artifactPromises.length > 0) {
+        Promise.all(artifactPromises).catch((artifactError) => {
+          logger.warn('[Responses API] Error processing artifacts:', artifactError);
+        });
+      }
+    } else {
+      const aggregatorHandlers = createAggregatorEventHandlers(aggregator);
+
+      // Collect usage for balance tracking
+      const collectedUsage = [];
+
+      /** @type {Promise<import('librechat-data-provider').TAttachment | null>[]} */
+      const artifactPromises = [];
+      const toolEndCallback = createToolEndCallback({ req, res, artifactPromises, streamId: null });
+
+      const toolExecuteOptions = {
+        loadTools: async (toolNames) => {
+          return loadToolsForExecution({
+            req,
+            res,
+            agent,
+            toolNames,
+            signal: abortController.signal,
+            toolRegistry: primaryConfig.toolRegistry,
+            userMCPAuthMap: primaryConfig.userMCPAuthMap,
+            tool_resources: primaryConfig.tool_resources,
+          });
+        },
+        toolEndCallback,
+      };
+
+      const handlers = {
+        on_message_delta: aggregatorHandlers.on_message_delta,
+        on_reasoning_delta: aggregatorHandlers.on_reasoning_delta,
+        on_run_step: aggregatorHandlers.on_run_step,
+        on_run_step_delta: aggregatorHandlers.on_run_step_delta,
+        on_chat_model_end: {
+          handle: (event, data) => {
+            aggregatorHandlers.on_chat_model_end.handle(event, data);
+            const usage = data?.output?.usage_metadata;
+            if (usage) {
+              collectedUsage.push(usage);
+            }
+          },
+        },
+        on_tool_end: new ToolEndHandler(toolEndCallback, logger),
+        on_run_step_completed: { handle: () => {} },
+        on_chain_stream: { handle: () => {} },
+        on_chain_end: { handle: () => {} },
+        on_agent_update: { handle: () => {} },
+        on_custom_event: { handle: () => {} },
+        on_tool_execute: createToolExecuteHandler(toolExecuteOptions),
+      };
+
+      const userMCPAuthMap = primaryConfig.userMCPAuthMap;
+
+      const run = await createRun({
+        agents: [primaryConfig],
+        messages: formattedMessages,
+        indexTokenCountMap,
+        runId: responseId,
+        signal: abortController.signal,
+        customHandlers: handlers,
+        requestBody: {
+          messageId: responseId,
+          conversationId,
+        },
+        user: { id: userId },
+      });
+
+      if (!run) {
+        throw new Error('Failed to create agent run');
+      }
+
+      const config = {
+        runName: 'AgentRun',
+        configurable: {
+          thread_id: conversationId,
+          user_id: userId,
+          user: createSafeUser(req.user),
+          requestBody: {
+            messageId: responseId,
+            conversationId,
+          },
+          ...(userMCPAuthMap != null && { userMCPAuthMap }),
+        },
+        signal: abortController.signal,
+        streamMode: 'values',
+        version: 'v2',
+      };
+
+      await run.processStream({ messages: formattedMessages }, config, {
+        callbacks: {
+          [Callback.TOOL_ERROR]: (graph, error, toolId) => {
+            logger.error(`[Responses API] Tool Error "${toolId}"`, error);
+          },
+        },
+      });
+
+      // Record token usage against balance (not awaited; failures are only logged)
+      const balanceConfig = getBalanceConfig(req.config);
+      const transactionsConfig = getTransactionsConfig(req.config);
+      recordCollectedUsage(
+        {
+          spendTokens,
+          spendStructuredTokens,
+          pricing: { getMultiplier, getCacheMultiplier },
+          bulkWriteOps: { insertMany: db.bulkInsertTransactions, updateBalance: db.updateBalance },
+        },
+        {
+          user: userId,
+          conversationId,
+          collectedUsage,
+          context: 'message',
+          messageId: responseId,
+          balance: balanceConfig,
+          transactions: transactionsConfig,
+          model: primaryConfig.model || agent.model_parameters?.model,
+        },
+      ).catch((err) => {
+        logger.error('[Responses API] Error recording usage:', err);
+      });
+
+      // Unlike the streaming path, artifacts are awaited before the response is built
+      if (artifactPromises.length > 0) {
+        try {
+          await Promise.all(artifactPromises);
+        } catch (artifactError) {
+          logger.warn('[Responses API] Error processing artifacts:', artifactError);
+        }
+      }
+
+      const response = buildAggregatedResponse(context, aggregator);
+
+      if (request.store === true) {
+        try {
+          await saveConversation(req, conversationId, agentId, agent);
+
+          await saveInputMessages(req, conversationId, inputMessages, agentId);
+
+          await saveResponseOutput(req, conversationId, responseId, response, agentId);
+
+          logger.debug(
+            `[Responses API] Stored response ${responseId} in conversation ${conversationId}`,
+          );
+        } catch (saveError) {
+          logger.error('[Responses API] Error saving response:', saveError);
+          // Don't fail the request if saving fails
+        }
+      }
+
+      res.json(response);
+
+      const duration = Date.now() - requestStartTime;
+      logger.debug(
+        `[Responses API] Request ${responseId} completed in ${duration}ms (non-streaming)`,
+      );
+    }
+  } catch (error) {
+    const errorMessage = error instanceof Error ? error.message : 'An error occurred';
+    logger.error('[Responses API] Error:', error);
+
+    // Check if we already started streaming (headers sent)
+    if (res.headersSent) {
+      // Headers already sent, so the status can no longer change: call writeDone and close
+      writeDone(res);
+      res.end();
+    } else {
+      // Forward upstream provider status codes (e.g., Anthropic 400s) instead of masking as 500
+      const statusCode =
+        typeof error?.status === 'number' && error.status >= 400 && error.status < 600
+          ? error.status
+          : 500;
+      const errorType = statusCode >= 400 && statusCode < 500 ? 'invalid_request' : 'server_error';
+      sendResponsesErrorResponse(res, statusCode, errorMessage, errorType);
+    }
+  }
+};
+
+/**
+ * List available agents as models - GET /v1/models (also works with /v1/responses/models)
+ *
+ * Responds with the agents the user has remote access to, each rendered as a model
+ * entry. A request without a user ID gets a 401; any other failure is logged and
+ * answered with a 500.
+ *
+ * @param {import('express').Request} req
+ * @param {import('express').Response} res
+ */
+const listModels = async (req, res) => {
+  /** Renders one agent record as a model entry */
+  const toModel = (agent) => {
+    const createdMs = new Date(agent.createdAt).getTime();
+    return {
+      id: agent.id,
+      object: 'model',
+      created: Math.floor(createdMs / 1000),
+      owned_by: agent.author ?? 'librechat',
+      // Additional metadata
+      name: agent.name,
+      description: agent.description,
+      provider: agent.provider,
+    };
+  };
+
+  try {
+    const userId = req.user?.id;
+    const role = req.user?.role;
+
+    if (!userId) {
+      return sendResponsesErrorResponse(res, 401, 'Authentication required', 'auth_error');
+    }
+
+    // Agents are accessible when the user holds VIEW permission on REMOTE_AGENT
+    const accessibleAgentIds = await findAccessibleResources({
+      userId,
+      role,
+      resourceType: ResourceType.REMOTE_AGENT,
+      requiredPermissions: PermissionBits.VIEW,
+    });
+
+    // Skip the agent query entirely when nothing is accessible
+    const agents =
+      accessibleAgentIds.length > 0 ? await getAgents({ _id: { $in: accessibleAgentIds } }) : [];
+
+    res.json({ object: 'list', data: agents.map(toModel) });
+  } catch (error) {
+    logger.error('[Responses API] Error listing models:', error);
+    const message = error instanceof Error ? error.message : 'Failed to list models';
+    sendResponsesErrorResponse(res, 500, message, 'server_error');
+  }
+};
+
+/**
+ * Get Response - GET /v1/responses/:id
+ *
+ * Retrieves a stored response by its ID.
+ * The response ID maps to a conversationId in LibreChat's storage.
+ *
+ * Status codes: 401 when the request carries no user ID (same contract as `listModels`),
+ * 400 when the ID is missing, 404 when no conversation or no messages exist for the ID,
+ * 500 on unexpected failures. The returned object is rebuilt from stored data: only
+ * `output`, `model`, the timestamps, `user` and `usage` come from storage, and every other
+ * field is a fixed default.
+ *
+ * @param {import('express').Request} req
+ * @param {import('express').Response} res
+ */
+const getResponse = async (req, res) => {
+  try {
+    const responseId = req.params.id;
+    const userId = req.user?.id;
+
+    // Never query storage with an undefined user: the lookups below are scoped by user ID
+    if (!userId) {
+      return sendResponsesErrorResponse(res, 401, 'Authentication required', 'auth_error');
+    }
+
+    if (!responseId) {
+      return sendResponsesErrorResponse(res, 400, 'Response ID is required');
+    }
+
+    // The responseId could be either the response ID or the conversation ID
+    // Try to find a conversation with this ID
+    const conversation = await getConvo(userId, responseId);
+
+    if (!conversation) {
+      return sendResponsesErrorResponse(
+        res,
+        404,
+        `Response not found: ${responseId}`,
+        'not_found',
+        'response_not_found',
+      );
+    }
+
+    // Load messages for this conversation
+    const messages = await db.getMessages({ conversationId: responseId, user: userId });
+
+    if (!messages || messages.length === 0) {
+      return sendResponsesErrorResponse(
+        res,
+        404,
+        `No messages found for response: ${responseId}`,
+        'not_found',
+        'response_not_found',
+      );
+    }
+
+    // Convert messages to Open Responses output format
+    const output = convertMessagesToOutputItems(messages);
+
+    // Find the last assistant message for usage info
+    const lastAssistantMessage = messages.filter((m) => !m.isCreatedByUser).pop();
+
+    // Build the response object
+    const response = {
+      id: responseId,
+      object: 'response',
+      created_at: Math.floor(new Date(conversation.createdAt || Date.now()).getTime() / 1000),
+      completed_at: Math.floor(new Date(conversation.updatedAt || Date.now()).getTime() / 1000),
+      status: 'completed',
+      incomplete_details: null,
+      model: conversation.agentId || conversation.model || 'unknown',
+      previous_response_id: null,
+      instructions: null,
+      output,
+      error: null,
+      tools: [],
+      tool_choice: 'auto',
+      truncation: 'disabled',
+      parallel_tool_calls: true,
+      text: { format: { type: 'text' } },
+      temperature: 1,
+      top_p: 1,
+      presence_penalty: 0,
+      frequency_penalty: 0,
+      top_logprobs: null,
+      reasoning: null,
+      user: userId,
+      // Only output tokens are stored on the message, so input_tokens is reported as 0
+      usage: lastAssistantMessage?.tokenCount
+        ? {
+            input_tokens: 0,
+            output_tokens: lastAssistantMessage.tokenCount,
+            total_tokens: lastAssistantMessage.tokenCount,
+          }
+        : null,
+      max_output_tokens: null,
+      max_tool_calls: null,
+      store: true,
+      background: false,
+      service_tier: 'default',
+      metadata: {},
+      safety_identifier: null,
+      prompt_cache_key: null,
+    };
+
+    res.json(response);
+  } catch (error) {
+    logger.error('[Responses API] Error getting response:', error);
+    sendResponsesErrorResponse(
+      res,
+      500,
+      error instanceof Error ? error.message : 'Failed to get response',
+      'server_error',
+    );
+  }
+};
+
+// Route handlers for the Responses API, plus `setAppConfig` for injecting the app config
+// that `createResponse` reads.
+module.exports = {
+  createResponse,
+  getResponse,
+  listModels,
+  setAppConfig,
+};
--- a/api/server/controllers/agents/v1.js
+++ b/api/server/controllers/agents/v1.js
@ -5,11 +5,15 @@ const { logger } = require('@librechat/data-schemas');
 const {
  agentCreateSchema,
  agentUpdateSchema,
+  refreshListAvatars,
  mergeAgentOcrConversion,
+  MAX_AVATAR_REFRESH_AGENTS,
  convertOcrToContextInPlace,
 } = require('@librechat/api');
 const {
+  Time,
  Tools,
+  CacheKeys,
  Constants,
  FileSources,
  ResourceType,
@ -19,8 +23,6 @@ const {
  PermissionBits,
  actionDelimiter,
  removeNullishValues,
-  CacheKeys,
-  Time,
 } = require('librechat-data-provider');
 const {
  getListAgentsByAccess,
@ -56,46 +58,6 @@ const systemTools = {
 const MAX_SEARCH_LEN = 100;
 const escapeRegex = (str = '') => str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

-/**
- * Opportunistically refreshes S3-backed avatars for agent list responses.
- * Only list responses are refreshed because they're the highest-traffic surface and
- * the avatar URLs have a short-lived TTL. The refresh is cached per-user for 30 minutes
- * via {@link CacheKeys.S3_EXPIRY_INTERVAL} so we refresh once per interval at most.
- * @param {Array} agents - Agents being enriched with S3-backed avatars
- * @param {string} userId - User identifier used for the cache refresh key
- */
-const refreshListAvatars = async (agents, userId) => {
-  if (!agents?.length) {
-    return;
-  }
-
-  const cache = getLogStores(CacheKeys.S3_EXPIRY_INTERVAL);
-  const refreshKey = `${userId}:agents_list`;
-  const alreadyChecked = await cache.get(refreshKey);
-  if (alreadyChecked) {
-    return;
-  }
-
-  await Promise.all(
-    agents.map(async (agent) => {
-      if (agent?.avatar?.source !== FileSources.s3 || !agent?.avatar?.filepath) {
-        return;
-      }
-
-      try {
-        const newPath = await refreshS3Url(agent.avatar);
-        if (newPath && newPath !== agent.avatar.filepath) {
-          agent.avatar = { ...agent.avatar, filepath: newPath };
-        }
-      } catch (err) {
-        logger.debug('[/Agents] Avatar refresh error for list item', err);
-      }
-    }),
-  );
-
-  await cache.set(refreshKey, true, Time.THIRTY_MINUTES);
-};
-
 /**
 * Creates an Agent.
 * @route POST /Agents
@ -119,7 +81,7 @@ const createAgentHandler = async (req, res) => {
    agentData.author = userId;
    agentData.tools = [];

-    const availableTools = await getCachedTools();
+    const availableTools = (await getCachedTools()) ?? {};
    for (const tool of tools) {
      if (availableTools[tool]) {
        agentData.tools.push(tool);
@ -132,16 +94,25 @@ const createAgentHandler = async (req, res) => {

    const agent = await createAgent(agentData);

-    // Automatically grant owner permissions to the creator
    try {
-      await grantPermission({
-        principalType: PrincipalType.USER,
-        principalId: userId,
-        resourceType: ResourceType.AGENT,
-        resourceId: agent._id,
-        accessRoleId: AccessRoleIds.AGENT_OWNER,
-        grantedBy: userId,
-      });
+      await Promise.all([
+        grantPermission({
+          principalType: PrincipalType.USER,
+          principalId: userId,
+          resourceType: ResourceType.AGENT,
+          resourceId: agent._id,
+          accessRoleId: AccessRoleIds.AGENT_OWNER,
+          grantedBy: userId,
+        }),
+        grantPermission({
+          principalType: PrincipalType.USER,
+          principalId: userId,
+          resourceType: ResourceType.REMOTE_AGENT,
+          resourceId: agent._id,
+          accessRoleId: AccessRoleIds.REMOTE_AGENT_OWNER,
+          grantedBy: userId,
+        }),
+      ]);
      logger.debug(
        `[createAgent] Granted owner permissions to user ${userId} for agent ${agent.id}`,
      );
@ -434,16 +405,25 @@ const duplicateAgentHandler = async (req, res) => {
    newAgentData.actions = agentActions;
    const newAgent = await createAgent(newAgentData);

-    // Automatically grant owner permissions to the duplicator
    try {
-      await grantPermission({
-        principalType: PrincipalType.USER,
-        principalId: userId,
-        resourceType: ResourceType.AGENT,
-        resourceId: newAgent._id,
-        accessRoleId: AccessRoleIds.AGENT_OWNER,
-        grantedBy: userId,
-      });
+      await Promise.all([
+        grantPermission({
+          principalType: PrincipalType.USER,
+          principalId: userId,
+          resourceType: ResourceType.AGENT,
+          resourceId: newAgent._id,
+          accessRoleId: AccessRoleIds.AGENT_OWNER,
+          grantedBy: userId,
+        }),
+        grantPermission({
+          principalType: PrincipalType.USER,
+          principalId: userId,
+          resourceType: ResourceType.REMOTE_AGENT,
+          resourceId: newAgent._id,
+          accessRoleId: AccessRoleIds.REMOTE_AGENT_OWNER,
+          grantedBy: userId,
+        }),
+      ]);
      logger.debug(
        `[duplicateAgent] Granted owner permissions to user ${userId} for duplicated agent ${newAgent.id}`,
      );
@ -544,6 +524,38 @@ const getListAgentsHandler = async (req, res) => {
      requiredPermissions: PermissionBits.VIEW,
    });

+    /**
+     * Refresh S3 avatars for this user's accessible agents (up to MAX_AVATAR_REFRESH_AGENTS),
+     * not only the current page; page-size limits would otherwise skip agents beyond page one.
+     */
+    const cache = getLogStores(CacheKeys.S3_EXPIRY_INTERVAL);
+    const refreshKey = `${userId}:agents_avatar_refresh`;
+    let cachedRefresh = await cache.get(refreshKey);
+    const isValidCachedRefresh =
+      cachedRefresh != null && typeof cachedRefresh === 'object' && cachedRefresh.urlCache != null;
+    if (!isValidCachedRefresh) {
+      try {
+        const fullList = await getListAgentsByAccess({
+          accessibleIds,
+          otherParams: {},
+          limit: MAX_AVATAR_REFRESH_AGENTS,
+          after: null,
+        });
+        const { urlCache } = await refreshListAvatars({
+          agents: fullList?.data ?? [],
+          userId,
+          refreshS3Url,
+          updateAgent,
+        });
+        cachedRefresh = { urlCache };
+        await cache.set(refreshKey, cachedRefresh, Time.THIRTY_MINUTES);
+      } catch (err) {
+        logger.error('[/Agents] Error refreshing avatars for full list: %o', err);
+      }
+    } else {
+      logger.debug('[/Agents] S3 avatar refresh already checked, skipping');
+    }
+
    // Use the new ACL-aware function
    const data = await getListAgentsByAccess({
      accessibleIds,
@ -559,11 +571,20 @@ const getListAgentsHandler = async (req, res) => {

    const publicSet = new Set(publiclyAccessibleIds.map((oid) => oid.toString()));

+    const urlCache = cachedRefresh?.urlCache;
    data.data = agents.map((agent) => {
      try {
        if (agent?._id && publicSet.has(agent._id.toString())) {
          agent.isPublic = true;
        }
+        if (
+          urlCache &&
+          agent?.id &&
+          agent?.avatar?.source === FileSources.s3 &&
+          urlCache[agent.id]
+        ) {
+          agent.avatar = { ...agent.avatar, filepath: urlCache[agent.id] };
+        }
      } catch (e) {
        // Silently ignore mapping errors
        void e;
@ -571,15 +592,9 @@ const getListAgentsHandler = async (req, res) => {
      return agent;
    });

-    // Opportunistically refresh S3 avatar URLs for list results with caching
-    try {
-      await refreshListAvatars(data.data, req.user.id);
-    } catch (err) {
-      logger.debug('[/Agents] Skipping avatar refresh for list', err);
-    }
    return res.json(data);
  } catch (error) {
-    logger.error('[/Agents] Error listing Agents', error);
+    logger.error('[/Agents] Error listing Agents: %o', error);
    res.status(500).json({ error: error.message });
  }
 };
@ -655,6 +670,14 @@ const uploadAgentAvatarHandler = async (req, res) => {
    const updatedAgent = await updateAgent({ id: agent_id }, data, {
      updatingUserId: req.user.id,
    });
+
+    try {
+      const avatarCache = getLogStores(CacheKeys.S3_EXPIRY_INTERVAL);
+      await avatarCache.delete(`${req.user.id}:agents_avatar_refresh`);
+    } catch (cacheErr) {
+      logger.error('[/:agent_id/avatar] Error invalidating avatar refresh cache', cacheErr);
+    }
+
    res.status(201).json(updatedAgent);
  } catch (error) {
    const message = 'An error occurred while updating the Agent Avatar';
--- a/api/server/controllers/agents/v1.spec.js
+++ b/api/server/controllers/agents/v1.spec.js
@ -1,8 +1,9 @@
 const mongoose = require('mongoose');
-const { v4: uuidv4 } = require('uuid');
 const { nanoid } = require('nanoid');
-const { MongoMemoryServer } = require('mongodb-memory-server');
+const { v4: uuidv4 } = require('uuid');
 const { agentSchema } = require('@librechat/data-schemas');
+const { FileSources } = require('librechat-data-provider');
+const { MongoMemoryServer } = require('mongodb-memory-server');

 // Only mock the dependencies that are not database-related
 jest.mock('~/server/services/Config', () => ({
@ -54,6 +55,16 @@ jest.mock('~/models', () => ({
  getCategoriesWithCounts: jest.fn(),
 }));

+// Mock cache for S3 avatar refresh tests.
+// The `mock` name prefix is what lets the hoisted jest.mock factory below reference it.
+const mockCache = {
+  get: jest.fn(),
+  set: jest.fn(),
+  delete: jest.fn(),
+};
+// Every getLogStores() call returns the same mockCache, whatever key is requested.
+jest.mock('~/cache', () => ({
+  getLogStores: jest.fn(() => mockCache),
+}));
+
 const {
  createAgent: createAgentHandler,
  updateAgent: updateAgentHandler,
@ -65,6 +76,8 @@ const {
  findPubliclyAccessibleResources,
 } = require('~/server/services/PermissionService');

+const { refreshS3Url } = require('~/server/services/Files/S3/crud');
+
 /**
 * @type {import('mongoose').Model<import('@librechat/data-schemas').IAgent>}
 */
@ -1207,4 +1220,431 @@ describe('Agent Controllers - Mass Assignment Protection', () => {
      expect(response.data[0].is_promoted).toBe(true);
    });
  });
+
+  describe('S3 Avatar Refresh', () => {
+    let userA, userB;
+    let agentWithS3Avatar, agentWithLocalAvatar, agentOwnedByOther;
+
+    beforeEach(async () => {
+      await Agent.deleteMany({});
+      jest.clearAllMocks();
+
+      // Reset cache mock
+      mockCache.get.mockResolvedValue(false);
+      mockCache.set.mockResolvedValue(undefined);
+
+      userA = new mongoose.Types.ObjectId();
+      userB = new mongoose.Types.ObjectId();
+
+      // Create agent with S3 avatar owned by userA
+      agentWithS3Avatar = await Agent.create({
+        id: `agent_${nanoid(12)}`,
+        name: 'Agent with S3 Avatar',
+        description: 'Has S3 avatar',
+        provider: 'openai',
+        model: 'gpt-4',
+        author: userA,
+        avatar: {
+          source: FileSources.s3,
+          filepath: 'old-s3-path.jpg',
+        },
+        versions: [
+          {
+            name: 'Agent with S3 Avatar',
+            description: 'Has S3 avatar',
+            provider: 'openai',
+            model: 'gpt-4',
+            createdAt: new Date(),
+            updatedAt: new Date(),
+          },
+        ],
+      });
+
+      // Create agent with local avatar owned by userA
+      agentWithLocalAvatar = await Agent.create({
+        id: `agent_${nanoid(12)}`,
+        name: 'Agent with Local Avatar',
+        description: 'Has local avatar',
+        provider: 'openai',
+        model: 'gpt-4',
+        author: userA,
+        avatar: {
+          source: 'local',
+          filepath: 'local-path.jpg',
+        },
+        versions: [
+          {
+            name: 'Agent with Local Avatar',
+            description: 'Has local avatar',
+            provider: 'openai',
+            model: 'gpt-4',
+            createdAt: new Date(),
+            updatedAt: new Date(),
+          },
+        ],
+      });
+
+      // Create agent with S3 avatar owned by userB
+      agentOwnedByOther = await Agent.create({
+        id: `agent_${nanoid(12)}`,
+        name: 'Agent Owned By Other',
+        description: 'Owned by userB',
+        provider: 'openai',
+        model: 'gpt-4',
+        author: userB,
+        avatar: {
+          source: FileSources.s3,
+          filepath: 'other-s3-path.jpg',
+        },
+        versions: [
+          {
+            name: 'Agent Owned By Other',
+            description: 'Owned by userB',
+            provider: 'openai',
+            model: 'gpt-4',
+            createdAt: new Date(),
+            updatedAt: new Date(),
+          },
+        ],
+      });
+    });
+
+    test('should skip avatar refresh if cache hit', async () => {
+      mockCache.get.mockResolvedValue({ urlCache: {} });
+      findAccessibleResources.mockResolvedValue([agentWithS3Avatar._id]);
+      findPubliclyAccessibleResources.mockResolvedValue([]);
+
+      const mockReq = {
+        user: { id: userA.toString(), role: 'USER' },
+        query: {},
+      };
+      const mockRes = {
+        status: jest.fn().mockReturnThis(),
+        json: jest.fn().mockReturnThis(),
+      };
+
+      await getListAgentsHandler(mockReq, mockRes);
+
+      // Should not call refreshS3Url when cache hit
+      expect(refreshS3Url).not.toHaveBeenCalled();
+    });
+
+    test('should refresh and persist S3 avatars on cache miss', async () => {
+      mockCache.get.mockResolvedValue(false);
+      findAccessibleResources.mockResolvedValue([agentWithS3Avatar._id]);
+      findPubliclyAccessibleResources.mockResolvedValue([]);
+      refreshS3Url.mockResolvedValue('new-s3-path.jpg');
+
+      const mockReq = {
+        user: { id: userA.toString(), role: 'USER' },
+        query: {},
+      };
+      const mockRes = {
+        status: jest.fn().mockReturnThis(),
+        json: jest.fn().mockReturnThis(),
+      };
+
+      await getListAgentsHandler(mockReq, mockRes);
+
+      // Verify S3 URL was refreshed
+      expect(refreshS3Url).toHaveBeenCalled();
+
+      // Verify cache was set with urlCache map, not a plain boolean
+      expect(mockCache.set).toHaveBeenCalledWith(
+        expect.any(String),
+        expect.objectContaining({ urlCache: expect.any(Object) }),
+        expect.any(Number),
+      );
+
+      // Verify response was returned
+      expect(mockRes.json).toHaveBeenCalled();
+    });
+
+    test('should refresh avatars for all accessible agents (VIEW permission)', async () => {
+      mockCache.get.mockResolvedValue(false);
+      // User A has access to both their own agent and userB's agent
+      findAccessibleResources.mockResolvedValue([agentWithS3Avatar._id, agentOwnedByOther._id]);
+      findPubliclyAccessibleResources.mockResolvedValue([]);
+      refreshS3Url.mockResolvedValue('new-path.jpg');
+
+      const mockReq = {
+        user: { id: userA.toString(), role: 'USER' },
+        query: {},
+      };
+      const mockRes = {
+        status: jest.fn().mockReturnThis(),
+        json: jest.fn().mockReturnThis(),
+      };
+
+      await getListAgentsHandler(mockReq, mockRes);
+
+      // Should be called for both agents - any user with VIEW access can refresh
+      expect(refreshS3Url).toHaveBeenCalledTimes(2);
+    });
+
+    test('should skip non-S3 avatars', async () => {
+      mockCache.get.mockResolvedValue(false);
+      findAccessibleResources.mockResolvedValue([agentWithLocalAvatar._id, agentWithS3Avatar._id]);
+      findPubliclyAccessibleResources.mockResolvedValue([]);
+      refreshS3Url.mockResolvedValue('new-path.jpg');
+
+      const mockReq = {
+        user: { id: userA.toString(), role: 'USER' },
+        query: {},
+      };
+      const mockRes = {
+        status: jest.fn().mockReturnThis(),
+        json: jest.fn().mockReturnThis(),
+      };
+
+      await getListAgentsHandler(mockReq, mockRes);
+
+      // Should only be called for S3 avatar agent
+      expect(refreshS3Url).toHaveBeenCalledTimes(1);
+    });
+
+    test('should not update if S3 URL unchanged', async () => {
+      mockCache.get.mockResolvedValue(false);
+      findAccessibleResources.mockResolvedValue([agentWithS3Avatar._id]);
+      findPubliclyAccessibleResources.mockResolvedValue([]);
+      // Return the same path - no update needed
+      refreshS3Url.mockResolvedValue('old-s3-path.jpg');
+
+      const mockReq = {
+        user: { id: userA.toString(), role: 'USER' },
+        query: {},
+      };
+      const mockRes = {
+        status: jest.fn().mockReturnThis(),
+        json: jest.fn().mockReturnThis(),
+      };
+
+      await getListAgentsHandler(mockReq, mockRes);
+
+      // Verify refreshS3Url was called
+      expect(refreshS3Url).toHaveBeenCalled();
+
+      // Response should still be returned
+      expect(mockRes.json).toHaveBeenCalled();
+    });
+
+    test('should handle S3 refresh errors gracefully', async () => {
+      mockCache.get.mockResolvedValue(false);
+      findAccessibleResources.mockResolvedValue([agentWithS3Avatar._id]);
+      findPubliclyAccessibleResources.mockResolvedValue([]);
+      refreshS3Url.mockRejectedValue(new Error('S3 error'));
+
+      const mockReq = {
+        user: { id: userA.toString(), role: 'USER' },
+        query: {},
+      };
+      const mockRes = {
+        status: jest.fn().mockReturnThis(),
+        json: jest.fn().mockReturnThis(),
+      };
+
+      // Should not throw - handles error gracefully
+      await expect(getListAgentsHandler(mockReq, mockRes)).resolves.not.toThrow();
+
+      // Response should still be returned
+      expect(mockRes.json).toHaveBeenCalled();
+    });
+
+    test('should process agents in batches', async () => {
+      mockCache.get.mockResolvedValue(false);
+
+      // Create 25 agents (should be processed in batches of 20)
+      const manyAgents = [];
+      for (let i = 0; i < 25; i++) {
+        const agent = await Agent.create({
+          id: `agent_${nanoid(12)}`,
+          name: `Agent ${i}`,
+          description: `Agent ${i} description`,
+          provider: 'openai',
+          model: 'gpt-4',
+          author: userA,
+          avatar: {
+            source: FileSources.s3,
+            filepath: `path${i}.jpg`,
+          },
+          versions: [
+            {
+              name: `Agent ${i}`,
+              description: `Agent ${i} description`,
+              provider: 'openai',
+              model: 'gpt-4',
+              createdAt: new Date(),
+              updatedAt: new Date(),
+            },
+          ],
+        });
+        manyAgents.push(agent);
+      }
+
+      const allAgentIds = manyAgents.map((a) => a._id);
+      findAccessibleResources.mockResolvedValue(allAgentIds);
+      findPubliclyAccessibleResources.mockResolvedValue([]);
+      refreshS3Url.mockImplementation((avatar) =>
+        Promise.resolve(avatar.filepath.replace('.jpg', '-new.jpg')),
+      );
+
+      const mockReq = {
+        user: { id: userA.toString(), role: 'USER' },
+        query: {},
+      };
+      const mockRes = {
+        status: jest.fn().mockReturnThis(),
+        json: jest.fn().mockReturnThis(),
+      };
+
+      await getListAgentsHandler(mockReq, mockRes);
+
+      // All 25 should be processed
+      expect(refreshS3Url).toHaveBeenCalledTimes(25);
+    });
+
+    test('should skip agents without id or author', async () => {
+      mockCache.get.mockResolvedValue(false);
+
+      // Despite the test name, this agent is created with both `id` and `author` set
+      const agentWithoutId = await Agent.create({
+        id: `agent_${nanoid(12)}`,
+        name: 'Agent without ID field',
+        description: 'Testing',
+        provider: 'openai',
+        model: 'gpt-4',
+        author: userA,
+        avatar: {
+          source: FileSources.s3,
+          filepath: 'test-path.jpg',
+        },
+        versions: [
+          {
+            name: 'Agent without ID field',
+            description: 'Testing',
+            provider: 'openai',
+            model: 'gpt-4',
+            createdAt: new Date(),
+            updatedAt: new Date(),
+          },
+        ],
+      });
+
+      findAccessibleResources.mockResolvedValue([agentWithoutId._id, agentWithS3Avatar._id]);
+      findPubliclyAccessibleResources.mockResolvedValue([]);
+      refreshS3Url.mockResolvedValue('new-path.jpg');
+
+      const mockReq = {
+        user: { id: userA.toString(), role: 'USER' },
+        query: {},
+      };
+      const mockRes = {
+        status: jest.fn().mockReturnThis(),
+        json: jest.fn().mockReturnThis(),
+      };
+
+      await getListAgentsHandler(mockReq, mockRes);
+
+      // Should still complete without errors
+      expect(mockRes.json).toHaveBeenCalled();
+    });
+
+    test('should use MAX_AVATAR_REFRESH_AGENTS limit for full list query', async () => {
+      mockCache.get.mockResolvedValue(false);
+      findAccessibleResources.mockResolvedValue([]);
+      findPubliclyAccessibleResources.mockResolvedValue([]);
+
+      const mockReq = {
+        user: { id: userA.toString(), role: 'USER' },
+        query: {},
+      };
+      const mockRes = {
+        status: jest.fn().mockReturnThis(),
+        json: jest.fn().mockReturnThis(),
+      };
+
+      await getListAgentsHandler(mockReq, mockRes);
+
+      // Only checks that the handler responded; the limit passed to the query is not asserted
+      expect(mockRes.json).toHaveBeenCalled();
+    });
+
+    test('should treat legacy boolean cache entry as a miss and run refresh', async () => {
+      // Simulate a cache entry written by the pre-fix code
+      mockCache.get.mockResolvedValue(true);
+      findAccessibleResources.mockResolvedValue([agentWithS3Avatar._id]);
+      findPubliclyAccessibleResources.mockResolvedValue([]);
+      refreshS3Url.mockResolvedValue('new-s3-path.jpg');
+
+      const mockReq = {
+        user: { id: userA.toString(), role: 'USER' },
+        query: {},
+      };
+      const mockRes = {
+        status: jest.fn().mockReturnThis(),
+        json: jest.fn().mockReturnThis(),
+      };
+
+      await getListAgentsHandler(mockReq, mockRes);
+
+      // Boolean true fails the shape guard, so refresh must run
+      expect(refreshS3Url).toHaveBeenCalled();
+      // Cache is overwritten with the proper format
+      expect(mockCache.set).toHaveBeenCalledWith(
+        expect.any(String),
+        expect.objectContaining({ urlCache: expect.any(Object) }),
+        expect.any(Number),
+      );
+    });
+
+    test('should apply cached urlCache filepath to paginated response on cache hit', async () => {
+      const agentId = agentWithS3Avatar.id;
+      const cachedUrl = 'cached-presigned-url.jpg';
+
+      mockCache.get.mockResolvedValue({ urlCache: { [agentId]: cachedUrl } });
+      findAccessibleResources.mockResolvedValue([agentWithS3Avatar._id]);
+      findPubliclyAccessibleResources.mockResolvedValue([]);
+
+      const mockReq = {
+        user: { id: userA.toString(), role: 'USER' },
+        query: {},
+      };
+      const mockRes = {
+        status: jest.fn().mockReturnThis(),
+        json: jest.fn().mockReturnThis(),
+      };
+
+      await getListAgentsHandler(mockReq, mockRes);
+
+      expect(refreshS3Url).not.toHaveBeenCalled();
+
+      const responseData = mockRes.json.mock.calls[0][0];
+      const agent = responseData.data.find((a) => a.id === agentId);
+      // Cached URL is served, not the stale DB value 'old-s3-path.jpg'
+      expect(agent.avatar.filepath).toBe(cachedUrl);
+    });
+
+    test('should preserve DB filepath for agents absent from urlCache on cache hit', async () => {
+      mockCache.get.mockResolvedValue({ urlCache: {} });
+      findAccessibleResources.mockResolvedValue([agentWithS3Avatar._id]);
+      findPubliclyAccessibleResources.mockResolvedValue([]);
+
+      const mockReq = {
+        user: { id: userA.toString(), role: 'USER' },
+        query: {},
+      };
+      const mockRes = {
+        status: jest.fn().mockReturnThis(),
+        json: jest.fn().mockReturnThis(),
+      };
+
+      await getListAgentsHandler(mockReq, mockRes);
+
+      expect(refreshS3Url).not.toHaveBeenCalled();
+
+      const responseData = mockRes.json.mock.calls[0][0];
+      const agent = responseData.data.find((a) => a.id === agentWithS3Avatar.id);
+      expect(agent.avatar.filepath).toBe('old-s3-path.jpg');
+    });
+  });
 });
--- a/api/server/controllers/assistants/v1.js
+++ b/api/server/controllers/assistants/v1.js
@ -31,7 +31,7 @@ const createAssistant = async (req, res) => {
    delete assistantData.conversation_starters;
    delete assistantData.append_current_datetime;

-    const toolDefinitions = await getCachedTools();
+    const toolDefinitions = (await getCachedTools()) ?? {};

    assistantData.tools = tools
      .map((tool) => {
@ -136,7 +136,7 @@ const patchAssistant = async (req, res) => {
      ...updateData
    } = req.body;

-    const toolDefinitions = await getCachedTools();
+    const toolDefinitions = (await getCachedTools()) ?? {};

    updateData.tools = (updateData.tools ?? [])
      .map((tool) => {
--- a/api/server/controllers/assistants/v2.js
+++ b/api/server/controllers/assistants/v2.js
@ -28,7 +28,7 @@ const createAssistant = async (req, res) => {
    delete assistantData.conversation_starters;
    delete assistantData.append_current_datetime;

-    const toolDefinitions = await getCachedTools();
+    const toolDefinitions = (await getCachedTools()) ?? {};

    assistantData.tools = tools
      .map((tool) => {
@ -125,7 +125,7 @@ const updateAssistant = async ({ req, openai, assistant_id, updateData }) => {

  let hasFileSearch = false;
  for (const tool of updateData.tools ?? []) {
-    const toolDefinitions = await getCachedTools();
+    const toolDefinitions = (await getCachedTools()) ?? {};
    let actualTool = typeof tool === 'string' ? toolDefinitions[tool] : tool;

    if (!actualTool && manifestToolMap[tool] && manifestToolMap[tool].toolkit === true) {
--- a/api/server/controllers/auth/LogoutController.js
+++ b/api/server/controllers/auth/LogoutController.js
@ -8,13 +8,16 @@ const logoutController = async (req, res) => {
  const parsedCookies = req.headers.cookie ? cookies.parse(req.headers.cookie) : {};
  const isOpenIdUser = req.user?.openidId != null && req.user?.provider === 'openid';

-  /** For OpenID users, read refresh token from session; for others, use cookie */
+  /** For OpenID users, read tokens from session (with cookie fallback) */
  let refreshToken;
+  let idToken;
  if (isOpenIdUser && req.session?.openidTokens) {
    refreshToken = req.session.openidTokens.refreshToken;
+    idToken = req.session.openidTokens.idToken;
    delete req.session.openidTokens;
  }
  refreshToken = refreshToken || parsedCookies.refreshToken;
+  idToken = idToken || parsedCookies.openid_id_token;

  try {
    const logout = await logoutUser(req, refreshToken);
@ -22,6 +25,7 @@ const logoutController = async (req, res) => {

    res.clearCookie('refreshToken');
    res.clearCookie('openid_access_token');
+    res.clearCookie('openid_id_token');
    res.clearCookie('openid_user_id');
    res.clearCookie('token_provider');
    const response = { message };
@ -30,21 +34,34 @@ const logoutController = async (req, res) => {
      isEnabled(process.env.OPENID_USE_END_SESSION_ENDPOINT) &&
      process.env.OPENID_ISSUER
    ) {
-      const openIdConfig = getOpenIdConfig();
-      if (!openIdConfig) {
-        logger.warn(
-          '[logoutController] OpenID config not found. Please verify that the open id configuration and initialization are correct.',
-        );
-      } else {
-        const endSessionEndpoint = openIdConfig
-          ? openIdConfig.serverMetadata().end_session_endpoint
-          : null;
+      let openIdConfig;
+      try {
+        openIdConfig = getOpenIdConfig();
+      } catch (err) {
+        logger.warn('[logoutController] OpenID config not available:', err.message);
+      }
+      if (openIdConfig) {
+        const endSessionEndpoint = openIdConfig.serverMetadata().end_session_endpoint;
        if (endSessionEndpoint) {
          const endSessionUrl = new URL(endSessionEndpoint);
          /** Redirect back to app's login page after IdP logout */
          const postLogoutRedirectUri =
            process.env.OPENID_POST_LOGOUT_REDIRECT_URI || `${process.env.DOMAIN_CLIENT}/login`;
          endSessionUrl.searchParams.set('post_logout_redirect_uri', postLogoutRedirectUri);
+
+          /** Add id_token_hint (preferred) or client_id for OIDC spec compliance */
+          if (idToken) {
+            endSessionUrl.searchParams.set('id_token_hint', idToken);
+          } else if (process.env.OPENID_CLIENT_ID) {
+            endSessionUrl.searchParams.set('client_id', process.env.OPENID_CLIENT_ID);
+          } else {
+            logger.warn(
+              '[logoutController] Neither id_token_hint nor OPENID_CLIENT_ID is available. ' +
+                'To enable id_token_hint, set OPENID_REUSE_TOKENS=true. ' +
+                'The OIDC end-session request may be rejected by the identity provider.',
+            );
+          }
+
          response.redirect = endSessionUrl.toString();
        } else {
          logger.warn(
--- a/api/server/controllers/auth/LogoutController.spec.js
+++ b/api/server/controllers/auth/LogoutController.spec.js
@ -0,0 +1,259 @@
+const cookies = require('cookie');
+
+const mockLogoutUser = jest.fn();
+const mockLogger = { warn: jest.fn(), error: jest.fn() };
+const mockIsEnabled = jest.fn();
+const mockGetOpenIdConfig = jest.fn();
+
+jest.mock('cookie');
+jest.mock('@librechat/api', () => ({ isEnabled: (...args) => mockIsEnabled(...args) }));
+jest.mock('@librechat/data-schemas', () => ({ logger: mockLogger }));
+jest.mock('~/server/services/AuthService', () => ({
+  logoutUser: (...args) => mockLogoutUser(...args),
+}));
+jest.mock('~/strategies', () => ({ getOpenIdConfig: () => mockGetOpenIdConfig() }));
+
+const { logoutController } = require('./LogoutController');
+
+/**
+ * Creates a minimal Express-like request object for logoutController tests.
+ * Keys supplied in `overrides` replace the matching default wholesale (shallow merge).
+ * @param {Object} [overrides] - Top-level request properties that replace the defaults.
+ * @returns {Object} Mock request carrying `user`, `headers` and `session`.
+ */
+function buildReq(overrides = {}) {
+  const session = {
+    openidTokens: { refreshToken: 'srt', idToken: 'small-id-token' },
+    destroy: jest.fn(),
+  };
+  const defaults = {
+    user: { _id: 'user1', openidId: 'oid1', provider: 'openid' },
+    headers: { cookie: 'refreshToken=rt1' },
+    session,
+  };
+  return { ...defaults, ...overrides };
+}
+
+/**
+ * Creates a mock Express response whose `status`, `send` and `json` spies
+ * return the response itself so calls can be chained.
+ * @returns {Object} Mock response exposing jest spies.
+ */
+function buildRes() {
+  const chainable = () => jest.fn().mockReturnThis();
+  return {
+    status: chainable(),
+    send: chainable(),
+    json: chainable(),
+    clearCookie: jest.fn(),
+  };
+}
+
+const ORIGINAL_ENV = process.env;
+
+beforeEach(() => {
+  jest.clearAllMocks();
+
+  /** Rebuild process.env from the saved original so per-test edits start from a known state */
+  const openIdEnv = {
+    OPENID_USE_END_SESSION_ENDPOINT: 'true',
+    OPENID_ISSUER: 'https://idp.example.com',
+    OPENID_CLIENT_ID: 'my-client-id',
+    DOMAIN_CLIENT: 'https://app.example.com',
+  };
+  process.env = { ...ORIGINAL_ENV, ...openIdEnv };
+
+  /** Default stubs; individual tests override these as needed */
+  const serverMetadata = () => ({
+    end_session_endpoint: 'https://idp.example.com/logout',
+  });
+  cookies.parse.mockReturnValue({ refreshToken: 'cookie-rt' });
+  mockLogoutUser.mockResolvedValue({ status: 200, message: 'Logout successful' });
+  mockIsEnabled.mockReturnValue(true);
+  mockGetOpenIdConfig.mockReturnValue({ serverMetadata });
+});
+
+afterAll(() => {
+  process.env = ORIGINAL_ENV;
+});
+
+describe('LogoutController', () => {
+  describe('id_token_hint from session', () => {
+    it('sets id_token_hint when session has idToken', async () => {
+      const req = buildReq();
+      const res = buildRes();
+
+      await logoutController(req, res);
+
+      const body = res.send.mock.calls[0][0];
+      expect(body.redirect).toContain('id_token_hint=small-id-token');
+      expect(body.redirect).not.toContain('client_id=');
+    });
+  });
+
+  describe('id_token_hint from cookie fallback', () => {
+    it('uses cookie id_token when session has no tokens', async () => {
+      cookies.parse.mockReturnValue({
+        refreshToken: 'cookie-rt',
+        openid_id_token: 'cookie-id-token',
+      });
+      const req = buildReq({ session: { destroy: jest.fn() } });
+      const res = buildRes();
+
+      await logoutController(req, res);
+
+      const body = res.send.mock.calls[0][0];
+      expect(body.redirect).toContain('id_token_hint=cookie-id-token');
+    });
+  });
+
+  describe('client_id fallback', () => {
+    it('falls back to client_id when no idToken is available', async () => {
+      cookies.parse.mockReturnValue({ refreshToken: 'cookie-rt' });
+      const req = buildReq({ session: { destroy: jest.fn() } });
+      const res = buildRes();
+
+      await logoutController(req, res);
+
+      const body = res.send.mock.calls[0][0];
+      expect(body.redirect).toContain('client_id=my-client-id');
+      expect(body.redirect).not.toContain('id_token_hint=');
+    });
+
+    it('does not produce client_id=undefined when OPENID_CLIENT_ID is unset', async () => {
+      delete process.env.OPENID_CLIENT_ID;
+      cookies.parse.mockReturnValue({ refreshToken: 'cookie-rt' });
+      const req = buildReq({ session: { destroy: jest.fn() } });
+      const res = buildRes();
+
+      await logoutController(req, res);
+
+      const body = res.send.mock.calls[0][0];
+      expect(body.redirect).not.toContain('client_id=');
+      expect(body.redirect).not.toContain('undefined');
+      expect(mockLogger.warn).toHaveBeenCalledWith(
+        expect.stringContaining('Neither id_token_hint nor OPENID_CLIENT_ID'),
+      );
+    });
+  });
+
+  describe('OPENID_USE_END_SESSION_ENDPOINT disabled', () => {
+    it('does not include redirect when disabled', async () => {
+      mockIsEnabled.mockReturnValue(false);
+      const req = buildReq();
+      const res = buildRes();
+
+      await logoutController(req, res);
+
+      const body = res.send.mock.calls[0][0];
+      expect(body.redirect).toBeUndefined();
+    });
+  });
+
+  describe('OPENID_ISSUER unset', () => {
+    it('does not include redirect when OPENID_ISSUER is missing', async () => {
+      delete process.env.OPENID_ISSUER;
+      const req = buildReq();
+      const res = buildRes();
+
+      await logoutController(req, res);
+
+      const body = res.send.mock.calls[0][0];
+      expect(body.redirect).toBeUndefined();
+    });
+  });
+
+  describe('non-OpenID user', () => {
+    it('does not include redirect for non-OpenID users', async () => {
+      const req = buildReq({
+        user: { _id: 'user1', provider: 'local' },
+      });
+      const res = buildRes();
+
+      await logoutController(req, res);
+
+      const body = res.send.mock.calls[0][0];
+      expect(body.redirect).toBeUndefined();
+    });
+  });
+
+  describe('post_logout_redirect_uri', () => {
+    it('uses OPENID_POST_LOGOUT_REDIRECT_URI when set', async () => {
+      process.env.OPENID_POST_LOGOUT_REDIRECT_URI = 'https://custom.example.com/logged-out';
+      const req = buildReq();
+      const res = buildRes();
+
+      await logoutController(req, res);
+
+      const body = res.send.mock.calls[0][0];
+      const url = new URL(body.redirect);
+      expect(url.searchParams.get('post_logout_redirect_uri')).toBe(
+        'https://custom.example.com/logged-out',
+      );
+    });
+
+    it('defaults to DOMAIN_CLIENT/login when OPENID_POST_LOGOUT_REDIRECT_URI is unset', async () => {
+      delete process.env.OPENID_POST_LOGOUT_REDIRECT_URI;
+      const req = buildReq();
+      const res = buildRes();
+
+      await logoutController(req, res);
+
+      const body = res.send.mock.calls[0][0];
+      const url = new URL(body.redirect);
+      expect(url.searchParams.get('post_logout_redirect_uri')).toBe(
+        'https://app.example.com/login',
+      );
+    });
+  });
+
+  describe('OpenID config not available', () => {
+    it('warns and returns no redirect when getOpenIdConfig throws', async () => {
+      mockGetOpenIdConfig.mockImplementation(() => {
+        throw new Error('OpenID configuration has not been initialized');
+      });
+      const req = buildReq();
+      const res = buildRes();
+
+      await logoutController(req, res);
+
+      const body = res.send.mock.calls[0][0];
+      expect(body.redirect).toBeUndefined();
+      expect(mockLogger.warn).toHaveBeenCalledWith(
+        expect.stringContaining('OpenID config not available'),
+        'OpenID configuration has not been initialized',
+      );
+    });
+  });
+
+  describe('end_session_endpoint not in metadata', () => {
+    it('warns and returns no redirect when end_session_endpoint is missing', async () => {
+      mockGetOpenIdConfig.mockReturnValue({
+        serverMetadata: () => ({}),
+      });
+      const req = buildReq();
+      const res = buildRes();
+
+      await logoutController(req, res);
+
+      const body = res.send.mock.calls[0][0];
+      expect(body.redirect).toBeUndefined();
+      expect(mockLogger.warn).toHaveBeenCalledWith(
+        expect.stringContaining('end_session_endpoint not found'),
+      );
+    });
+  });
+
+  describe('error handling', () => {
+    it('returns 500 on logoutUser error', async () => {
+      mockLogoutUser.mockRejectedValue(new Error('session error'));
+      const req = buildReq();
+      const res = buildRes();
+
+      await logoutController(req, res);
+
+      expect(res.status).toHaveBeenCalledWith(500);
+      expect(res.json).toHaveBeenCalledWith({ message: 'session error' });
+    });
+  });
+
+  describe('cookie clearing', () => {
+    it('clears all auth cookies on successful logout', async () => {
+      /** Every cookie name the controller is expected to clear */
+      const expectedCookies = [
+        'refreshToken',
+        'openid_access_token',
+        'openid_id_token',
+        'openid_user_id',
+        'token_provider',
+      ];
+      const res = buildRes();
+
+      await logoutController(buildReq(), res);
+
+      for (const cookieName of expectedCookies) {
+        expect(res.clearCookie).toHaveBeenCalledWith(cookieName);
+      }
+    });
+  });
+});
--- a/api/server/controllers/auth/oauth.js
+++ b/api/server/controllers/auth/oauth.js
@ -0,0 +1,79 @@
+const { CacheKeys } = require('librechat-data-provider');
+const { logger, DEFAULT_SESSION_EXPIRY } = require('@librechat/data-schemas');
+const {
+  isEnabled,
+  getAdminPanelUrl,
+  isAdminPanelRedirect,
+  generateAdminExchangeCode,
+} = require('@librechat/api');
+const { syncUserEntraGroupMemberships } = require('~/server/services/PermissionService');
+const { setAuthTokens, setOpenIDAuthTokens } = require('~/server/services/AuthService');
+const getLogStores = require('~/cache/getLogStores');
+const { checkBan } = require('~/server/middleware');
+const { generateToken } = require('~/models');
+
+const domains = {
+  client: process.env.DOMAIN_CLIENT,
+  server: process.env.DOMAIN_SERVER,
+};
+
+/**
+ * Builds the Express handler that finalizes an OAuth login.
+ *
+ * The returned handler either hands an exchange code to the admin panel or
+ * sets auth tokens on the response, then redirects to `redirectUri`.
+ *
+ * @param {string} [redirectUri=domains.client] - URL the user is redirected to on success.
+ * @returns {Function} Async Express middleware `(req, res, next)`.
+ */
+function createOAuthHandler(redirectUri = domains.client) {
+  /**
+   * A handler to process OAuth authentication results.
+   * @type {Function}
+   * @param {ServerRequest} req - Express request object.
+   * @param {ServerResponse} res - Express response object.
+   * @param {NextFunction} next - Express next middleware function.
+   */
+  return async (req, res, next) => {
+    try {
+      /** A response was already started upstream; nothing left to do */
+      if (res.headersSent) {
+        return;
+      }
+
+      await checkBan(req, res);
+      if (req.banned) {
+        return;
+      }
+
+      /** Check if this is an admin panel redirect (cross-origin) */
+      if (isAdminPanelRedirect(redirectUri, getAdminPanelUrl(), domains.client)) {
+        /** For admin panel, generate exchange code instead of setting cookies */
+        const cache = getLogStores(CacheKeys.ADMIN_OAUTH_EXCHANGE);
+        const sessionExpiry = Number(process.env.SESSION_EXPIRY) || DEFAULT_SESSION_EXPIRY;
+        const token = await generateToken(req.user, sessionExpiry);
+
+        /** Get refresh token from tokenset for OpenID users */
+        const refreshToken =
+          req.user.tokenset?.refresh_token || req.user.federatedTokens?.refresh_token;
+
+        const exchangeCode = await generateAdminExchangeCode(cache, req.user, token, refreshToken);
+
+        const callbackUrl = new URL(redirectUri);
+        callbackUrl.searchParams.set('code', exchangeCode);
+        logger.info(`[OAuth] Admin panel redirect with exchange code for user: ${req.user.email}`);
+        return res.redirect(callbackUrl.toString());
+      }
+
+      /** Standard OAuth flow - set cookies and redirect */
+      if (
+        req.user &&
+        req.user.provider === 'openid' &&
+        isEnabled(process.env.OPENID_REUSE_TOKENS) === true
+      ) {
+        await syncUserEntraGroupMemberships(req.user, req.user.tokenset.access_token);
+        setOpenIDAuthTokens(req.user.tokenset, req, res, req.user._id.toString());
+      } else {
+        await setAuthTokens(req.user._id, res);
+      }
+      res.redirect(redirectUri);
+    } catch (err) {
+      /** Log locally, then defer to the Express error pipeline */
+      logger.error('Error in setting authentication tokens:', err);
+      next(err);
+    }
+  };
+}
+
+module.exports = {
+  createOAuthHandler,
+};
--- a/api/server/controllers/mcp.js
+++ b/api/server/controllers/mcp.js
@ -7,9 +7,11 @@
 */
 const { logger } = require('@librechat/data-schemas');
 const {
+  MCPErrorCodes,
+  redactServerSecrets,
+  redactAllServerSecrets,
  isMCPDomainNotAllowedError,
  isMCPInspectionFailedError,
-  MCPErrorCodes,
 } = require('@librechat/api');
 const { Constants, MCPServerUserInputSchema } = require('librechat-data-provider');
 const { cacheMCPServerTools, getMCPServerTools } = require('~/server/services/Config');
@ -181,10 +183,8 @@ const getMCPServersList = async (req, res) => {
      return res.status(401).json({ message: 'Unauthorized' });
    }

-    // 2. Get all server configs from registry (YAML + DB)
    const serverConfigs = await getMCPServersRegistry().getAllServerConfigs(userId);
-
-    return res.json(serverConfigs);
+    return res.json(redactAllServerSecrets(serverConfigs));
  } catch (error) {
    logger.error('[getMCPServersList]', error);
    res.status(500).json({ error: error.message });
@ -215,7 +215,7 @@ const createMCPServerController = async (req, res) => {
    );
    res.status(201).json({
      serverName: result.serverName,
-      ...result.config,
+      ...redactServerSecrets(result.config),
    });
  } catch (error) {
    logger.error('[createMCPServer]', error);
@ -243,7 +243,7 @@ const getMCPServerById = async (req, res) => {
      return res.status(404).json({ message: 'MCP server not found' });
    }

-    res.status(200).json(parsedConfig);
+    res.status(200).json(redactServerSecrets(parsedConfig));
  } catch (error) {
    logger.error('[getMCPServerById]', error);
    res.status(500).json({ message: error.message });
@ -274,7 +274,7 @@ const updateMCPServerController = async (req, res) => {
      userId,
    );

-    res.status(200).json(parsedConfig);
+    res.status(200).json(redactServerSecrets(parsedConfig));
  } catch (error) {
    logger.error('[updateMCPServer]', error);
    const mcpErrorResponse = handleMCPError(error, res);
--- a/api/server/experimental.js
+++ b/api/server/experimental.js
@ -14,6 +14,7 @@ const { logger } = require('@librechat/data-schemas');
 const mongoSanitize = require('express-mongo-sanitize');
 const {
  isEnabled,
+  apiNotFound,
  ErrorController,
  performStartupChecks,
  handleJsonParseError,
@ -297,8 +298,10 @@ if (cluster.isMaster) {
    /** Routes */
    app.use('/oauth', routes.oauth);
    app.use('/api/auth', routes.auth);
+    app.use('/api/admin', routes.adminAuth);
    app.use('/api/actions', routes.actions);
    app.use('/api/keys', routes.keys);
+    app.use('/api/api-keys', routes.apiKeys);
    app.use('/api/user', routes.user);
    app.use('/api/search', routes.search);
    app.use('/api/messages', routes.messages);
@ -309,7 +312,6 @@ if (cluster.isMaster) {
    app.use('/api/endpoints', routes.endpoints);
    app.use('/api/balance', routes.balance);
    app.use('/api/models', routes.models);
-    app.use('/api/plugins', routes.plugins);
    app.use('/api/config', routes.config);
    app.use('/api/assistants', routes.assistants);
    app.use('/api/files', await routes.files.initialize());
@ -323,8 +325,8 @@ if (cluster.isMaster) {
    app.use('/api/tags', routes.tags);
    app.use('/api/mcp', routes.mcp);

-    /** Error handler */
-    app.use(ErrorController);
+    /** 404 for unmatched API routes */
+    app.use('/api', apiNotFound);

    /** SPA fallback - serve index.html for all unmatched routes */
    app.use((req, res) => {
@ -342,6 +344,9 @@ if (cluster.isMaster) {
      res.send(updatedIndexHtml);
    });

+    /** Error handler (must be last - Express identifies error middleware by its 4-arg signature) */
+    app.use(ErrorController);
+
    /** Start listening on shared port (cluster will distribute connections) */
    app.listen(port, host, async (err) => {
      if (err) {
--- a/api/server/index.js
+++ b/api/server/index.js
@ -12,12 +12,14 @@ const { logger } = require('@librechat/data-schemas');
 const mongoSanitize = require('express-mongo-sanitize');
 const {
  isEnabled,
+  apiNotFound,
  ErrorController,
+  memoryDiagnostics,
  performStartupChecks,
  handleJsonParseError,
-  initializeFileStorage,
  GenerationJobManager,
  createStreamServices,
+  initializeFileStorage,
 } = require('@librechat/api');
 const { connectDb, indexSync } = require('~/db');
 const initializeOAuthReconnectManager = require('./services/initializeOAuthReconnectManager');
@ -134,8 +136,10 @@ const startServer = async () => {
  app.use('/oauth', routes.oauth);
  /* API Endpoints */
  app.use('/api/auth', routes.auth);
+  app.use('/api/admin', routes.adminAuth);
  app.use('/api/actions', routes.actions);
  app.use('/api/keys', routes.keys);
+  app.use('/api/api-keys', routes.apiKeys);
  app.use('/api/user', routes.user);
  app.use('/api/search', routes.search);
  app.use('/api/messages', routes.messages);
@ -160,8 +164,10 @@ const startServer = async () => {
  app.use('/api/tags', routes.tags);
  app.use('/api/mcp', routes.mcp);

-  app.use(ErrorController);
+  /** 404 for unmatched API routes */
+  app.use('/api', apiNotFound);

+  /** SPA fallback - serve index.html for all unmatched routes */
  app.use((req, res) => {
    res.set({
      'Cache-Control': process.env.INDEX_CACHE_CONTROL || 'no-cache, no-store, must-revalidate',
@ -177,6 +183,9 @@ const startServer = async () => {
    res.send(updatedIndexHtml);
  });

+  /** Error handler (must be last - Express identifies error middleware by its 4-arg signature) */
+  app.use(ErrorController);
+
  app.listen(port, host, async (err) => {
    if (err) {
      logger.error('Failed to start server:', err);
@ -199,6 +208,11 @@ const startServer = async () => {
    const streamServices = createStreamServices();
    GenerationJobManager.configure(streamServices);
    GenerationJobManager.initialize();
+
+    const inspectFlags = process.execArgv.some((arg) => arg.startsWith('--inspect'));
+    if (inspectFlags || isEnabled(process.env.MEM_DIAG)) {
+      memoryDiagnostics.start();
+    }
  });
 };

@ -249,6 +263,15 @@ process.on('uncaughtException', (err) => {
    return;
  }

+  if (isEnabled(process.env.CONTINUE_ON_UNCAUGHT_EXCEPTION)) {
+    logger.error('Unhandled error encountered. The app will continue running.', {
+      name: err?.name,
+      message: err?.message,
+      stack: err?.stack,
+    });
+    return;
+  }
+
  process.exit(1);
 });

--- a/api/server/index.spec.js
+++ b/api/server/index.spec.js
@ -100,6 +100,40 @@ describe('Server Configuration', () => {
    expect(response.headers['expires']).toBe('0');
  });

+  it('should return 404 JSON for undefined API routes', async () => {
+    const response = await request(app).get('/api/nonexistent');
+    expect(response.status).toBe(404);
+    expect(response.body).toEqual({ message: 'Endpoint not found' });
+  });
+
+  it('should return 404 JSON for nested undefined API routes', async () => {
+    const response = await request(app).get('/api/nonexistent/nested/path');
+    expect(response.status).toBe(404);
+    expect(response.body).toEqual({ message: 'Endpoint not found' });
+  });
+
+  it('should return 404 JSON for non-GET methods on undefined API routes', async () => {
+    const post = await request(app).post('/api/nonexistent');
+    expect(post.status).toBe(404);
+    expect(post.body).toEqual({ message: 'Endpoint not found' });
+
+    const del = await request(app).delete('/api/nonexistent');
+    expect(del.status).toBe(404);
+    expect(del.body).toEqual({ message: 'Endpoint not found' });
+  });
+
+  it('should return 404 JSON for the /api root path', async () => {
+    const response = await request(app).get('/api');
+    expect(response.status).toBe(404);
+    expect(response.body).toEqual({ message: 'Endpoint not found' });
+  });
+
+  it('should serve SPA HTML for non-API unmatched routes', async () => {
+    const response = await request(app).get('/this/does/not/exist');
+    expect(response.status).toBe(200);
+    expect(response.headers['content-type']).toMatch(/html/);
+  });
+
  it('should return 500 for unknown errors via ErrorController', async () => {
    // Testing the error handling here on top of unit tests to ensure the middleware is correctly integrated

--- a/api/server/middleware/abortMiddleware.js
+++ b/api/server/middleware/abortMiddleware.js
@ -1,19 +1,70 @@
 const { logger } = require('@librechat/data-schemas');
 const {
-  countTokens,
  isEnabled,
  sendEvent,
+  countTokens,
  GenerationJobManager,
+  recordCollectedUsage,
  sanitizeMessageForTransmit,
 } = require('@librechat/api');
 const { isAssistantsEndpoint, ErrorTypes } = require('librechat-data-provider');
+const { saveMessage, getConvo, updateBalance, bulkInsertTransactions } = require('~/models');
+const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
 const { truncateText, smartTruncateText } = require('~/app/clients/prompts');
+const { getMultiplier, getCacheMultiplier } = require('~/models/tx');
 const clearPendingReq = require('~/cache/clearPendingReq');
 const { sendError } = require('~/server/middleware/error');
-const { spendTokens } = require('~/models/spendTokens');
-const { saveMessage, getConvo } = require('~/models');
 const { abortRun } = require('./abortRun');

+/**
+ * Records token spend for every usage entry collected during an aborted generation.
+ * Covers both sequential and parallel agent execution by delegating the whole
+ * `collectedUsage` array to `recordCollectedUsage` with `context: 'abort'`.
+ *
+ * IMPORTANT: once recording has completed, the `collectedUsage` array is emptied
+ * in place to prevent double-spending. The array is shared with
+ * AgentClient.collectedUsage, so emptying it here stops the AgentClient finally
+ * block from spending the same tokens again.
+ *
+ * @param {Object} params
+ * @param {string} params.userId - User ID
+ * @param {string} params.conversationId - Conversation ID
+ * @param {Array<Object>} params.collectedUsage - Usage metadata from all models
+ * @param {string} [params.fallbackModel] - Fallback model name if not in usage
+ * @param {string} [params.messageId] - The response message ID for transaction correlation
+ */
+async function spendCollectedUsage({
+  userId,
+  conversationId,
+  collectedUsage,
+  fallbackModel,
+  messageId,
+}) {
+  const hasUsage = Boolean(collectedUsage) && collectedUsage.length !== 0;
+  if (!hasUsage) {
+    return;
+  }
+
+  /** Persistence and pricing helpers handed to recordCollectedUsage */
+  const deps = {
+    spendTokens,
+    spendStructuredTokens,
+    pricing: { getMultiplier, getCacheMultiplier },
+    bulkWriteOps: { insertMany: bulkInsertTransactions, updateBalance },
+  };
+  const usageParams = {
+    user: userId,
+    conversationId,
+    collectedUsage,
+    context: 'abort',
+    messageId,
+    model: fallbackModel,
+  };
+
+  await recordCollectedUsage(deps, usageParams);
+
+  // Empty the array in place (same reference as AgentClient.collectedUsage) only
+  // after the awaited call has finished, so a later recordCollectedUsage() sees an
+  // empty array and returns early instead of double-spending.
+  collectedUsage.length = 0;
+}
+
 /**
 * Abort an active message generation.
 * Uses GenerationJobManager for all agent requests.
@ -39,9 +90,8 @@ async function abortMessage(req, res) {
    return;
  }

-  const { jobData, content, text } = abortResult;
+  const { jobData, content, text, collectedUsage } = abortResult;

-  // Count tokens and spend them
  const completionTokens = await countTokens(text);
  const promptTokens = jobData?.promptTokens ?? 0;

@ -62,10 +112,22 @@ async function abortMessage(req, res) {
    tokenCount: completionTokens,
  };

-  await spendTokens(
-    { ...responseMessage, context: 'incomplete', user: userId },
-    { promptTokens, completionTokens },
-  );
+  // Spend tokens for ALL models from collectedUsage (handles parallel agents/addedConvo)
+  if (collectedUsage && collectedUsage.length > 0) {
+    await spendCollectedUsage({
+      userId,
+      conversationId: jobData?.conversationId,
+      collectedUsage,
+      fallbackModel: jobData?.model,
+      messageId: jobData?.responseMessageId,
+    });
+  } else {
+    // Fallback: no collected usage, use text-based token counting for primary model only
+    await spendTokens(
+      { ...responseMessage, context: 'incomplete', user: userId },
+      { promptTokens, completionTokens },
+    );
+  }

  await saveMessage(
    req,
@ -206,4 +268,5 @@ const handleAbortError = async (res, req, error, data) => {
 module.exports = {
  handleAbort,
  handleAbortError,
+  spendCollectedUsage,
 };
--- a/api/server/middleware/abortMiddleware.spec.js
+++ b/api/server/middleware/abortMiddleware.spec.js
@ -0,0 +1,245 @@
+/**
+ * Tests for abortMiddleware - spendCollectedUsage function
+ *
+ * This tests the token spending logic for abort scenarios,
+ * particularly for parallel agents (addedConvo) where multiple
+ * models need their tokens spent.
+ *
+ * spendCollectedUsage delegates to recordCollectedUsage from @librechat/api,
+ * passing pricing + bulkWriteOps deps, with context: 'abort'.
+ * After spending, it clears the collectedUsage array to prevent double-spending
+ * from the AgentClient finally block (which shares the same array reference).
+ */
+
+const mockSpendTokens = jest.fn().mockResolvedValue();
+const mockSpendStructuredTokens = jest.fn().mockResolvedValue();
+const mockRecordCollectedUsage = jest
+  .fn()
+  .mockResolvedValue({ input_tokens: 100, output_tokens: 50 });
+
+const mockGetMultiplier = jest.fn().mockReturnValue(1);
+const mockGetCacheMultiplier = jest.fn().mockReturnValue(null);
+
+jest.mock('~/models/spendTokens', () => ({
+  spendTokens: (...args) => mockSpendTokens(...args),
+  spendStructuredTokens: (...args) => mockSpendStructuredTokens(...args),
+}));
+
+jest.mock('~/models/tx', () => ({
+  getMultiplier: mockGetMultiplier,
+  getCacheMultiplier: mockGetCacheMultiplier,
+}));
+
+jest.mock('@librechat/data-schemas', () => ({
+  logger: {
+    debug: jest.fn(),
+    error: jest.fn(),
+    warn: jest.fn(),
+    info: jest.fn(),
+  },
+}));
+
+jest.mock('@librechat/api', () => ({
+  countTokens: jest.fn().mockResolvedValue(100),
+  isEnabled: jest.fn().mockReturnValue(false),
+  sendEvent: jest.fn(),
+  GenerationJobManager: {
+    abortJob: jest.fn(),
+  },
+  recordCollectedUsage: mockRecordCollectedUsage,
+  sanitizeMessageForTransmit: jest.fn((msg) => msg),
+}));
+
+jest.mock('librechat-data-provider', () => ({
+  isAssistantsEndpoint: jest.fn().mockReturnValue(false),
+  ErrorTypes: { INVALID_REQUEST: 'INVALID_REQUEST', NO_SYSTEM_MESSAGES: 'NO_SYSTEM_MESSAGES' },
+}));
+
+jest.mock('~/app/clients/prompts', () => ({
+  truncateText: jest.fn((text) => text),
+  smartTruncateText: jest.fn((text) => text),
+}));
+
+jest.mock('~/cache/clearPendingReq', () => jest.fn().mockResolvedValue());
+
+jest.mock('~/server/middleware/error', () => ({
+  sendError: jest.fn(),
+}));
+
+const mockUpdateBalance = jest.fn().mockResolvedValue({});
+const mockBulkInsertTransactions = jest.fn().mockResolvedValue(undefined);
+jest.mock('~/models', () => ({
+  saveMessage: jest.fn().mockResolvedValue(),
+  getConvo: jest.fn().mockResolvedValue({ title: 'Test Chat' }),
+  updateBalance: mockUpdateBalance,
+  bulkInsertTransactions: mockBulkInsertTransactions,
+}));
+
+jest.mock('./abortRun', () => ({
+  abortRun: jest.fn(),
+}));
+
+const { spendCollectedUsage } = require('./abortMiddleware');
+
+describe('abortMiddleware - spendCollectedUsage', () => {
+  beforeEach(() => {
+    jest.clearAllMocks();
+  });
+
+  describe('spendCollectedUsage delegation', () => {
+    it('should return early if collectedUsage is empty', async () => {
+      await spendCollectedUsage({
+        userId: 'user-123',
+        conversationId: 'convo-123',
+        collectedUsage: [],
+        fallbackModel: 'gpt-4',
+      });
+
+      expect(mockRecordCollectedUsage).not.toHaveBeenCalled();
+    });
+
+    it('should return early if collectedUsage is null', async () => {
+      await spendCollectedUsage({
+        userId: 'user-123',
+        conversationId: 'convo-123',
+        collectedUsage: null,
+        fallbackModel: 'gpt-4',
+      });
+
+      expect(mockRecordCollectedUsage).not.toHaveBeenCalled();
+    });
+
+    it('should call recordCollectedUsage with abort context and full deps', async () => {
+      const collectedUsage = [{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' }];
+
+      await spendCollectedUsage({
+        userId: 'user-123',
+        conversationId: 'convo-123',
+        collectedUsage,
+        fallbackModel: 'gpt-4',
+        messageId: 'msg-123',
+      });
+
+      expect(mockRecordCollectedUsage).toHaveBeenCalledTimes(1);
+      expect(mockRecordCollectedUsage).toHaveBeenCalledWith(
+        {
+          spendTokens: expect.any(Function),
+          spendStructuredTokens: expect.any(Function),
+          pricing: {
+            getMultiplier: mockGetMultiplier,
+            getCacheMultiplier: mockGetCacheMultiplier,
+          },
+          bulkWriteOps: {
+            insertMany: mockBulkInsertTransactions,
+            updateBalance: mockUpdateBalance,
+          },
+        },
+        {
+          user: 'user-123',
+          conversationId: 'convo-123',
+          collectedUsage,
+          context: 'abort',
+          messageId: 'msg-123',
+          model: 'gpt-4',
+        },
+      );
+    });
+
+    it('should pass context abort for multiple models (parallel agents)', async () => {
+      const collectedUsage = [
+        { input_tokens: 100, output_tokens: 50, model: 'gpt-4' },
+        { input_tokens: 80, output_tokens: 40, model: 'claude-3' },
+        { input_tokens: 120, output_tokens: 60, model: 'gemini-pro' },
+      ];
+
+      await spendCollectedUsage({
+        userId: 'user-123',
+        conversationId: 'convo-123',
+        collectedUsage,
+        fallbackModel: 'gpt-4',
+      });
+
+      expect(mockRecordCollectedUsage).toHaveBeenCalledTimes(1);
+      expect(mockRecordCollectedUsage).toHaveBeenCalledWith(
+        expect.any(Object),
+        expect.objectContaining({
+          context: 'abort',
+          collectedUsage,
+        }),
+      );
+    });
+
+    it('should handle real-world parallel agent abort scenario', async () => {
+      const collectedUsage = [
+        { input_tokens: 31596, output_tokens: 151, model: 'gemini-3-flash-preview' },
+        { input_tokens: 28000, output_tokens: 120, model: 'gpt-5.2' },
+      ];
+
+      await spendCollectedUsage({
+        userId: 'user-123',
+        conversationId: 'convo-123',
+        collectedUsage,
+        fallbackModel: 'gemini-3-flash-preview',
+      });
+
+      expect(mockRecordCollectedUsage).toHaveBeenCalledTimes(1);
+      expect(mockRecordCollectedUsage).toHaveBeenCalledWith(
+        expect.any(Object),
+        expect.objectContaining({
+          user: 'user-123',
+          conversationId: 'convo-123',
+          context: 'abort',
+          model: 'gemini-3-flash-preview',
+        }),
+      );
+    });
+
+    /**
+     * Race condition prevention: after abort middleware spends tokens,
+     * the collectedUsage array is cleared so AgentClient.recordCollectedUsage()
+     * (which shares the same array reference) sees an empty array and returns early.
+     */
+    it('should clear collectedUsage array after spending to prevent double-spending', async () => {
+      const collectedUsage = [
+        { input_tokens: 100, output_tokens: 50, model: 'gpt-4' },
+        { input_tokens: 80, output_tokens: 40, model: 'claude-3' },
+      ];
+
+      expect(collectedUsage.length).toBe(2);
+
+      await spendCollectedUsage({
+        userId: 'user-123',
+        conversationId: 'convo-123',
+        collectedUsage,
+        fallbackModel: 'gpt-4',
+      });
+
+      expect(mockRecordCollectedUsage).toHaveBeenCalledTimes(1);
+      expect(collectedUsage.length).toBe(0);
+    });
+
+    it('should await recordCollectedUsage before clearing array', async () => {
+      /**
+       * Length of the shared array as seen by recordCollectedUsage when it settles.
+       * If the array were cleared before the awaited call finished, this would be 0.
+       */
+      let lengthAtSettle = null;
+      // `Once` keeps this slow implementation from leaking into other tests:
+      // jest.clearAllMocks() resets calls/results but not implementations.
+      mockRecordCollectedUsage.mockImplementationOnce(async (_deps, params) => {
+        await new Promise((resolve) => setTimeout(resolve, 10));
+        lengthAtSettle = params.collectedUsage.length;
+        return { input_tokens: 100, output_tokens: 50 };
+      });
+
+      const collectedUsage = [
+        { input_tokens: 100, output_tokens: 50, model: 'gpt-4' },
+        { input_tokens: 80, output_tokens: 40, model: 'claude-3' },
+      ];
+
+      await spendCollectedUsage({
+        userId: 'user-123',
+        conversationId: 'convo-123',
+        collectedUsage,
+        fallbackModel: 'gpt-4',
+      });
+
+      expect(lengthAtSettle).toBe(2);
+      expect(collectedUsage.length).toBe(0);
+    });
+  });
+});
--- a/api/server/middleware/accessResources/canAccessAgentResource.spec.js
+++ b/api/server/middleware/accessResources/canAccessAgentResource.spec.js
@ -29,7 +29,7 @@ describe('canAccessAgentResource middleware', () => {
        AGENTS: {
          USE: true,
          CREATE: true,
-          SHARED_GLOBAL: false,
+          SHARE: true,
        },
      },
    });
--- a/api/server/middleware/accessResources/canAccessMCPServerResource.js
+++ b/api/server/middleware/accessResources/canAccessMCPServerResource.js
@ -1,16 +1,16 @@
 const { ResourceType } = require('librechat-data-provider');
 const { canAccessResource } = require('./canAccessResource');
-const { findMCPServerById } = require('~/models');
+const { findMCPServerByServerName } = require('~/models');

 /**
- * MCP Server ID resolver function
- * Resolves custom MCP server ID (e.g., "mcp_abc123") to MongoDB ObjectId
+ * MCP Server name resolver function
+ * Resolves MCP server name (e.g., "my-mcp-server") to MongoDB ObjectId
 *
- * @param {string} mcpServerCustomId - Custom MCP server ID from route parameter
+ * @param {string} serverName - Server name from route parameter
 * @returns {Promise<Object|null>} MCP server document with _id field, or null if not found
 */
-const resolveMCPServerId = async (mcpServerCustomId) => {
-  return await findMCPServerById(mcpServerCustomId);
+const resolveMCPServerName = async (serverName) => {
+  return await findMCPServerByServerName(serverName);
 };

 /**
@ -52,7 +52,7 @@ const canAccessMCPServerResource = (options) => {
    resourceType: ResourceType.MCPSERVER,
    requiredPermission,
    resourceIdParam,
-    idResolver: resolveMCPServerId,
+    idResolver: resolveMCPServerName,
  });
 };

--- a/api/server/middleware/accessResources/canAccessMCPServerResource.spec.js
+++ b/api/server/middleware/accessResources/canAccessMCPServerResource.spec.js
@ -26,10 +26,10 @@ describe('canAccessMCPServerResource middleware', () => {
    await Role.create({
      name: 'test-role',
      permissions: {
-        MCPSERVERS: {
+        MCP_SERVERS: {
          USE: true,
          CREATE: true,
-          SHARED_GLOBAL: false,
+          SHARE: true,
        },
      },
    });
@ -545,7 +545,7 @@ describe('canAccessMCPServerResource middleware', () => {

  describe('error handling', () => {
    test('should handle server returning null gracefully (treated as not found)', async () => {
-      // When an MCP server is not found, findMCPServerById returns null
+      // When an MCP server is not found, findMCPServerByServerName returns null
      // which the middleware correctly handles as a 404
      req.params.serverName = 'definitely-non-existent-server';

--- a/api/server/middleware/accessResources/fileAccess.spec.js
+++ b/api/server/middleware/accessResources/fileAccess.spec.js
@ -32,7 +32,7 @@ describe('fileAccess middleware', () => {
        AGENTS: {
          USE: true,
          CREATE: true,
-          SHARED_GLOBAL: false,
+          SHARE: true,
        },
      },
    });
--- a/api/server/middleware/buildEndpointOption.js
+++ b/api/server/middleware/buildEndpointOption.js
@ -5,9 +5,11 @@ const {
  EModelEndpoint,
  isAgentsEndpoint,
  parseCompactConvo,
+  getDefaultParamsEndpoint,
 } = require('librechat-data-provider');
 const azureAssistants = require('~/server/services/Endpoints/azureAssistants');
 const assistants = require('~/server/services/Endpoints/assistants');
+const { getEndpointsConfig } = require('~/server/services/Config');
 const agents = require('~/server/services/Endpoints/agents');
 const { updateFilesUsage } = require('~/models');

@ -19,9 +21,24 @@ const buildFunction = {

 async function buildEndpointOption(req, res, next) {
  const { endpoint, endpointType } = req.body;
+
+  let endpointsConfig;
+  try {
+    endpointsConfig = await getEndpointsConfig(req);
+  } catch (error) {
+    logger.error('Error fetching endpoints config in buildEndpointOption', error);
+  }
+
+  const defaultParamsEndpoint = getDefaultParamsEndpoint(endpointsConfig, endpoint);
+
  let parsedBody;
  try {
-    parsedBody = parseCompactConvo({ endpoint, endpointType, conversation: req.body });
+    parsedBody = parseCompactConvo({
+      endpoint,
+      endpointType,
+      conversation: req.body,
+      defaultParamsEndpoint,
+    });
  } catch (error) {
    logger.error(`Error parsing compact conversation for endpoint ${endpoint}`, error);
    logger.debug({
@ -55,6 +72,7 @@ async function buildEndpointOption(req, res, next) {
        endpoint,
        endpointType,
        conversation: currentModelSpec.preset,
+        defaultParamsEndpoint,
      });
      if (currentModelSpec.iconURL != null && currentModelSpec.iconURL !== '') {
        parsedBody.iconURL = currentModelSpec.iconURL;
--- a/api/server/middleware/buildEndpointOption.spec.js
+++ b/api/server/middleware/buildEndpointOption.spec.js
@ -0,0 +1,237 @@
+/**
+ * Wrap parseCompactConvo: the REAL function runs, but jest can observe
+ * calls and return values. Must be declared before require('./buildEndpointOption')
+ * so the destructured reference in the middleware captures the wrapper.
+ */
+jest.mock('librechat-data-provider', () => {
+  const actual = jest.requireActual('librechat-data-provider');
+  return {
+    ...actual,
+    parseCompactConvo: jest.fn((...args) => actual.parseCompactConvo(...args)),
+  };
+});
+
+const { EModelEndpoint, parseCompactConvo } = require('librechat-data-provider');
+
+const mockBuildOptions = jest.fn((_endpoint, parsedBody) => ({
+  ...parsedBody,
+  endpoint: _endpoint,
+}));
+
+jest.mock('~/server/services/Endpoints/azureAssistants', () => ({
+  buildOptions: mockBuildOptions,
+}));
+jest.mock('~/server/services/Endpoints/assistants', () => ({
+  buildOptions: mockBuildOptions,
+}));
+jest.mock('~/server/services/Endpoints/agents', () => ({
+  buildOptions: mockBuildOptions,
+}));
+
+jest.mock('~/models', () => ({
+  updateFilesUsage: jest.fn(),
+}));
+
+const mockGetEndpointsConfig = jest.fn();
+jest.mock('~/server/services/Config', () => ({
+  getEndpointsConfig: (...args) => mockGetEndpointsConfig(...args),
+}));
+
+jest.mock('@librechat/api', () => ({
+  handleError: jest.fn(),
+}));
+
+const buildEndpointOption = require('./buildEndpointOption');
+
+/** Builds a minimal request stub: the given body and config plus a fixed chat baseUrl. */
+function createReq(body, config = {}) {
+  const baseUrl = '/api/chat';
+  return { body, config, baseUrl };
+}
+
+/** Builds a response stub whose status() and json() are chainable jest mocks. */
+function createRes() {
+  const status = jest.fn().mockReturnThis();
+  const json = jest.fn().mockReturnThis();
+  return { status, json };
+}
+
+describe('buildEndpointOption - defaultParamsEndpoint parsing', () => {
+  beforeEach(() => {
+    jest.clearAllMocks();
+  });
+
+  it('should pass defaultParamsEndpoint to parseCompactConvo and preserve maxOutputTokens', async () => {
+    mockGetEndpointsConfig.mockResolvedValue({
+      AnthropicClaude: {
+        type: EModelEndpoint.custom,
+        customParams: {
+          defaultParamsEndpoint: EModelEndpoint.anthropic,
+        },
+      },
+    });
+
+    const req = createReq(
+      {
+        endpoint: 'AnthropicClaude',
+        endpointType: EModelEndpoint.custom,
+        model: 'anthropic/claude-opus-4.5',
+        temperature: 0.7,
+        maxOutputTokens: 8192,
+        topP: 0.9,
+        maxContextTokens: 50000,
+      },
+      { modelSpecs: null },
+    );
+
+    await buildEndpointOption(req, createRes(), jest.fn());
+
+    expect(parseCompactConvo).toHaveBeenCalledWith(
+      expect.objectContaining({
+        defaultParamsEndpoint: EModelEndpoint.anthropic,
+      }),
+    );
+
+    const parsedResult = parseCompactConvo.mock.results[0].value;
+    expect(parsedResult.maxOutputTokens).toBe(8192);
+    expect(parsedResult.topP).toBe(0.9);
+    expect(parsedResult.temperature).toBe(0.7);
+    expect(parsedResult.maxContextTokens).toBe(50000);
+  });
+
+  it('should strip maxOutputTokens when no defaultParamsEndpoint is configured', async () => {
+    mockGetEndpointsConfig.mockResolvedValue({
+      MyOpenRouter: {
+        type: EModelEndpoint.custom,
+      },
+    });
+
+    const req = createReq(
+      {
+        endpoint: 'MyOpenRouter',
+        endpointType: EModelEndpoint.custom,
+        model: 'gpt-4o',
+        temperature: 0.7,
+        maxOutputTokens: 8192,
+        max_tokens: 4096,
+      },
+      { modelSpecs: null },
+    );
+
+    await buildEndpointOption(req, createRes(), jest.fn());
+
+    expect(parseCompactConvo).toHaveBeenCalledWith(
+      expect.objectContaining({
+        defaultParamsEndpoint: undefined,
+      }),
+    );
+
+    const parsedResult = parseCompactConvo.mock.results[0].value;
+    expect(parsedResult.maxOutputTokens).toBeUndefined();
+    expect(parsedResult.max_tokens).toBe(4096);
+    expect(parsedResult.temperature).toBe(0.7);
+  });
+
+  it('should strip bedrock region from custom endpoint without defaultParamsEndpoint', async () => {
+    mockGetEndpointsConfig.mockResolvedValue({
+      MyEndpoint: {
+        type: EModelEndpoint.custom,
+      },
+    });
+
+    const req = createReq(
+      {
+        endpoint: 'MyEndpoint',
+        endpointType: EModelEndpoint.custom,
+        model: 'gpt-4o',
+        temperature: 0.7,
+        region: 'us-east-1',
+      },
+      { modelSpecs: null },
+    );
+
+    await buildEndpointOption(req, createRes(), jest.fn());
+
+    const parsedResult = parseCompactConvo.mock.results[0].value;
+    expect(parsedResult.region).toBeUndefined();
+    expect(parsedResult.temperature).toBe(0.7);
+  });
+
+  // With `modelSpecs.enforce` set, the middleware parses the request body first and
+  // then re-parses the matching spec's preset; the second call is the one under test.
+  it('should pass defaultParamsEndpoint when re-parsing enforced model spec', async () => {
+    mockGetEndpointsConfig.mockResolvedValue({
+      AnthropicClaude: {
+        type: EModelEndpoint.custom,
+        customParams: {
+          defaultParamsEndpoint: EModelEndpoint.anthropic,
+        },
+      },
+    });
+
+    const modelSpec = {
+      name: 'claude-opus-4.5',
+      preset: {
+        endpoint: 'AnthropicClaude',
+        endpointType: EModelEndpoint.custom,
+        model: 'anthropic/claude-opus-4.5',
+        temperature: 0.7,
+        maxOutputTokens: 8192,
+        maxContextTokens: 50000,
+      },
+    };
+
+    const req = createReq(
+      {
+        endpoint: 'AnthropicClaude',
+        endpointType: EModelEndpoint.custom,
+        spec: 'claude-opus-4.5',
+        model: 'anthropic/claude-opus-4.5',
+      },
+      {
+        modelSpecs: {
+          enforce: true,
+          list: [modelSpec],
+        },
+      },
+    );
+
+    await buildEndpointOption(req, createRes(), jest.fn());
+
+    // Fail with a readable assertion (instead of a TypeError on `undefined[0]`)
+    // if the enforced-spec re-parse never happened.
+    expect(parseCompactConvo.mock.calls.length).toBeGreaterThanOrEqual(2);
+
+    const enforcedCall = parseCompactConvo.mock.calls[1];
+    expect(enforcedCall[0]).toEqual(
+      expect.objectContaining({
+        defaultParamsEndpoint: EModelEndpoint.anthropic,
+      }),
+    );
+
+    const enforcedResult = parseCompactConvo.mock.results[1].value;
+    expect(enforcedResult.maxOutputTokens).toBe(8192);
+    expect(enforcedResult.temperature).toBe(0.7);
+    expect(enforcedResult.maxContextTokens).toBe(50000);
+  });
+
+  it('should fall back to OpenAI schema when getEndpointsConfig fails', async () => {
+    mockGetEndpointsConfig.mockRejectedValue(new Error('Config unavailable'));
+
+    const req = createReq(
+      {
+        endpoint: 'AnthropicClaude',
+        endpointType: EModelEndpoint.custom,
+        model: 'anthropic/claude-opus-4.5',
+        temperature: 0.7,
+        maxOutputTokens: 8192,
+        max_tokens: 4096,
+      },
+      { modelSpecs: null },
+    );
+
+    await buildEndpointOption(req, createRes(), jest.fn());
+
+    expect(parseCompactConvo).toHaveBeenCalledWith(
+      expect.objectContaining({
+        defaultParamsEndpoint: undefined,
+      }),
+    );
+
+    const parsedResult = parseCompactConvo.mock.results[0].value;
+    expect(parsedResult.maxOutputTokens).toBeUndefined();
+    expect(parsedResult.max_tokens).toBe(4096);
+  });
+});
--- a/api/server/middleware/checkSharePublicAccess.js
+++ b/api/server/middleware/checkSharePublicAccess.js
@ -0,0 +1,85 @@
+const { logger } = require('@librechat/data-schemas');
+const { ResourceType, PermissionTypes, Permissions } = require('librechat-data-provider');
+const { getRoleByName } = require('~/models/Role');
+
+/**
+ * Maps resource types to their corresponding permission types
+ */
+const resourceToPermissionType = {
+  [ResourceType.AGENT]: PermissionTypes.AGENTS,
+  [ResourceType.PROMPTGROUP]: PermissionTypes.PROMPTS,
+  [ResourceType.MCPSERVER]: PermissionTypes.MCP_SERVERS,
+  [ResourceType.REMOTE_AGENT]: PermissionTypes.REMOTE_AGENTS,
+};
+
+/**
+ * Middleware to check if user has SHARE_PUBLIC permission for a resource type.
+ * Only enforced when the request body carries a truthy `public` value; every other
+ * request (including one without a parsed body) is passed straight to `next()`.
+ *
+ * Responses sent by this middleware:
+ * - 401 when there is no authenticated user or the user has no role
+ * - 400 when `req.params.resourceType` has no entry in `resourceToPermissionType`
+ * - 403 when the role has no permissions or SHARE_PUBLIC is not exactly `true`
+ * - 500 when the role lookup (or anything else in the check) throws
+ *
+ * @param {import('express').Request} req - Express request
+ * @param {import('express').Response} res - Express response
+ * @param {import('express').NextFunction} next - Express next function
+ */
+const checkSharePublicAccess = async (req, res, next) => {
+  try {
+    // A request without a parsed body cannot be asking for public sharing.
+    const { public: isPublic } = req.body ?? {};
+
+    // Only check if trying to enable public sharing
+    if (!isPublic) {
+      return next();
+    }
+
+    const user = req.user;
+    if (!user || !user.role) {
+      return res.status(401).json({
+        error: 'Unauthorized',
+        message: 'Authentication required',
+      });
+    }
+
+    const { resourceType } = req.params;
+    // Own-property lookup: `resourceType` comes from the URL, so names such as
+    // `constructor` or `toString` must not resolve through Object.prototype.
+    const isSupported = Object.prototype.hasOwnProperty.call(
+      resourceToPermissionType,
+      resourceType,
+    );
+    const permissionType = isSupported ? resourceToPermissionType[resourceType] : undefined;
+
+    if (!permissionType) {
+      return res.status(400).json({
+        error: 'Bad Request',
+        message: `Unsupported resource type for public sharing: ${resourceType}`,
+      });
+    }
+
+    const role = await getRoleByName(user.role);
+    if (!role || !role.permissions) {
+      return res.status(403).json({
+        error: 'Forbidden',
+        message: 'No permissions configured for user role',
+      });
+    }
+
+    // A missing permission group is treated the same as SHARE_PUBLIC being off.
+    const resourcePerms = role.permissions[permissionType] || {};
+    const canSharePublic = resourcePerms[Permissions.SHARE_PUBLIC] === true;
+
+    if (!canSharePublic) {
+      logger.warn(
+        `[checkSharePublicAccess][${user.id}] User denied SHARE_PUBLIC for ${resourceType}`,
+      );
+      return res.status(403).json({
+        error: 'Forbidden',
+        message: `You do not have permission to share ${resourceType} resources publicly`,
+      });
+    }
+
+    next();
+  } catch (error) {
+    logger.error(
+      `[checkSharePublicAccess][${req.user?.id}] Error checking SHARE_PUBLIC permission`,
+      error,
+    );
+    return res.status(500).json({
+      error: 'Internal Server Error',
+      message: 'Failed to check public sharing permissions',
+    });
+  }
+};
+
+module.exports = {
+  checkSharePublicAccess,
+};
--- a/api/server/middleware/checkSharePublicAccess.spec.js
+++ b/api/server/middleware/checkSharePublicAccess.spec.js
@ -0,0 +1,164 @@
+const { ResourceType, PermissionTypes, Permissions } = require('librechat-data-provider');
+const { checkSharePublicAccess } = require('./checkSharePublicAccess');
+const { getRoleByName } = require('~/models/Role');
+
+jest.mock('~/models/Role');
+
+describe('checkSharePublicAccess middleware', () => {
+  let mockReq;
+  let mockRes;
+  let mockNext;
+
+  beforeEach(() => {
+    jest.clearAllMocks();
+    mockReq = {
+      user: { id: 'user123', role: 'USER' },
+      params: { resourceType: ResourceType.AGENT },
+      body: {},
+    };
+    mockRes = {
+      status: jest.fn().mockReturnThis(),
+      json: jest.fn(),
+    };
+    mockNext = jest.fn();
+  });
+
+  it('should call next() when public is not true', async () => {
+    mockReq.body = { public: false };
+
+    await checkSharePublicAccess(mockReq, mockRes, mockNext);
+
+    expect(mockNext).toHaveBeenCalled();
+    expect(mockRes.status).not.toHaveBeenCalled();
+  });
+
+  it('should call next() when public is undefined', async () => {
+    mockReq.body = { updated: [] };
+
+    await checkSharePublicAccess(mockReq, mockRes, mockNext);
+
+    expect(mockNext).toHaveBeenCalled();
+    expect(mockRes.status).not.toHaveBeenCalled();
+  });
+
+  it('should return 401 when user is not authenticated', async () => {
+    mockReq.body = { public: true };
+    mockReq.user = null;
+
+    await checkSharePublicAccess(mockReq, mockRes, mockNext);
+
+    expect(mockRes.status).toHaveBeenCalledWith(401);
+    expect(mockRes.json).toHaveBeenCalledWith({
+      error: 'Unauthorized',
+      message: 'Authentication required',
+    });
+    expect(mockNext).not.toHaveBeenCalled();
+  });
+
+  it('should return 403 when user role has no SHARE_PUBLIC permission for agents', async () => {
+    mockReq.body = { public: true };
+    mockReq.params = { resourceType: ResourceType.AGENT };
+    getRoleByName.mockResolvedValue({
+      permissions: {
+        [PermissionTypes.AGENTS]: {
+          [Permissions.SHARE]: true,
+          [Permissions.SHARE_PUBLIC]: false,
+        },
+      },
+    });
+
+    await checkSharePublicAccess(mockReq, mockRes, mockNext);
+
+    expect(mockRes.status).toHaveBeenCalledWith(403);
+    expect(mockRes.json).toHaveBeenCalledWith({
+      error: 'Forbidden',
+      message: `You do not have permission to share ${ResourceType.AGENT} resources publicly`,
+    });
+    expect(mockNext).not.toHaveBeenCalled();
+  });
+
+  it('should call next() when user has SHARE_PUBLIC permission for agents', async () => {
+    mockReq.body = { public: true };
+    mockReq.params = { resourceType: ResourceType.AGENT };
+    getRoleByName.mockResolvedValue({
+      permissions: {
+        [PermissionTypes.AGENTS]: {
+          [Permissions.SHARE]: true,
+          [Permissions.SHARE_PUBLIC]: true,
+        },
+      },
+    });
+
+    await checkSharePublicAccess(mockReq, mockRes, mockNext);
+
+    expect(mockNext).toHaveBeenCalled();
+    expect(mockRes.status).not.toHaveBeenCalled();
+  });
+
+  it('should check prompts permission for promptgroup resource type', async () => {
+    mockReq.body = { public: true };
+    mockReq.params = { resourceType: ResourceType.PROMPTGROUP };
+    getRoleByName.mockResolvedValue({
+      permissions: {
+        [PermissionTypes.PROMPTS]: {
+          [Permissions.SHARE_PUBLIC]: true,
+        },
+      },
+    });
+
+    await checkSharePublicAccess(mockReq, mockRes, mockNext);
+
+    expect(mockNext).toHaveBeenCalled();
+  });
+
+  it('should check mcp_servers permission for mcpserver resource type', async () => {
+    mockReq.body = { public: true };
+    mockReq.params = { resourceType: ResourceType.MCPSERVER };
+    getRoleByName.mockResolvedValue({
+      permissions: {
+        [PermissionTypes.MCP_SERVERS]: {
+          [Permissions.SHARE_PUBLIC]: true,
+        },
+      },
+    });
+
+    await checkSharePublicAccess(mockReq, mockRes, mockNext);
+
+    expect(mockNext).toHaveBeenCalled();
+  });
+
+  it('should return 400 for unsupported resource type', async () => {
+    mockReq.body = { public: true };
+    mockReq.params = { resourceType: 'unsupported' };
+
+    await checkSharePublicAccess(mockReq, mockRes, mockNext);
+
+    expect(mockRes.status).toHaveBeenCalledWith(400);
+    expect(mockRes.json).toHaveBeenCalledWith({
+      error: 'Bad Request',
+      message: 'Unsupported resource type for public sharing: unsupported',
+    });
+    // Rejected before any role lookup, and the request never reaches the route.
+    expect(getRoleByName).not.toHaveBeenCalled();
+    expect(mockNext).not.toHaveBeenCalled();
+  });
+
+  it('should return 403 when role has no permissions object', async () => {
+    mockReq.body = { public: true };
+    getRoleByName.mockResolvedValue({ permissions: null });
+
+    await checkSharePublicAccess(mockReq, mockRes, mockNext);
+
+    expect(mockRes.status).toHaveBeenCalledWith(403);
+    // Pin the payload so this case stays distinguishable from the
+    // "SHARE_PUBLIC is false" 403, and confirm the request was stopped.
+    expect(mockRes.json).toHaveBeenCalledWith({
+      error: 'Forbidden',
+      message: 'No permissions configured for user role',
+    });
+    expect(mockNext).not.toHaveBeenCalled();
+  });
+
+  it('should return 500 on error', async () => {
+    mockReq.body = { public: true };
+    getRoleByName.mockRejectedValue(new Error('Database error'));
+
+    await checkSharePublicAccess(mockReq, mockRes, mockNext);
+
+    expect(mockRes.status).toHaveBeenCalledWith(500);
+    expect(mockRes.json).toHaveBeenCalledWith({
+      error: 'Internal Server Error',
+      message: 'Failed to check public sharing permissions',
+    });
+    // A failed permission check must not let the request through.
+    expect(mockNext).not.toHaveBeenCalled();
+  });
+});
--- a/api/server/middleware/limiters/forkLimiters.js
+++ b/api/server/middleware/limiters/forkLimiters.js
@ -48,7 +48,7 @@ const createForkHandler = (ip = true) => {
    };

    await logViolation(req, res, type, errorMessage, forkViolationScore);
-    res.status(429).json({ message: 'Too many conversation fork requests. Try again later' });
+    res.status(429).json({ message: 'Too many requests. Try again later' });
  };
 };

--- a/api/server/middleware/requireJwtAuth.js
+++ b/api/server/middleware/requireJwtAuth.js
@ -7,16 +7,13 @@ const { isEnabled } = require('@librechat/api');
 * Switches between JWT and OpenID authentication based on cookies and environment settings
 */
 const requireJwtAuth = (req, res, next) => {
-  // Check if token provider is specified in cookies
  const cookieHeader = req.headers.cookie;
  const tokenProvider = cookieHeader ? cookies.parse(cookieHeader).token_provider : null;

-  // Use OpenID authentication if token provider is OpenID and OPENID_REUSE_TOKENS is enabled
  if (tokenProvider === 'openid' && isEnabled(process.env.OPENID_REUSE_TOKENS)) {
    return passport.authenticate('openidJwt', { session: false })(req, res, next);
  }

-  // Default to standard JWT authentication
  return passport.authenticate('jwt', { session: false })(req, res, next);
 };

--- a/api/server/middleware/roles/access.spec.js
+++ b/api/server/middleware/roles/access.spec.js
@ -51,9 +51,9 @@ describe('Access Middleware', () => {
      permissions: {
        [PermissionTypes.BOOKMARKS]: { [Permissions.USE]: true },
        [PermissionTypes.PROMPTS]: {
-          [Permissions.SHARED_GLOBAL]: false,
          [Permissions.USE]: true,
          [Permissions.CREATE]: true,
+          [Permissions.SHARE]: true,
        },
        [PermissionTypes.MEMORIES]: {
          [Permissions.USE]: true,
@ -65,7 +65,7 @@ describe('Access Middleware', () => {
        [PermissionTypes.AGENTS]: {
          [Permissions.USE]: true,
          [Permissions.CREATE]: false,
-          [Permissions.SHARED_GLOBAL]: false,
+          [Permissions.SHARE]: false,
        },
        [PermissionTypes.MULTI_CONVO]: { [Permissions.USE]: true },
        [PermissionTypes.TEMPORARY_CHAT]: { [Permissions.USE]: true },
@ -79,9 +79,9 @@ describe('Access Middleware', () => {
      permissions: {
        [PermissionTypes.BOOKMARKS]: { [Permissions.USE]: true },
        [PermissionTypes.PROMPTS]: {
-          [Permissions.SHARED_GLOBAL]: true,
          [Permissions.USE]: true,
          [Permissions.CREATE]: true,
+          [Permissions.SHARE]: true,
        },
        [PermissionTypes.MEMORIES]: {
          [Permissions.USE]: true,
@ -93,7 +93,7 @@ describe('Access Middleware', () => {
        [PermissionTypes.AGENTS]: {
          [Permissions.USE]: true,
          [Permissions.CREATE]: true,
-          [Permissions.SHARED_GLOBAL]: true,
+          [Permissions.SHARE]: true,
        },
        [PermissionTypes.MULTI_CONVO]: { [Permissions.USE]: true },
        [PermissionTypes.TEMPORARY_CHAT]: { [Permissions.USE]: true },
@ -110,7 +110,7 @@ describe('Access Middleware', () => {
        [PermissionTypes.AGENTS]: {
          [Permissions.USE]: false,
          [Permissions.CREATE]: false,
-          [Permissions.SHARED_GLOBAL]: false,
+          [Permissions.SHARE]: false,
        },
        // Has permissions for other types
        [PermissionTypes.PROMPTS]: {
@ -241,7 +241,7 @@ describe('Access Middleware', () => {
        req: {},
        user: { id: 'admin123', role: 'admin' },
        permissionType: PermissionTypes.AGENTS,
-        permissions: [Permissions.SHARED_GLOBAL],
+        permissions: [Permissions.SHARE],
        getRoleByName,
      });
      expect(shareResult).toBe(true);
@ -318,7 +318,7 @@ describe('Access Middleware', () => {

      const middleware = generateCheckAccess({
        permissionType: PermissionTypes.AGENTS,
-        permissions: [Permissions.USE, Permissions.CREATE, Permissions.SHARED_GLOBAL],
+        permissions: [Permissions.USE, Permissions.CREATE, Permissions.SHARE],
        getRoleByName,
      });
      await middleware(req, res, next);
@ -349,7 +349,7 @@ describe('Access Middleware', () => {
          [PermissionTypes.AGENTS]: {
            [Permissions.USE]: false,
            [Permissions.CREATE]: false,
-            [Permissions.SHARED_GLOBAL]: false,
+            [Permissions.SHARE]: false,
          },
        },
      });
--- a/api/server/routes/__test-utils__/convos-route-mocks.js
+++ b/api/server/routes/__test-utils__/convos-route-mocks.js
@ -0,0 +1,93 @@
+/**
+ * Shared jest mock factories for the conversations route specs.
+ * Every property is a function that builds a fresh mock value on each call; the
+ * specs invoke them from inside `jest.mock(...)` module factories.
+ */
+module.exports = {
+  agents: () => ({ sleep: jest.fn() }),
+
+  // `overrides` is spread last, so a spec can add or replace any mocked export.
+  api: (overrides = {}) => ({
+    isEnabled: jest.fn(),
+    // 262144000 = 250 * 1024 * 1024
+    resolveImportMaxFileSize: jest.fn(() => 262144000),
+    createAxiosInstance: jest.fn(() => ({
+      get: jest.fn(),
+      post: jest.fn(),
+      put: jest.fn(),
+      delete: jest.fn(),
+    })),
+    logAxiosError: jest.fn(),
+    ...overrides,
+  }),
+
+  // Silent logger plus a createModels stub that returns empty model placeholders.
+  dataSchemas: () => ({
+    logger: {
+      debug: jest.fn(),
+      info: jest.fn(),
+      warn: jest.fn(),
+      error: jest.fn(),
+    },
+    createModels: jest.fn(() => ({
+      User: {},
+      Conversation: {},
+      Message: {},
+      SharedLink: {},
+    })),
+  }),
+
+  // Only the constants listed here are provided; pass anything else via `overrides`.
+  dataProvider: (overrides = {}) => ({
+    CacheKeys: { GEN_TITLE: 'GEN_TITLE' },
+    EModelEndpoint: {
+      azureAssistants: 'azureAssistants',
+      assistants: 'assistants',
+    },
+    ...overrides,
+  }),
+
+  conversationModel: () => ({
+    getConvosByCursor: jest.fn(),
+    getConvo: jest.fn(),
+    deleteConvos: jest.fn(),
+    saveConvo: jest.fn(),
+  }),
+
+  toolCallModel: () => ({ deleteToolCalls: jest.fn() }),
+
+  sharedModels: () => ({
+    deleteAllSharedLinks: jest.fn(),
+    deleteConvoSharedLink: jest.fn(),
+  }),
+
+  // Auth stand-in: lets every request through without setting anything on `req`.
+  requireJwtAuth: () => (req, res, next) => next(),
+
+  // Middleware module stand-in in which every limiter and guard simply calls next().
+  middlewarePassthrough: () => ({
+    createImportLimiters: jest.fn(() => ({
+      importIpLimiter: (req, res, next) => next(),
+      importUserLimiter: (req, res, next) => next(),
+    })),
+    createForkLimiters: jest.fn(() => ({
+      forkIpLimiter: (req, res, next) => next(),
+      forkUserLimiter: (req, res, next) => next(),
+    })),
+    configMiddleware: (req, res, next) => next(),
+    validateConvoAccess: (req, res, next) => next(),
+  }),
+
+  forkUtils: () => ({
+    forkConversation: jest.fn(),
+    duplicateConversation: jest.fn(),
+  }),
+
+  importUtils: () => ({ importConversations: jest.fn() }),
+
+  logStores: () => jest.fn(),
+
+  multerSetup: () => ({
+    storage: {},
+    importFileFilter: jest.fn(),
+  }),
+
+  // multer stand-in: `single()` yields middleware that attaches a fixed fake
+  // upload at `req.file` and continues.
+  multerLib: () =>
+    jest.fn(() => ({
+      single: jest.fn(() => (req, res, next) => {
+        req.file = { path: '/tmp/test-file.json' };
+        next();
+      }),
+    })),
+
+  assistantEndpoint: () => ({ initializeClient: jest.fn() }),
+};
--- a/api/server/routes/__tests__/convos-duplicate-ratelimit.spec.js
+++ b/api/server/routes/__tests__/convos-duplicate-ratelimit.spec.js
@ -0,0 +1,135 @@
+const express = require('express');
+const request = require('supertest');
+
+const MOCKS = '../__test-utils__/convos-route-mocks';
+
+jest.mock('@librechat/agents', () => require(MOCKS).agents());
+jest.mock('@librechat/api', () => require(MOCKS).api({ limiterCache: jest.fn(() => undefined) }));
+jest.mock('@librechat/data-schemas', () => require(MOCKS).dataSchemas());
+jest.mock('librechat-data-provider', () =>
+  require(MOCKS).dataProvider({ ViolationTypes: { FILE_UPLOAD_LIMIT: 'file_upload_limit' } }),
+);
+
+jest.mock('~/cache/logViolation', () => jest.fn().mockResolvedValue(undefined));
+jest.mock('~/cache/getLogStores', () => require(MOCKS).logStores());
+jest.mock('~/models/Conversation', () => require(MOCKS).conversationModel());
+jest.mock('~/models/ToolCall', () => require(MOCKS).toolCallModel());
+jest.mock('~/models', () => require(MOCKS).sharedModels());
+jest.mock('~/server/middleware/requireJwtAuth', () => require(MOCKS).requireJwtAuth());
+
+// Pass-through middleware from the shared mocks, with one exception: the real
+// createForkLimiters is kept so the rate limits under test are actually enforced.
+jest.mock('~/server/middleware', () => {
+  const { createForkLimiters } = jest.requireActual('~/server/middleware/limiters/forkLimiters');
+  return {
+    ...require(MOCKS).middlewarePassthrough(),
+    createForkLimiters,
+  };
+});
+
+jest.mock('~/server/utils/import/fork', () => require(MOCKS).forkUtils());
+jest.mock('~/server/utils/import', () => require(MOCKS).importUtils());
+jest.mock('~/server/routes/files/multer', () => require(MOCKS).multerSetup());
+jest.mock('multer', () => require(MOCKS).multerLib());
+jest.mock('~/server/services/Endpoints/azureAssistants', () => require(MOCKS).assistantEndpoint());
+jest.mock('~/server/services/Endpoints/assistants', () => require(MOCKS).assistantEndpoint());
+
+describe('POST /api/convos/duplicate - Rate Limiting', () => {
+  let app;
+  let duplicateConversation;
+  const savedEnv = {};
+
+  beforeAll(() => {
+    savedEnv.FORK_USER_MAX = process.env.FORK_USER_MAX;
+    savedEnv.FORK_USER_WINDOW = process.env.FORK_USER_WINDOW;
+    savedEnv.FORK_IP_MAX = process.env.FORK_IP_MAX;
+    savedEnv.FORK_IP_WINDOW = process.env.FORK_IP_WINDOW;
+  });
+
+  afterAll(() => {
+    for (const key of Object.keys(savedEnv)) {
+      if (savedEnv[key] === undefined) {
+        delete process.env[key];
+      } else {
+        process.env[key] = savedEnv[key];
+      }
+    }
+  });
+
+  const setupApp = () => {
+    jest.clearAllMocks();
+    jest.isolateModules(() => {
+      const convosRouter = require('../convos');
+      ({ duplicateConversation } = require('~/server/utils/import/fork'));
+
+      app = express();
+      app.use(express.json());
+      app.use((req, res, next) => {
+        req.user = { id: 'rate-limit-test-user' };
+        next();
+      });
+      app.use('/api/convos', convosRouter);
+    });
+
+    duplicateConversation.mockResolvedValue({
+      conversation: { conversationId: 'duplicated-conv' },
+    });
+  };
+
+  describe('user limit', () => {
+    beforeEach(() => {
+      process.env.FORK_USER_MAX = '2';
+      process.env.FORK_USER_WINDOW = '1';
+      process.env.FORK_IP_MAX = '100';
+      process.env.FORK_IP_WINDOW = '1';
+      setupApp();
+    });
+
+    it('should return 429 after exceeding the user rate limit', async () => {
+      const userMax = parseInt(process.env.FORK_USER_MAX, 10);
+
+      // Requests up to the configured maximum are served normally.
+      for (let i = 0; i < userMax; i++) {
+        const res = await request(app)
+          .post('/api/convos/duplicate')
+          .send({ conversationId: 'conv-123' });
+        expect(res.status).toBe(201);
+      }
+
+      const res = await request(app)
+        .post('/api/convos/duplicate')
+        .send({ conversationId: 'conv-123' });
+      expect(res.status).toBe(429);
+      expect(res.body.message).toMatch(/too many/i);
+      // The throttled request must be stopped by the limiter, not reach the handler.
+      expect(duplicateConversation).toHaveBeenCalledTimes(userMax);
+    });
+  });
+
+  describe('IP limit', () => {
+    beforeEach(() => {
+      process.env.FORK_USER_MAX = '100';
+      process.env.FORK_USER_WINDOW = '1';
+      process.env.FORK_IP_MAX = '2';
+      process.env.FORK_IP_WINDOW = '1';
+      setupApp();
+    });
+
+    it('should return 429 after exceeding the IP rate limit', async () => {
+      const ipMax = parseInt(process.env.FORK_IP_MAX, 10);
+
+      // Requests up to the configured maximum are served normally.
+      for (let i = 0; i < ipMax; i++) {
+        const res = await request(app)
+          .post('/api/convos/duplicate')
+          .send({ conversationId: 'conv-123' });
+        expect(res.status).toBe(201);
+      }
+
+      const res = await request(app)
+        .post('/api/convos/duplicate')
+        .send({ conversationId: 'conv-123' });
+      expect(res.status).toBe(429);
+      expect(res.body.message).toMatch(/too many/i);
+      // The throttled request must be stopped by the limiter, not reach the handler.
+      expect(duplicateConversation).toHaveBeenCalledTimes(ipMax);
+    });
+  });
+});
--- a/Show more
+++ b/Show more