From d18d34d9cf7e5ff3087ae12bccf14cdefc1e4465 Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Tue, 24 Mar 2026 13:36:10 -0400
Subject: [PATCH 01/11] feat: cache tool schema token counts to avoid redundant
 recalculation

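Add time-based caching (30-minute TTL) for tool schema token counts using
the existing Keyv/Redis infrastructure. The cache is keyed by provider and
a lightweight fingerprint (sorted, deduplicated tool names plus their
count), so agents sharing the same tool set share the cached value.
Because the fingerprint covers names only, a changed schema or description
under an unchanged tool name keeps the old count until the entry expires.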

New utility module (toolTokens.ts) provides reusable functions:
- getToolFingerprint: stable fingerprint from tool names
- computeToolSchemaTokens: mirrors AgentContext.calculateInstructionTokens
- getOrComputeToolTokens: cache lookup with compute-on-miss

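In createRun, buildAgentContext is now async and the per-agent contexts
are resolved with Promise.all, so cache lookups run in parallel in
multi-agent runs. When a tokenCounter is available, the resulting count
(cached or freshly computed) is passed via AgentInputs.toolSchemaTokens
so that @librechat/agents can skip calculateInstructionTokens; on a cache
hit the tool schemas are not re-tokenized at all.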
---
 api/cache/getLogStores.js             |   1 +
 packages/api/src/agents/run.ts        |  23 +++-
 packages/api/src/agents/toolTokens.ts | 169 ++++++++++++++++++++++++++
 packages/data-provider/src/config.ts  |   5 +
 4 files changed, 193 insertions(+), 5 deletions(-)
 create mode 100644 packages/api/src/agents/toolTokens.ts
diff --git a/api/cache/getLogStores.js b/api/cache/getLogStores.js
index 70eb681e53..21dedde7ec 100644
--- a/api/cache/getLogStores.js
+++ b/api/cache/getLogStores.js
@@ -56,6 +56,7 @@ const namespaces = {
     CacheKeys.ADMIN_OAUTH_EXCHANGE,
     Time.THIRTY_SECONDS,
   ),
+  [CacheKeys.TOOL_TOKENS]: standardCache(CacheKeys.TOOL_TOKENS, Time.THIRTY_MINUTES),
 };
 
 /**
diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts
index b6b5e6a14d..d0d8582819 100644
--- a/packages/api/src/agents/run.ts
+++ b/packages/api/src/agents/run.ts
@@ -18,6 +18,7 @@ import type { BaseMessage } from '@langchain/core/messages';
 import type { IUser } from '@librechat/data-schemas';
 import type * as t from '~/types';
 import { resolveHeaders, createSafeUser } from '~/utils/env';
+import { getOrComputeToolTokens } from './toolTokens';
 
 /** Expected shape of JSON tool search results */
 interface ToolSearchJsonResult {
@@ -296,7 +297,7 @@ export async function createRun({
       : new Set<string>();
 
   const agentInputs: AgentInputs[] = [];
-  const buildAgentContext = (agent: RunAgent) => {
+  const buildAgentContext = async (agent: RunAgent): Promise<AgentInputs> => {
     const provider =
       (providerEndpointMap[
         agent.provider as keyof typeof providerEndpointMap
@@ -381,11 +382,24 @@ export async function createRun({
       agent.maxContextTokens,
     );
 
+    /** Resolve cached or computed tool schema tokens */
+    let toolSchemaTokens: number | undefined;
+    if (tokenCounter) {
+      toolSchemaTokens = await getOrComputeToolTokens({
+        tools: agent.tools,
+        toolDefinitions,
+        provider,
+        clientOptions: llmConfig,
+        tokenCounter,
+      });
+    }
+
     const reasoningKey = getReasoningKey(provider, llmConfig, agent.endpoint);
     const agentInput: AgentInputs = {
       provider,
       reasoningKey,
       toolDefinitions,
+      toolSchemaTokens,
       agentId: agent.id,
       tools: agent.tools,
       clientOptions: llmConfig,
@@ -401,12 +415,11 @@ export async function createRun({
       contextPruningConfig: summarization.contextPruning,
       maxToolResultChars: agent.maxToolResultChars,
     };
-    agentInputs.push(agentInput);
+    return agentInput;
   };
 
-  for (const agent of agents) {
-    buildAgentContext(agent);
-  }
+  const resolvedInputs = await Promise.all(agents.map(buildAgentContext));
+  agentInputs.push(...resolvedInputs);
 
   const graphConfig: RunConfig['graphConfig'] = {
     signal,
diff --git a/packages/api/src/agents/toolTokens.ts b/packages/api/src/agents/toolTokens.ts
new file mode 100644
index 0000000000..98afd31500
--- /dev/null
+++ b/packages/api/src/agents/toolTokens.ts
@@ -0,0 +1,169 @@
+import { SystemMessage } from '@langchain/core/messages';
+import {
+  Providers,
+  toJsonSchema,
+  ANTHROPIC_TOOL_TOKEN_MULTIPLIER,
+  DEFAULT_TOOL_TOKEN_MULTIPLIER,
+} from '@librechat/agents';
+import { CacheKeys, Time } from 'librechat-data-provider';
+import { standardCache } from '~/cache';
+import type { Keyv } from 'keyv';
+import type { GenericTool, LCTool, TokenCounter, ClientOptions } from '@librechat/agents';
+
+/** Module-level cache instance, lazily initialized. */
+let toolTokenCache: Keyv | undefined;
+
+function getCache(): Keyv {
+  if (!toolTokenCache) {
+    toolTokenCache = standardCache(CacheKeys.TOOL_TOKENS, Time.THIRTY_MINUTES);
+  }
+  return toolTokenCache;
+}
+
+/**
+ * Builds a lightweight fingerprint from tool names.
+ * Sorted and deduplicated to ensure stability regardless of tool ordering.
+ */
+export function getToolFingerprint(tools?: GenericTool[], toolDefinitions?: LCTool[]): string {
+  const names = new Set<string>();
+
+  if (tools) {
+    for (const tool of tools) {
+      const name = (tool as unknown as Record<string, unknown>).name;
+      if (typeof name === 'string' && name) {
+        names.add(name);
+      }
+    }
+  }
+
+  if (toolDefinitions) {
+    for (const def of toolDefinitions) {
+      if (def.name) {
+        names.add(def.name);
+      }
+    }
+  }
+
+  if (names.size === 0) {
+    return '';
+  }
+
+  const sorted = Array.from(names).sort();
+  return sorted.join(',') + '|' + sorted.length;
+}
+
+/**
+ * Determines the provider-specific token multiplier for tool schemas.
+ */
+function getToolTokenMultiplier(provider: Providers, clientOptions?: ClientOptions): number {
+  const isAnthropic =
+    provider !== Providers.BEDROCK &&
+    (provider === Providers.ANTHROPIC ||
+      /anthropic|claude/i.test(
+        String((clientOptions as { model?: string } | undefined)?.model ?? ''),
+      ));
+  return isAnthropic ? ANTHROPIC_TOOL_TOKEN_MULTIPLIER : DEFAULT_TOOL_TOKEN_MULTIPLIER;
+}
+
+/**
+ * Computes tool schema tokens from scratch using the provided token counter.
+ * Mirrors the logic in AgentContext.calculateInstructionTokens().
+ */
+export function computeToolSchemaTokens(
+  tools: GenericTool[] | undefined,
+  toolDefinitions: LCTool[] | undefined,
+  provider: Providers,
+  clientOptions: ClientOptions | undefined,
+  tokenCounter: TokenCounter,
+): number {
+  let toolTokens = 0;
+  const countedToolNames = new Set<string>();
+
+  if (tools && tools.length > 0) {
+    for (const tool of tools) {
+      const genericTool = tool as unknown as Record<string, unknown>;
+      if (genericTool.schema != null && typeof genericTool.schema === 'object') {
+        const toolName = (genericTool.name as string | undefined) ?? '';
+        const jsonSchema = toJsonSchema(
+          genericTool.schema,
+          toolName,
+          (genericTool.description as string | undefined) ?? '',
+        );
+        toolTokens += tokenCounter(new SystemMessage(JSON.stringify(jsonSchema)));
+        if (toolName) {
+          countedToolNames.add(toolName);
+        }
+      }
+    }
+  }
+
+  if (toolDefinitions && toolDefinitions.length > 0) {
+    for (const def of toolDefinitions) {
+      if (countedToolNames.has(def.name)) {
+        continue;
+      }
+      const schema = {
+        type: 'function',
+        function: {
+          name: def.name,
+          description: def.description ?? '',
+          parameters: def.parameters ?? {},
+        },
+      };
+      toolTokens += tokenCounter(new SystemMessage(JSON.stringify(schema)));
+    }
+  }
+
+  const multiplier = getToolTokenMultiplier(provider, clientOptions);
+  return Math.ceil(toolTokens * multiplier);
+}
+
+/**
+ * Returns cached tool schema tokens if the fingerprint matches,
+ * otherwise computes them, caches the result (fire-and-forget), and returns.
+ *
+ * Returns 0 if there are no tools (no caching needed).
+ */
+export async function getOrComputeToolTokens({
+  tools,
+  toolDefinitions,
+  provider,
+  clientOptions,
+  tokenCounter,
+}: {
+  tools?: GenericTool[];
+  toolDefinitions?: LCTool[];
+  provider: Providers;
+  clientOptions?: ClientOptions;
+  tokenCounter: TokenCounter;
+}): Promise<number> {
+  const fingerprint = getToolFingerprint(tools, toolDefinitions);
+  if (!fingerprint) {
+    return 0;
+  }
+
+  const cacheKey = `${provider}:${fingerprint}`;
+  const cache = getCache();
+
+  const cached = (await cache.get(cacheKey)) as number | undefined;
+  if (cached != null && cached > 0) {
+    return cached;
+  }
+
+  const tokens = computeToolSchemaTokens(
+    tools,
+    toolDefinitions,
+    provider,
+    clientOptions,
+    tokenCounter,
+  );
+
+  if (tokens > 0) {
+    /** Fire-and-forget write — don't block the run on cache persistence */
+    cache.set(cacheKey, tokens).catch(() => {
+      /* swallow cache write errors */
+    });
+  }
+
+  return tokens;
+}
diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts
index ae3f5b9560..f4429f6547 100644
--- a/packages/data-provider/src/config.ts
+++ b/packages/data-provider/src/config.ts
@@ -1529,6 +1529,11 @@ export enum CacheKeys {
    * Key for admin panel OAuth exchange codes (one-time-use, short TTL).
    */
   ADMIN_OAUTH_EXCHANGE = 'ADMIN_OAUTH_EXCHANGE',
+  /**
+   * Key for cached tool schema token counts.
+   * Keyed by provider + tool fingerprint to avoid redundant token counting.
+   */
+  TOOL_TOKENS = 'TOOL_TOKENS',
 }
 
 /**

From dc088d19fbb8dbf8f9679a0157b08366e1f9e3c6 Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Wed, 1 Apr 2026 22:40:13 -0400
Subject: [PATCH 02/11] fix: address review findings for tool token caching

- Wrap cache.get() in try/catch so Redis failures fall through to
  compute instead of killing the agent run (F1)
- Include multiplier classification in cache key to prevent collisions
  between Anthropic and non-Anthropic models sharing a provider (F2)
- Remove dead TOOL_TOKENS entry from getLogStores.js; toolTokens.ts
  manages its own Keyv instance (F4)
- Add logger.debug for cache read/write failures (F7)
- Export toolTokens from agents barrel (F8)
- Fix import ordering (F9)
- Remove intermediate agentInputs array (F10)
- Remove narrating comments (F11)
---
 api/cache/getLogStores.js             |  1 -
 packages/api/src/agents/index.ts      |  1 +
 packages/api/src/agents/run.ts        |  5 +--
 packages/api/src/agents/toolTokens.ts | 45 +++++++++++++--------------
 4 files changed, 23 insertions(+), 29 deletions(-)

diff --git a/api/cache/getLogStores.js b/api/cache/getLogStores.js
index 21dedde7ec..70eb681e53 100644
--- a/api/cache/getLogStores.js
+++ b/api/cache/getLogStores.js
@@ -56,7 +56,6 @@ const namespaces = {
     CacheKeys.ADMIN_OAUTH_EXCHANGE,
     Time.THIRTY_SECONDS,
   ),
-  [CacheKeys.TOOL_TOKENS]: standardCache(CacheKeys.TOOL_TOKENS, Time.THIRTY_MINUTES),
 };
 
 /**
diff --git a/packages/api/src/agents/index.ts b/packages/api/src/agents/index.ts
index 53f7f60a93..fbc46bfb3e 100644
--- a/packages/api/src/agents/index.ts
+++ b/packages/api/src/agents/index.ts
@@ -19,3 +19,4 @@ export * from './tools';
 export * from './validation';
 export * from './added';
 export * from './load';
+export * from './toolTokens';
diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts
index d0d8582819..a728ed38bf 100644
--- a/packages/api/src/agents/run.ts
+++ b/packages/api/src/agents/run.ts
@@ -296,7 +296,6 @@ export async function createRun({
       ? extractDiscoveredToolsFromHistory(messages)
       : new Set<string>();
 
-  const agentInputs: AgentInputs[] = [];
   const buildAgentContext = async (agent: RunAgent): Promise<AgentInputs> => {
     const provider =
       (providerEndpointMap[
@@ -382,7 +381,6 @@ export async function createRun({
       agent.maxContextTokens,
     );
 
-    /** Resolve cached or computed tool schema tokens */
     let toolSchemaTokens: number | undefined;
     if (tokenCounter) {
       toolSchemaTokens = await getOrComputeToolTokens({
@@ -418,8 +416,7 @@ export async function createRun({
     return agentInput;
   };
 
-  const resolvedInputs = await Promise.all(agents.map(buildAgentContext));
-  agentInputs.push(...resolvedInputs);
+  const agentInputs = await Promise.all(agents.map(buildAgentContext));
 
   const graphConfig: RunConfig['graphConfig'] = {
     signal,
diff --git a/packages/api/src/agents/toolTokens.ts b/packages/api/src/agents/toolTokens.ts
index 98afd31500..ee558740a7 100644
--- a/packages/api/src/agents/toolTokens.ts
+++ b/packages/api/src/agents/toolTokens.ts
@@ -6,9 +6,12 @@ import {
   DEFAULT_TOOL_TOKEN_MULTIPLIER,
 } from '@librechat/agents';
 import { CacheKeys, Time } from 'librechat-data-provider';
-import { standardCache } from '~/cache';
-import type { Keyv } from 'keyv';
+
 import type { GenericTool, LCTool, TokenCounter, ClientOptions } from '@librechat/agents';
+import type { Keyv } from 'keyv';
+
+import { logger } from '@librechat/data-schemas';
+import { standardCache } from '~/cache';
 
 /** Module-level cache instance, lazily initialized. */
 let toolTokenCache: Keyv | undefined;
@@ -20,10 +23,6 @@ function getCache(): Keyv {
   return toolTokenCache;
 }
 
-/**
- * Builds a lightweight fingerprint from tool names.
- * Sorted and deduplicated to ensure stability regardless of tool ordering.
- */
 export function getToolFingerprint(tools?: GenericTool[], toolDefinitions?: LCTool[]): string {
   const names = new Set<string>();
 
@@ -52,9 +51,6 @@ export function getToolFingerprint(tools?: GenericTool[], toolDefinitions?: LCTo
   return sorted.join(',') + '|' + sorted.length;
 }
 
-/**
- * Determines the provider-specific token multiplier for tool schemas.
- */
 function getToolTokenMultiplier(provider: Providers, clientOptions?: ClientOptions): number {
   const isAnthropic =
     provider !== Providers.BEDROCK &&
@@ -65,10 +61,6 @@ function getToolTokenMultiplier(provider: Providers, clientOptions?: ClientOptio
   return isAnthropic ? ANTHROPIC_TOOL_TOKEN_MULTIPLIER : DEFAULT_TOOL_TOKEN_MULTIPLIER;
 }
 
-/**
- * Computes tool schema tokens from scratch using the provided token counter.
- * Mirrors the logic in AgentContext.calculateInstructionTokens().
- */
 export function computeToolSchemaTokens(
   tools: GenericTool[] | undefined,
   toolDefinitions: LCTool[] | undefined,
@@ -119,10 +111,10 @@ export function computeToolSchemaTokens(
 }
 
 /**
- * Returns cached tool schema tokens if the fingerprint matches,
- * otherwise computes them, caches the result (fire-and-forget), and returns.
- *
- * Returns 0 if there are no tools (no caching needed).
+ * Returns cached tool schema tokens or computes them on miss.
+ * Returns 0 if there are no tools.
+ * Cache errors are non-fatal — falls through to compute on read failure,
+ * logs on write failure.
  */
 export async function getOrComputeToolTokens({
   tools,
@@ -142,12 +134,18 @@ export async function getOrComputeToolTokens({
     return 0;
   }
 
-  const cacheKey = `${provider}:${fingerprint}`;
+  const multiplier = getToolTokenMultiplier(provider, clientOptions);
+  const multiplierKey = multiplier === ANTHROPIC_TOOL_TOKEN_MULTIPLIER ? 'anthropic' : 'default';
+  const cacheKey = `${provider}:${multiplierKey}:${fingerprint}`;
   const cache = getCache();
 
-  const cached = (await cache.get(cacheKey)) as number | undefined;
-  if (cached != null && cached > 0) {
-    return cached;
+  try {
+    const cached = (await cache.get(cacheKey)) as number | undefined;
+    if (cached != null && cached > 0) {
+      return cached;
+    }
+  } catch (err) {
+    logger.debug('[toolTokens] Cache read failed, computing fresh', err);
   }
 
   const tokens = computeToolSchemaTokens(
@@ -159,9 +157,8 @@ export async function getOrComputeToolTokens({
   );
 
   if (tokens > 0) {
-    /** Fire-and-forget write — don't block the run on cache persistence */
-    cache.set(cacheKey, tokens).catch(() => {
-      /* swallow cache write errors */
+    cache.set(cacheKey, tokens).catch((err: unknown) => {
+      logger.debug('[toolTokens] Cache write failed', err);
     });
   }
 

From 749666503c00913fc2e230a26856a622d3296fe4 Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Wed, 1 Apr 2026 22:45:29 -0400
Subject: [PATCH 03/11] fix: use Promise.allSettled for buildAgentContext to
 prevent single-agent failure from killing the entire run

---
 packages/api/src/agents/run.ts | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts
index a728ed38bf..a4d0324b7a 100644
--- a/packages/api/src/agents/run.ts
+++ b/packages/api/src/agents/run.ts
@@ -15,6 +15,7 @@ import type {
 } from '@librechat/agents';
 import type { Agent, SummarizationConfig } from 'librechat-data-provider';
 import type { BaseMessage } from '@langchain/core/messages';
+import { logger } from '@librechat/data-schemas';
 import type { IUser } from '@librechat/data-schemas';
 import type * as t from '~/types';
 import { resolveHeaders, createSafeUser } from '~/utils/env';
@@ -416,7 +417,16 @@ export async function createRun({
     return agentInput;
   };
 
-  const agentInputs = await Promise.all(agents.map(buildAgentContext));
+  const settled = await Promise.allSettled(agents.map(buildAgentContext));
+  const agentInputs: AgentInputs[] = [];
+  for (let i = 0; i < settled.length; i++) {
+    const result = settled[i];
+    if (result.status === 'fulfilled') {
+      agentInputs.push(result.value);
+    } else {
+      logger.error(`[createRun] buildAgentContext failed for agent ${agents[i].id}`, result.reason);
+    }
+  }
 
   const graphConfig: RunConfig['graphConfig'] = {
     signal,

From 90d32b691e7035a49e5c2ca0c7e4cb50904ea43b Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Wed, 1 Apr 2026 22:49:05 -0400
Subject: [PATCH 04/11] refactor: single-pass collectToolData to avoid
 redundant tool iteration

Extract collectToolData(), which builds both the fingerprint names and
the serialized schemas in one pass over tools + toolDefinitions.
getOrComputeToolTokens uses the pre-collected schemas directly on a
cache miss instead of re-looping. getToolFingerprint and
computeToolSchemaTokens delegate to the same shared function for
standalone use. Trade-off: schemas are now serialized before the cache
lookup, so a cache hit still pays for toJsonSchema/JSON.stringify and
only skips the token counter.
---
 packages/api/src/agents/toolTokens.ts | 159 ++++++++++++++------------
 1 file changed, 84 insertions(+), 75 deletions(-)

diff --git a/packages/api/src/agents/toolTokens.ts b/packages/api/src/agents/toolTokens.ts
index ee558740a7..55938d42eb 100644
--- a/packages/api/src/agents/toolTokens.ts
+++ b/packages/api/src/agents/toolTokens.ts
@@ -23,34 +23,6 @@ function getCache(): Keyv {
   return toolTokenCache;
 }
 
-export function getToolFingerprint(tools?: GenericTool[], toolDefinitions?: LCTool[]): string {
-  const names = new Set<string>();
-
-  if (tools) {
-    for (const tool of tools) {
-      const name = (tool as unknown as Record<string, unknown>).name;
-      if (typeof name === 'string' && name) {
-        names.add(name);
-      }
-    }
-  }
-
-  if (toolDefinitions) {
-    for (const def of toolDefinitions) {
-      if (def.name) {
-        names.add(def.name);
-      }
-    }
-  }
-
-  if (names.size === 0) {
-    return '';
-  }
-
-  const sorted = Array.from(names).sort();
-  return sorted.join(',') + '|' + sorted.length;
-}
-
 function getToolTokenMultiplier(provider: Providers, clientOptions?: ClientOptions): number {
   const isAnthropic =
     provider !== Providers.BEDROCK &&
@@ -61,6 +33,77 @@ function getToolTokenMultiplier(provider: Providers, clientOptions?: ClientOptio
   return isAnthropic ? ANTHROPIC_TOOL_TOKEN_MULTIPLIER : DEFAULT_TOOL_TOKEN_MULTIPLIER;
 }
 
+/**
+ * Single pass over tools and toolDefinitions. Collects deduplicated sorted
+ * tool names (for fingerprint) and pre-serialized schemas (for token
+ * counting on cache miss), mirroring the dedup logic in
+ * AgentContext.calculateInstructionTokens().
+ */
+function collectToolData(
+  tools?: GenericTool[],
+  toolDefinitions?: LCTool[],
+): { names: string[]; schemas: string[] } {
+  const nameSet = new Set<string>();
+  const countedNames = new Set<string>();
+  const schemas: string[] = [];
+
+  if (tools) {
+    for (const tool of tools) {
+      const genericTool = tool as unknown as Record<string, unknown>;
+      const toolName = (genericTool.name as string | undefined) ?? '';
+      if (toolName) {
+        nameSet.add(toolName);
+      }
+      if (genericTool.schema != null && typeof genericTool.schema === 'object') {
+        schemas.push(
+          JSON.stringify(
+            toJsonSchema(
+              genericTool.schema,
+              toolName,
+              (genericTool.description as string | undefined) ?? '',
+            ),
+          ),
+        );
+        if (toolName) {
+          countedNames.add(toolName);
+        }
+      }
+    }
+  }
+
+  if (toolDefinitions) {
+    for (const def of toolDefinitions) {
+      if (def.name) {
+        nameSet.add(def.name);
+      }
+      if (countedNames.has(def.name)) {
+        continue;
+      }
+      schemas.push(
+        JSON.stringify({
+          type: 'function',
+          function: {
+            name: def.name,
+            description: def.description ?? '',
+            parameters: def.parameters ?? {},
+          },
+        }),
+      );
+    }
+  }
+
+  const names = nameSet.size > 0 ? Array.from(nameSet).sort() : [];
+  return { names, schemas };
+}
+
+export function getToolFingerprint(tools?: GenericTool[], toolDefinitions?: LCTool[]): string {
+  const { names } = collectToolData(tools, toolDefinitions);
+  if (names.length === 0) {
+    return '';
+  }
+  return names.join(',') + '|' + names.length;
+}
+
 export function computeToolSchemaTokens(
   tools: GenericTool[] | undefined,
   toolDefinitions: LCTool[] | undefined,
@@ -68,44 +111,11 @@ export function computeToolSchemaTokens(
   clientOptions: ClientOptions | undefined,
   tokenCounter: TokenCounter,
 ): number {
+  const { schemas } = collectToolData(tools, toolDefinitions);
   let toolTokens = 0;
-  const countedToolNames = new Set<string>();
-
-  if (tools && tools.length > 0) {
-    for (const tool of tools) {
-      const genericTool = tool as unknown as Record<string, unknown>;
-      if (genericTool.schema != null && typeof genericTool.schema === 'object') {
-        const toolName = (genericTool.name as string | undefined) ?? '';
-        const jsonSchema = toJsonSchema(
-          genericTool.schema,
-          toolName,
-          (genericTool.description as string | undefined) ?? '',
-        );
-        toolTokens += tokenCounter(new SystemMessage(JSON.stringify(jsonSchema)));
-        if (toolName) {
-          countedToolNames.add(toolName);
-        }
-      }
-    }
+  for (const schema of schemas) {
+    toolTokens += tokenCounter(new SystemMessage(schema));
   }
-
-  if (toolDefinitions && toolDefinitions.length > 0) {
-    for (const def of toolDefinitions) {
-      if (countedToolNames.has(def.name)) {
-        continue;
-      }
-      const schema = {
-        type: 'function',
-        function: {
-          name: def.name,
-          description: def.description ?? '',
-          parameters: def.parameters ?? {},
-        },
-      };
-      toolTokens += tokenCounter(new SystemMessage(JSON.stringify(schema)));
-    }
-  }
-
   const multiplier = getToolTokenMultiplier(provider, clientOptions);
   return Math.ceil(toolTokens * multiplier);
 }
@@ -113,8 +123,8 @@ export function computeToolSchemaTokens(
 /**
  * Returns cached tool schema tokens or computes them on miss.
  * Returns 0 if there are no tools.
- * Cache errors are non-fatal — falls through to compute on read failure,
- * logs on write failure.
+ * Single pass over tool arrays: builds fingerprint and serialized schemas
+ * together, then only runs the token counter if the cache misses.
  */
 export async function getOrComputeToolTokens({
   tools,
@@ -129,11 +139,12 @@ export async function getOrComputeToolTokens({
   clientOptions?: ClientOptions;
   tokenCounter: TokenCounter;
 }): Promise<number> {
-  const fingerprint = getToolFingerprint(tools, toolDefinitions);
-  if (!fingerprint) {
+  const { names, schemas } = collectToolData(tools, toolDefinitions);
+  if (names.length === 0) {
     return 0;
   }
 
+  const fingerprint = names.join(',') + '|' + names.length;
   const multiplier = getToolTokenMultiplier(provider, clientOptions);
   const multiplierKey = multiplier === ANTHROPIC_TOOL_TOKEN_MULTIPLIER ? 'anthropic' : 'default';
   const cacheKey = `${provider}:${multiplierKey}:${fingerprint}`;
@@ -148,13 +159,11 @@ export async function getOrComputeToolTokens({
     logger.debug('[toolTokens] Cache read failed, computing fresh', err);
   }
 
-  const tokens = computeToolSchemaTokens(
-    tools,
-    toolDefinitions,
-    provider,
-    clientOptions,
-    tokenCounter,
-  );
+  let toolTokens = 0;
+  for (const schema of schemas) {
+    toolTokens += tokenCounter(new SystemMessage(schema));
+  }
+  const tokens = Math.ceil(toolTokens * multiplier);
 
   if (tokens > 0) {
     cache.set(cacheKey, tokens).catch((err: unknown) => {

From 80727a0c8697ca742f71e5fdb72ebf310e7741b2 Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Wed, 1 Apr 2026 22:52:49 -0400
Subject: [PATCH 05/11] test: add comprehensive tests for toolTokens utility
 module

Covers all three exported functions:

getToolFingerprint:
- empty inputs, sorted output, dedup across tools/defs, ordering stability

computeToolSchemaTokens:
- empty inputs, GenericTool schemas, LCTool definitions, deduplication,
  Anthropic multiplier (provider, model name, Bedrock exclusion)

getOrComputeToolTokens:
- empty inputs, cache miss compute+write, cache hit (no recompute),
  per-provider separation, shared cache across agents, recompute on
  tool change, cache read failure fallback, cache write failure resilience,
  GenericTool fingerprint verification
---
 packages/api/src/agents/toolTokens.spec.ts | 377 +++++++++++++++++++++
 1 file changed, 377 insertions(+)
 create mode 100644 packages/api/src/agents/toolTokens.spec.ts

diff --git a/packages/api/src/agents/toolTokens.spec.ts b/packages/api/src/agents/toolTokens.spec.ts
new file mode 100644
index 0000000000..f5f0db870e
--- /dev/null
+++ b/packages/api/src/agents/toolTokens.spec.ts
@@ -0,0 +1,377 @@
+import { z } from 'zod';
+import { SystemMessage } from '@langchain/core/messages';
+import { DynamicStructuredTool } from '@langchain/core/tools';
+import {
+  Providers,
+  ANTHROPIC_TOOL_TOKEN_MULTIPLIER,
+  DEFAULT_TOOL_TOKEN_MULTIPLIER,
+} from '@librechat/agents';
+import type { GenericTool, LCTool, TokenCounter } from '@librechat/agents';
+import { getToolFingerprint, computeToolSchemaTokens, getOrComputeToolTokens } from './toolTokens';
+
+/* ---------- Mock standardCache to use a plain Map (no Redis) ---------- */
+const mockCacheStore = new Map<string, unknown>();
+jest.mock('~/cache', () => ({
+  standardCache: jest.fn(() => ({
+    get: jest.fn((key: string) => Promise.resolve(mockCacheStore.get(key))),
+    set: jest.fn((key: string, value: unknown) => {
+      mockCacheStore.set(key, value);
+      return Promise.resolve(true);
+    }),
+  })),
+}));
+
+jest.mock('@librechat/data-schemas', () => ({
+  logger: { debug: jest.fn(), error: jest.fn(), warn: jest.fn(), info: jest.fn() },
+}));
+
+/* ---------- Helpers ---------- */
+
+function makeTool(name: string, description = `${name} description`): GenericTool {
+  return new DynamicStructuredTool({
+    name,
+    description,
+    schema: z.object({ input: z.string().optional() }),
+    func: async () => 'ok',
+  }) as unknown as GenericTool;
+}
+
+function makeToolDef(name: string, description?: string): LCTool {
+  return {
+    name,
+    description: description ?? `${name} description`,
+    parameters: { type: 'object', properties: { input: { type: 'string' } } },
+  };
+}
+
+/** Token counter that returns the string length of message content (deterministic). */
+const fakeTokenCounter: TokenCounter = (msg) => {
+  const content = typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
+  return content.length;
+};
+
+beforeEach(() => {
+  mockCacheStore.clear();
+});
+
+/* ========================================================================= */
+/*  getToolFingerprint                                                       */
+/* ========================================================================= */
+
+describe('getToolFingerprint', () => {
+  it('returns empty string when no tools or definitions provided', () => {
+    expect(getToolFingerprint()).toBe('');
+    expect(getToolFingerprint([], [])).toBe('');
+  });
+
+  it('returns sorted names with count from GenericTool array', () => {
+    const tools = [makeTool('beta'), makeTool('alpha')];
+    expect(getToolFingerprint(tools)).toBe('alpha,beta|2');
+  });
+
+  it('returns sorted names with count from LCTool definitions', () => {
+    const defs = [makeToolDef('zulu'), makeToolDef('alpha')];
+    expect(getToolFingerprint(undefined, defs)).toBe('alpha,zulu|2');
+  });
+
+  it('deduplicates names across tools and toolDefinitions', () => {
+    const tools = [makeTool('shared'), makeTool('only_tool')];
+    const defs = [makeToolDef('shared'), makeToolDef('only_def')];
+    expect(getToolFingerprint(tools, defs)).toBe('only_def,only_tool,shared|3');
+  });
+
+  it('is stable regardless of input ordering', () => {
+    const a = getToolFingerprint([makeTool('x'), makeTool('a'), makeTool('m')]);
+    const b = getToolFingerprint([makeTool('m'), makeTool('x'), makeTool('a')]);
+    expect(a).toBe(b);
+    expect(a).toBe('a,m,x|3');
+  });
+});
+
+/* ========================================================================= */
+/*  computeToolSchemaTokens                                                  */
+/* ========================================================================= */
+
+describe('computeToolSchemaTokens', () => {
+  it('returns 0 when no tools provided', () => {
+    expect(
+      computeToolSchemaTokens(undefined, undefined, Providers.OPENAI, undefined, fakeTokenCounter),
+    ).toBe(0);
+    expect(computeToolSchemaTokens([], [], Providers.OPENAI, undefined, fakeTokenCounter)).toBe(0);
+  });
+
+  it('counts tokens from GenericTool schemas', () => {
+    const tools = [makeTool('test_tool')];
+    const result = computeToolSchemaTokens(
+      tools,
+      undefined,
+      Providers.OPENAI,
+      undefined,
+      fakeTokenCounter,
+    );
+    expect(result).toBeGreaterThan(0);
+  });
+
+  it('counts tokens from LCTool definitions', () => {
+    const defs = [makeToolDef('test_def')];
+    const result = computeToolSchemaTokens(
+      undefined,
+      defs,
+      Providers.OPENAI,
+      undefined,
+      fakeTokenCounter,
+    );
+    expect(result).toBeGreaterThan(0);
+  });
+
+  it('deduplicates: tool counted from tools array is skipped in toolDefinitions', () => {
+    const tools = [makeTool('shared')];
+    const defs = [makeToolDef('shared')];
+
+    const toolsOnly = computeToolSchemaTokens(
+      tools,
+      undefined,
+      Providers.OPENAI,
+      undefined,
+      fakeTokenCounter,
+    );
+    const both = computeToolSchemaTokens(
+      tools,
+      defs,
+      Providers.OPENAI,
+      undefined,
+      fakeTokenCounter,
+    );
+
+    expect(both).toBe(toolsOnly);
+  });
+
+  it('applies Anthropic multiplier for Anthropic provider', () => {
+    const defs = [makeToolDef('tool')];
+    const openai = computeToolSchemaTokens(
+      undefined,
+      defs,
+      Providers.OPENAI,
+      undefined,
+      fakeTokenCounter,
+    );
+    const anthropic = computeToolSchemaTokens(
+      undefined,
+      defs,
+      Providers.ANTHROPIC,
+      undefined,
+      fakeTokenCounter,
+    );
+
+    const expectedRatio = ANTHROPIC_TOOL_TOKEN_MULTIPLIER / DEFAULT_TOOL_TOKEN_MULTIPLIER;
+    expect(anthropic / openai).toBeCloseTo(expectedRatio, 1);
+  });
+
+  it('applies Anthropic multiplier when model name contains "claude"', () => {
+    const defs = [makeToolDef('tool')];
+    const clientOptions = { model: 'claude-3-opus' };
+    const result = computeToolSchemaTokens(
+      undefined,
+      defs,
+      Providers.OPENAI,
+      clientOptions,
+      fakeTokenCounter,
+    );
+
+    const defaultResult = computeToolSchemaTokens(
+      undefined,
+      defs,
+      Providers.OPENAI,
+      undefined,
+      fakeTokenCounter,
+    );
+    expect(result).toBeGreaterThan(defaultResult);
+  });
+
+  it('does not apply Anthropic multiplier for Bedrock even with claude model', () => {
+    const defs = [makeToolDef('tool')];
+    const clientOptions = { model: 'claude-3-opus' };
+    const bedrock = computeToolSchemaTokens(
+      undefined,
+      defs,
+      Providers.BEDROCK,
+      clientOptions,
+      fakeTokenCounter,
+    );
+    const defaultResult = computeToolSchemaTokens(
+      undefined,
+      defs,
+      Providers.OPENAI,
+      undefined,
+      fakeTokenCounter,
+    );
+
+    expect(bedrock).toBe(defaultResult);
+  });
+});
+
+/* ========================================================================= */
+/*  getOrComputeToolTokens                                                   */
+/* ========================================================================= */
+
+describe('getOrComputeToolTokens', () => {
+  it('returns 0 when no tools provided', async () => {
+    const result = await getOrComputeToolTokens({
+      provider: Providers.OPENAI,
+      tokenCounter: fakeTokenCounter,
+    });
+    expect(result).toBe(0);
+  });
+
+  it('computes and caches tokens on first call', async () => {
+    const defs = [makeToolDef('tool_a'), makeToolDef('tool_b')];
+    const result = await getOrComputeToolTokens({
+      toolDefinitions: defs,
+      provider: Providers.OPENAI,
+      tokenCounter: fakeTokenCounter,
+    });
+
+    expect(result).toBeGreaterThan(0);
+    expect(mockCacheStore.size).toBe(1);
+
+    const cachedValue = Array.from(mockCacheStore.values())[0];
+    expect(cachedValue).toBe(result);
+  });
+
+  it('returns cached value on second call without recomputing', async () => {
+    const defs = [makeToolDef('tool_a')];
+    const counter = jest.fn(fakeTokenCounter);
+
+    const first = await getOrComputeToolTokens({
+      toolDefinitions: defs,
+      provider: Providers.OPENAI,
+      tokenCounter: counter,
+    });
+
+    const callCountAfterFirst = counter.mock.calls.length;
+
+    const second = await getOrComputeToolTokens({
+      toolDefinitions: defs,
+      provider: Providers.OPENAI,
+      tokenCounter: counter,
+    });
+
+    expect(second).toBe(first);
+    expect(counter.mock.calls.length).toBe(callCountAfterFirst);
+  });
+
+  it('caches separately for different providers with different multipliers', async () => {
+    const defs = [makeToolDef('tool')];
+
+    const openai = await getOrComputeToolTokens({
+      toolDefinitions: defs,
+      provider: Providers.OPENAI,
+      tokenCounter: fakeTokenCounter,
+    });
+
+    const anthropic = await getOrComputeToolTokens({
+      toolDefinitions: defs,
+      provider: Providers.ANTHROPIC,
+      tokenCounter: fakeTokenCounter,
+    });
+
+    expect(openai).not.toBe(anthropic);
+    expect(mockCacheStore.size).toBe(2);
+  });
+
+  it('shares cache for same provider+tools across calls with different agents', async () => {
+    const defs = [makeToolDef('shared_tool')];
+
+    const first = await getOrComputeToolTokens({
+      toolDefinitions: defs,
+      provider: Providers.OPENAI,
+      tokenCounter: fakeTokenCounter,
+    });
+
+    const second = await getOrComputeToolTokens({
+      toolDefinitions: defs,
+      provider: Providers.OPENAI,
+      tokenCounter: fakeTokenCounter,
+    });
+
+    expect(first).toBe(second);
+    expect(mockCacheStore.size).toBe(1);
+  });
+
+  it('recomputes when tool set changes', async () => {
+    const first = await getOrComputeToolTokens({
+      toolDefinitions: [makeToolDef('tool_a')],
+      provider: Providers.OPENAI,
+      tokenCounter: fakeTokenCounter,
+    });
+
+    const second = await getOrComputeToolTokens({
+      toolDefinitions: [makeToolDef('tool_a'), makeToolDef('tool_b')],
+      provider: Providers.OPENAI,
+      tokenCounter: fakeTokenCounter,
+    });
+
+    expect(second).not.toBe(first);
+    expect(second).toBeGreaterThan(first);
+    expect(mockCacheStore.size).toBe(2);
+  });
+
+  it('falls back to compute when cache read throws', async () => {
+    const { standardCache } = jest.requireMock('~/cache') as { standardCache: jest.Mock };
+    const failingCache = {
+      get: jest.fn(() => Promise.reject(new Error('Redis down'))),
+      set: jest.fn(() => Promise.resolve(true)),
+    };
+    standardCache.mockReturnValueOnce(failingCache);
+
+    /** Reset the module-level cache so it picks up the failing mock */
+    jest.resetModules();
+    const { getOrComputeToolTokens: freshGetOrCompute } = await import('./toolTokens');
+
+    const defs = [makeToolDef('tool')];
+    const result = await freshGetOrCompute({
+      toolDefinitions: defs,
+      provider: Providers.OPENAI,
+      tokenCounter: fakeTokenCounter,
+    });
+
+    expect(result).toBeGreaterThan(0);
+  });
+
+  it('does not throw when cache write fails', async () => {
+    const { standardCache } = jest.requireMock('~/cache') as { standardCache: jest.Mock };
+    const writeFailCache = {
+      get: jest.fn(() => Promise.resolve(undefined)),
+      set: jest.fn(() => Promise.reject(new Error('Redis write error'))),
+    };
+    standardCache.mockReturnValueOnce(writeFailCache);
+
+    jest.resetModules();
+    const { getOrComputeToolTokens: freshGetOrCompute } = await import('./toolTokens');
+
+    const defs = [makeToolDef('tool')];
+    const result = await freshGetOrCompute({
+      toolDefinitions: defs,
+      provider: Providers.OPENAI,
+      tokenCounter: fakeTokenCounter,
+    });
+
+    expect(result).toBeGreaterThan(0);
+  });
+
+  it('uses GenericTool tools for fingerprint and token counting', async () => {
+    const tools = [makeTool('alpha'), makeTool('beta')];
+
+    const result = await getOrComputeToolTokens({
+      tools,
+      provider: Providers.OPENAI,
+      tokenCounter: fakeTokenCounter,
+    });
+
+    expect(result).toBeGreaterThan(0);
+    expect(mockCacheStore.size).toBe(1);
+
+    const key = Array.from(mockCacheStore.keys())[0];
+    expect(key).toContain('alpha,beta|2');
+  });
+});

From af2cbbcc54c76c7837b86737777c1347627508df Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Wed, 1 Apr 2026 22:58:14 -0400
Subject: [PATCH 06/11] fix: guard against empty/partial agent initialization
 failures

- Throw if all agents fail to initialize (empty agentInputs)
- Throw if any agent fails in a routed multi-agent run (partial graph)
- Move getCache() inside try/catch so cache init errors are non-fatal
- Add inline comment explaining why the token-count loop is inlined
  instead of delegating to computeToolSchemaTokens
- Fix import ordering in run.ts and toolTokens.ts
---
 packages/api/src/agents/run.ts        | 19 ++++++++++++++++-
 packages/api/src/agents/toolTokens.ts | 30 +++++++++++++++++++--------
 2 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts
index a4d0324b7a..ae9062d9a9 100644
--- a/packages/api/src/agents/run.ts
+++ b/packages/api/src/agents/run.ts
@@ -1,5 +1,6 @@
 import { Run, Providers, Constants } from '@librechat/agents';
 import { providerEndpointMap, KnownEndpoints } from 'librechat-data-provider';
+import { logger } from '@librechat/data-schemas';
 import type {
   SummarizationConfig as AgentSummarizationConfig,
   MultiAgentGraphConfig,
@@ -15,7 +16,6 @@ import type {
 } from '@librechat/agents';
 import type { Agent, SummarizationConfig } from 'librechat-data-provider';
 import type { BaseMessage } from '@langchain/core/messages';
-import { logger } from '@librechat/data-schemas';
 import type { IUser } from '@librechat/data-schemas';
 import type * as t from '~/types';
 import { resolveHeaders, createSafeUser } from '~/utils/env';
@@ -428,6 +428,23 @@ export async function createRun({
     }
   }
 
+  if (agentInputs.length === 0) {
+    throw new Error(
+      `[createRun] All ${agents.length} agent(s) failed to initialize; cannot create run`,
+    );
+  }
+
+  const hasEdges = (agents[0].edges?.length ?? 0) > 0;
+  if (agentInputs.length < agents.length && hasEdges) {
+    const failedIds = agents
+      .filter((_, i) => settled[i].status === 'rejected')
+      .map((a) => a.id)
+      .join(', ');
+    throw new Error(
+      `[createRun] Agent(s) [${failedIds}] failed in a routed multi-agent run; cannot proceed with partial graph`,
+    );
+  }
+
   const graphConfig: RunConfig['graphConfig'] = {
     signal,
     agents: agentInputs,
diff --git a/packages/api/src/agents/toolTokens.ts b/packages/api/src/agents/toolTokens.ts
index 55938d42eb..6752b0cdb0 100644
--- a/packages/api/src/agents/toolTokens.ts
+++ b/packages/api/src/agents/toolTokens.ts
@@ -6,11 +6,11 @@ import {
   DEFAULT_TOOL_TOKEN_MULTIPLIER,
 } from '@librechat/agents';
 import { CacheKeys, Time } from 'librechat-data-provider';
+import { logger } from '@librechat/data-schemas';
 
 import type { GenericTool, LCTool, TokenCounter, ClientOptions } from '@librechat/agents';
 import type { Keyv } from 'keyv';
 
-import { logger } from '@librechat/data-schemas';
 import { standardCache } from '~/cache';
 
 /** Module-level cache instance, lazily initialized. */
@@ -34,10 +34,14 @@ function getToolTokenMultiplier(provider: Providers, clientOptions?: ClientOptio
 }
 
 /**
- * Single pass over tools and toolDefinitions. Collects deduplicated sorted
- * tool names (for fingerprint) and pre-serialized schemas (for token
- * counting on cache miss), mirroring the dedup logic in
- * AgentContext.calculateInstructionTokens().
+ * Single pass over tools and toolDefinitions. Collects:
+ * - `names`: deduplicated, sorted tool names for fingerprinting.
+ * - `schemas`: pre-serialized JSON strings for token counting.
+ *
+ * `nameSet` tracks all tool names (for the fingerprint). `countedNames`
+ * tracks which tools contributed a schema from the `tools` array — a
+ * toolDefinition whose name is in `countedNames` is skipped to avoid
+ * double-counting, mirroring AgentContext.calculateInstructionTokens().
  */
 function collectToolData(
   tools?: GenericTool[],
@@ -148,9 +152,9 @@ export async function getOrComputeToolTokens({
   const multiplier = getToolTokenMultiplier(provider, clientOptions);
   const multiplierKey = multiplier === ANTHROPIC_TOOL_TOKEN_MULTIPLIER ? 'anthropic' : 'default';
   const cacheKey = `${provider}:${multiplierKey}:${fingerprint}`;
-  const cache = getCache();
 
   try {
+    const cache = getCache();
     const cached = (await cache.get(cacheKey)) as number | undefined;
     if (cached != null && cached > 0) {
       return cached;
@@ -159,6 +163,8 @@ export async function getOrComputeToolTokens({
     logger.debug('[toolTokens] Cache read failed, computing fresh', err);
   }
 
+  // Inline token count — not delegating to computeToolSchemaTokens to avoid
+  // a second collectToolData pass; schemas are already built above.
   let toolTokens = 0;
   for (const schema of schemas) {
     toolTokens += tokenCounter(new SystemMessage(schema));
@@ -166,9 +172,15 @@ export async function getOrComputeToolTokens({
   const tokens = Math.ceil(toolTokens * multiplier);
 
   if (tokens > 0) {
-    cache.set(cacheKey, tokens).catch((err: unknown) => {
-      logger.debug('[toolTokens] Cache write failed', err);
-    });
+    try {
+      getCache()
+        .set(cacheKey, tokens)
+        .catch((err: unknown) => {
+          logger.debug('[toolTokens] Cache write failed', err);
+        });
+    } catch {
+      // getCache() init failure on write path — non-fatal
+    }
   }
 
   return tokens;

From 54d1a36101b3dd7941a0c8144413e728dbb22de2 Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Wed, 1 Apr 2026 22:59:24 -0400
Subject: [PATCH 07/11] chore: fix import order in run.ts

---
 packages/api/src/agents/run.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts
index ae9062d9a9..805f1e5df8 100644
--- a/packages/api/src/agents/run.ts
+++ b/packages/api/src/agents/run.ts
@@ -1,6 +1,6 @@
+import { logger } from '@librechat/data-schemas';
 import { Run, Providers, Constants } from '@librechat/agents';
 import { providerEndpointMap, KnownEndpoints } from 'librechat-data-provider';
-import { logger } from '@librechat/data-schemas';
 import type {
   SummarizationConfig as AgentSummarizationConfig,
   MultiAgentGraphConfig,

From 06eb21c6537b1bea53d9273fef04c969f00724fa Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Wed, 1 Apr 2026 23:07:56 -0400
Subject: [PATCH 08/11] fix: repair broken cache-error tests and fix import
 ordering

- Hoist mockGet/mockSet as module-level jest.fn() instances so
  mockRejectedValueOnce targets the actual mock consumed by the module.
  Previously jest.resetModules() created a new mock instance, so the
  cache-error tests were silently testing the normal miss path.
- Reset mock implementations in beforeEach for test isolation.
- Fix the order of the first import section (package value imports)
  in toolTokens.ts (shortest line to longest).
- Add blank-line separators between import sections in spec file.
---
 packages/api/src/agents/toolTokens.spec.ts | 54 +++++++++-------------
 packages/api/src/agents/toolTokens.ts      |  4 +-
 2 files changed, 24 insertions(+), 34 deletions(-)

diff --git a/packages/api/src/agents/toolTokens.spec.ts b/packages/api/src/agents/toolTokens.spec.ts
index f5f0db870e..cc678bd514 100644
--- a/packages/api/src/agents/toolTokens.spec.ts
+++ b/packages/api/src/agents/toolTokens.spec.ts
@@ -1,24 +1,25 @@
 import { z } from 'zod';
-import { SystemMessage } from '@langchain/core/messages';
 import { DynamicStructuredTool } from '@langchain/core/tools';
 import {
   Providers,
   ANTHROPIC_TOOL_TOKEN_MULTIPLIER,
   DEFAULT_TOOL_TOKEN_MULTIPLIER,
 } from '@librechat/agents';
+
 import type { GenericTool, LCTool, TokenCounter } from '@librechat/agents';
+
 import { getToolFingerprint, computeToolSchemaTokens, getOrComputeToolTokens } from './toolTokens';
 
-/* ---------- Mock standardCache to use a plain Map (no Redis) ---------- */
+/* ---------- Mock standardCache with hoisted get/set for per-test overrides ---------- */
 const mockCacheStore = new Map<string, unknown>();
+const mockGet = jest.fn((key: string) => Promise.resolve(mockCacheStore.get(key)));
+const mockSet = jest.fn((key: string, value: unknown) => {
+  mockCacheStore.set(key, value);
+  return Promise.resolve(true);
+});
+
 jest.mock('~/cache', () => ({
-  standardCache: jest.fn(() => ({
-    get: jest.fn((key: string) => Promise.resolve(mockCacheStore.get(key))),
-    set: jest.fn((key: string, value: unknown) => {
-      mockCacheStore.set(key, value);
-      return Promise.resolve(true);
-    }),
-  })),
+  standardCache: jest.fn(() => ({ get: mockGet, set: mockSet })),
 }));
 
 jest.mock('@librechat/data-schemas', () => ({
@@ -52,6 +53,11 @@ const fakeTokenCounter: TokenCounter = (msg) => {
 
 beforeEach(() => {
   mockCacheStore.clear();
+  mockGet.mockImplementation((key: string) => Promise.resolve(mockCacheStore.get(key)));
+  mockSet.mockImplementation((key: string, value: unknown) => {
+    mockCacheStore.set(key, value);
+    return Promise.resolve(true);
+  });
 });
 
 /* ========================================================================= */
@@ -177,7 +183,6 @@ describe('computeToolSchemaTokens', () => {
       clientOptions,
       fakeTokenCounter,
     );
-
     const defaultResult = computeToolSchemaTokens(
       undefined,
       defs,
@@ -317,46 +322,31 @@ describe('getOrComputeToolTokens', () => {
   });
 
   it('falls back to compute when cache read throws', async () => {
-    const { standardCache } = jest.requireMock('~/cache') as { standardCache: jest.Mock };
-    const failingCache = {
-      get: jest.fn(() => Promise.reject(new Error('Redis down'))),
-      set: jest.fn(() => Promise.resolve(true)),
-    };
-    standardCache.mockReturnValueOnce(failingCache);
-
-    /** Reset the module-level cache so it picks up the failing mock */
-    jest.resetModules();
-    const { getOrComputeToolTokens: freshGetOrCompute } = await import('./toolTokens');
+    mockGet.mockRejectedValueOnce(new Error('Redis down'));
 
     const defs = [makeToolDef('tool')];
-    const result = await freshGetOrCompute({
+    const result = await getOrComputeToolTokens({
       toolDefinitions: defs,
       provider: Providers.OPENAI,
       tokenCounter: fakeTokenCounter,
     });
 
     expect(result).toBeGreaterThan(0);
+    expect(mockGet).toHaveBeenCalled();
   });
 
   it('does not throw when cache write fails', async () => {
-    const { standardCache } = jest.requireMock('~/cache') as { standardCache: jest.Mock };
-    const writeFailCache = {
-      get: jest.fn(() => Promise.resolve(undefined)),
-      set: jest.fn(() => Promise.reject(new Error('Redis write error'))),
-    };
-    standardCache.mockReturnValueOnce(writeFailCache);
+    mockSet.mockRejectedValueOnce(new Error('Redis write error'));
 
-    jest.resetModules();
-    const { getOrComputeToolTokens: freshGetOrCompute } = await import('./toolTokens');
-
-    const defs = [makeToolDef('tool')];
-    const result = await freshGetOrCompute({
+    const defs = [makeToolDef('tool_write_fail')];
+    const result = await getOrComputeToolTokens({
       toolDefinitions: defs,
       provider: Providers.OPENAI,
       tokenCounter: fakeTokenCounter,
     });
 
     expect(result).toBeGreaterThan(0);
+    expect(mockSet).toHaveBeenCalled();
   });
 
   it('uses GenericTool tools for fingerprint and token counting', async () => {
diff --git a/packages/api/src/agents/toolTokens.ts b/packages/api/src/agents/toolTokens.ts
index 6752b0cdb0..e8a7104020 100644
--- a/packages/api/src/agents/toolTokens.ts
+++ b/packages/api/src/agents/toolTokens.ts
@@ -1,12 +1,12 @@
+import { logger } from '@librechat/data-schemas';
 import { SystemMessage } from '@langchain/core/messages';
+import { CacheKeys, Time } from 'librechat-data-provider';
 import {
   Providers,
   toJsonSchema,
   ANTHROPIC_TOOL_TOKEN_MULTIPLIER,
   DEFAULT_TOOL_TOKEN_MULTIPLIER,
 } from '@librechat/agents';
-import { CacheKeys, Time } from 'librechat-data-provider';
-import { logger } from '@librechat/data-schemas';
 
 import type { GenericTool, LCTool, TokenCounter, ClientOptions } from '@librechat/agents';
 import type { Keyv } from 'keyv';

From df99e33543dd3fce42d2a073ef9e1aaf8f35e37a Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Wed, 1 Apr 2026 23:15:40 -0400
Subject: [PATCH 09/11] refactor: per-tool token count caching instead of
 per-tool-set

Cache each tool's raw (pre-multiplier) token count individually by
name, rather than caching the total for an entire tool set by
fingerprint. This means:

- Adding a tool only requires computing the new tool's count; removing
  one requires no recomputation
- Agents sharing some but not all tools benefit from shared cache hits
- The provider-specific multiplier is applied fresh to the sum each time
  (trivial cost), so raw counts are provider-agnostic and maximally shared

Replace collectToolData/getToolFingerprint with collectToolSchemas,
which returns a Map<name, serializedJson>. getOrComputeToolTokens
iterates the map, checking the cache per tool and only running
tokenCounter on cache misses.
---
 packages/api/src/agents/toolTokens.spec.ts | 122 +++++++-------
 packages/api/src/agents/toolTokens.ts      | 184 ++++++++++-----------
 2 files changed, 153 insertions(+), 153 deletions(-)

diff --git a/packages/api/src/agents/toolTokens.spec.ts b/packages/api/src/agents/toolTokens.spec.ts
index cc678bd514..27461e0d67 100644
--- a/packages/api/src/agents/toolTokens.spec.ts
+++ b/packages/api/src/agents/toolTokens.spec.ts
@@ -8,7 +8,7 @@ import {
 
 import type { GenericTool, LCTool, TokenCounter } from '@librechat/agents';
 
-import { getToolFingerprint, computeToolSchemaTokens, getOrComputeToolTokens } from './toolTokens';
+import { collectToolSchemas, computeToolSchemaTokens, getOrComputeToolTokens } from './toolTokens';
 
 /* ---------- Mock standardCache with hoisted get/set for per-test overrides ---------- */
 const mockCacheStore = new Map<string, unknown>();
@@ -61,36 +61,38 @@ beforeEach(() => {
 });
 
 /* ========================================================================= */
-/*  getToolFingerprint                                                       */
+/*  collectToolSchemas                                                       */
 /* ========================================================================= */
 
-describe('getToolFingerprint', () => {
-  it('returns empty string when no tools or definitions provided', () => {
-    expect(getToolFingerprint()).toBe('');
-    expect(getToolFingerprint([], [])).toBe('');
+describe('collectToolSchemas', () => {
+  it('returns empty map when no tools provided', () => {
+    expect(collectToolSchemas().size).toBe(0);
+    expect(collectToolSchemas([], []).size).toBe(0);
   });
 
-  it('returns sorted names with count from GenericTool array', () => {
-    const tools = [makeTool('beta'), makeTool('alpha')];
-    expect(getToolFingerprint(tools)).toBe('alpha,beta|2');
+  it('collects schemas from GenericTool array keyed by name', () => {
+    const tools = [makeTool('alpha'), makeTool('beta')];
+    const schemas = collectToolSchemas(tools);
+    expect(schemas.size).toBe(2);
+    expect(schemas.has('alpha')).toBe(true);
+    expect(schemas.has('beta')).toBe(true);
   });
 
-  it('returns sorted names with count from LCTool definitions', () => {
-    const defs = [makeToolDef('zulu'), makeToolDef('alpha')];
-    expect(getToolFingerprint(undefined, defs)).toBe('alpha,zulu|2');
+  it('collects schemas from LCTool definitions', () => {
+    const defs = [makeToolDef('x'), makeToolDef('y')];
+    const schemas = collectToolSchemas(undefined, defs);
+    expect(schemas.size).toBe(2);
+    expect(schemas.has('x')).toBe(true);
+    expect(schemas.has('y')).toBe(true);
   });
 
-  it('deduplicates names across tools and toolDefinitions', () => {
-    const tools = [makeTool('shared'), makeTool('only_tool')];
+  it('deduplicates: GenericTool takes precedence over matching toolDefinition', () => {
+    const tools = [makeTool('shared')];
     const defs = [makeToolDef('shared'), makeToolDef('only_def')];
-    expect(getToolFingerprint(tools, defs)).toBe('only_def,only_tool,shared|3');
-  });
-
-  it('is stable regardless of input ordering', () => {
-    const a = getToolFingerprint([makeTool('x'), makeTool('a'), makeTool('m')]);
-    const b = getToolFingerprint([makeTool('m'), makeTool('x'), makeTool('a')]);
-    expect(a).toBe(b);
-    expect(a).toBe('a,m,x|3');
+    const schemas = collectToolSchemas(tools, defs);
+    expect(schemas.size).toBe(2);
+    expect(schemas.has('shared')).toBe(true);
+    expect(schemas.has('only_def')).toBe(true);
   });
 });
 
@@ -228,7 +230,7 @@ describe('getOrComputeToolTokens', () => {
     expect(result).toBe(0);
   });
 
-  it('computes and caches tokens on first call', async () => {
+  it('computes and caches each tool individually on first call', async () => {
     const defs = [makeToolDef('tool_a'), makeToolDef('tool_b')];
     const result = await getOrComputeToolTokens({
       toolDefinitions: defs,
@@ -237,13 +239,12 @@ describe('getOrComputeToolTokens', () => {
     });
 
     expect(result).toBeGreaterThan(0);
-    expect(mockCacheStore.size).toBe(1);
-
-    const cachedValue = Array.from(mockCacheStore.values())[0];
-    expect(cachedValue).toBe(result);
+    expect(mockCacheStore.has('tool_a')).toBe(true);
+    expect(mockCacheStore.has('tool_b')).toBe(true);
+    expect(mockCacheStore.size).toBe(2);
   });
 
-  it('returns cached value on second call without recomputing', async () => {
+  it('uses cached per-tool values on second call without recomputing', async () => {
     const defs = [makeToolDef('tool_a')];
     const counter = jest.fn(fakeTokenCounter);
 
@@ -265,7 +266,7 @@ describe('getOrComputeToolTokens', () => {
     expect(counter.mock.calls.length).toBe(callCountAfterFirst);
   });
 
-  it('caches separately for different providers with different multipliers', async () => {
+  it('applies different multipliers for different providers on same cached raw counts', async () => {
     const defs = [makeToolDef('tool')];
 
     const openai = await getOrComputeToolTokens({
@@ -281,43 +282,28 @@ describe('getOrComputeToolTokens', () => {
     });
 
     expect(openai).not.toBe(anthropic);
-    expect(mockCacheStore.size).toBe(2);
-  });
-
-  it('shares cache for same provider+tools across calls with different agents', async () => {
-    const defs = [makeToolDef('shared_tool')];
-
-    const first = await getOrComputeToolTokens({
-      toolDefinitions: defs,
-      provider: Providers.OPENAI,
-      tokenCounter: fakeTokenCounter,
-    });
-
-    const second = await getOrComputeToolTokens({
-      toolDefinitions: defs,
-      provider: Providers.OPENAI,
-      tokenCounter: fakeTokenCounter,
-    });
-
-    expect(first).toBe(second);
+    // Only one cache entry — raw count is provider-agnostic
     expect(mockCacheStore.size).toBe(1);
   });
 
-  it('recomputes when tool set changes', async () => {
-    const first = await getOrComputeToolTokens({
+  it('only computes new tools when tool set grows', async () => {
+    const counter = jest.fn(fakeTokenCounter);
+
+    await getOrComputeToolTokens({
       toolDefinitions: [makeToolDef('tool_a')],
       provider: Providers.OPENAI,
-      tokenCounter: fakeTokenCounter,
+      tokenCounter: counter,
     });
+    const callsAfterFirst = counter.mock.calls.length;
 
-    const second = await getOrComputeToolTokens({
+    await getOrComputeToolTokens({
       toolDefinitions: [makeToolDef('tool_a'), makeToolDef('tool_b')],
       provider: Providers.OPENAI,
-      tokenCounter: fakeTokenCounter,
+      tokenCounter: counter,
     });
 
-    expect(second).not.toBe(first);
-    expect(second).toBeGreaterThan(first);
+    // Only one new tokenCounter call for tool_b
+    expect(counter.mock.calls.length).toBe(callsAfterFirst + 1);
     expect(mockCacheStore.size).toBe(2);
   });
 
@@ -349,7 +335,7 @@ describe('getOrComputeToolTokens', () => {
     expect(mockSet).toHaveBeenCalled();
   });
 
-  it('uses GenericTool tools for fingerprint and token counting', async () => {
+  it('uses GenericTool tools for per-tool caching', async () => {
     const tools = [makeTool('alpha'), makeTool('beta')];
 
     const result = await getOrComputeToolTokens({
@@ -359,9 +345,27 @@ describe('getOrComputeToolTokens', () => {
     });
 
     expect(result).toBeGreaterThan(0);
-    expect(mockCacheStore.size).toBe(1);
+    expect(mockCacheStore.has('alpha')).toBe(true);
+    expect(mockCacheStore.has('beta')).toBe(true);
+  });
 
-    const key = Array.from(mockCacheStore.keys())[0];
-    expect(key).toContain('alpha,beta|2');
+  it('matches computeToolSchemaTokens output for same inputs', async () => {
+    const defs = [makeToolDef('a'), makeToolDef('b'), makeToolDef('c')];
+
+    const cached = await getOrComputeToolTokens({
+      toolDefinitions: defs,
+      provider: Providers.OPENAI,
+      tokenCounter: fakeTokenCounter,
+    });
+
+    const direct = computeToolSchemaTokens(
+      undefined,
+      defs,
+      Providers.OPENAI,
+      undefined,
+      fakeTokenCounter,
+    );
+
+    expect(cached).toBe(direct);
   });
 });
diff --git a/packages/api/src/agents/toolTokens.ts b/packages/api/src/agents/toolTokens.ts
index e8a7104020..829c03091e 100644
--- a/packages/api/src/agents/toolTokens.ts
+++ b/packages/api/src/agents/toolTokens.ts
@@ -23,7 +23,7 @@ function getCache(): Keyv {
   return toolTokenCache;
 }
 
-function getToolTokenMultiplier(provider: Providers, clientOptions?: ClientOptions): number {
+export function getToolTokenMultiplier(provider: Providers, clientOptions?: ClientOptions): number {
   const isAnthropic =
     provider !== Providers.BEDROCK &&
     (provider === Providers.ANTHROPIC ||
@@ -33,81 +33,69 @@ function getToolTokenMultiplier(provider: Providers, clientOptions?: ClientOptio
   return isAnthropic ? ANTHROPIC_TOOL_TOKEN_MULTIPLIER : DEFAULT_TOOL_TOKEN_MULTIPLIER;
 }
 
+/** Serializes a GenericTool to a JSON string for token counting. Returns null if no schema. */
+function serializeGenericTool(tool: GenericTool): { name: string; json: string } | null {
+  const genericTool = tool as unknown as Record<string, unknown>;
+  const toolName = (genericTool.name as string | undefined) ?? '';
+  if (genericTool.schema == null || typeof genericTool.schema !== 'object') {
+    return null;
+  }
+  const jsonSchema = toJsonSchema(
+    genericTool.schema,
+    toolName,
+    (genericTool.description as string | undefined) ?? '',
+  );
+  return { name: toolName, json: JSON.stringify(jsonSchema) };
+}
+
+/** Serializes an LCTool definition to a JSON string for token counting. */
+function serializeToolDef(def: LCTool): string {
+  return JSON.stringify({
+    type: 'function',
+    function: {
+      name: def.name,
+      description: def.description ?? '',
+      parameters: def.parameters ?? {},
+    },
+  });
+}
+
 /**
- * Single pass over tools and toolDefinitions. Collects:
- * - `names`: deduplicated, sorted tool names for fingerprinting.
- * - `schemas`: pre-serialized JSON strings for token counting.
- *
- * `nameSet` tracks all tool names (for the fingerprint). `countedNames`
- * tracks which tools contributed a schema from the `tools` array — a
- * toolDefinition whose name is in `countedNames` is skipped to avoid
- * double-counting, mirroring AgentContext.calculateInstructionTokens().
+ * Builds a map of tool name → serialized schema JSON. Deduplicates: a tool
+ * present in `tools` (with a schema) takes precedence over a matching
+ * `toolDefinitions` entry, mirroring AgentContext.calculateInstructionTokens().
  */
-function collectToolData(
+export function collectToolSchemas(
   tools?: GenericTool[],
   toolDefinitions?: LCTool[],
-): { names: string[]; schemas: string[] } {
-  const nameSet = new Set<string>();
-  const countedNames = new Set<string>();
-  const schemas: string[] = [];
+): Map<string, string> {
+  const schemas = new Map<string, string>();
 
   if (tools) {
     for (const tool of tools) {
-      const genericTool = tool as unknown as Record<string, unknown>;
-      const toolName = (genericTool.name as string | undefined) ?? '';
-      if (toolName) {
-        nameSet.add(toolName);
-      }
-      if (genericTool.schema != null && typeof genericTool.schema === 'object') {
-        schemas.push(
-          JSON.stringify(
-            toJsonSchema(
-              genericTool.schema,
-              toolName,
-              (genericTool.description as string | undefined) ?? '',
-            ),
-          ),
-        );
-        if (toolName) {
-          countedNames.add(toolName);
-        }
+      const result = serializeGenericTool(tool);
+      if (result && result.name) {
+        schemas.set(result.name, result.json);
       }
     }
   }
 
   if (toolDefinitions) {
     for (const def of toolDefinitions) {
-      if (def.name) {
-        nameSet.add(def.name);
-      }
-      if (countedNames.has(def.name)) {
+      if (!def.name || schemas.has(def.name)) {
         continue;
       }
-      schemas.push(
-        JSON.stringify({
-          type: 'function',
-          function: {
-            name: def.name,
-            description: def.description ?? '',
-            parameters: def.parameters ?? {},
-          },
-        }),
-      );
+      schemas.set(def.name, serializeToolDef(def));
     }
   }
 
-  const names = nameSet.size > 0 ? Array.from(nameSet).sort() : [];
-  return { names, schemas };
-}
-
-export function getToolFingerprint(tools?: GenericTool[], toolDefinitions?: LCTool[]): string {
-  const { names } = collectToolData(tools, toolDefinitions);
-  if (names.length === 0) {
-    return '';
-  }
-  return names.join(',') + '|' + names.length;
+  return schemas;
 }
 
+/**
+ * Computes tool schema tokens from scratch using the provided token counter.
+ * Mirrors the logic in AgentContext.calculateInstructionTokens().
+ */
 export function computeToolSchemaTokens(
   tools: GenericTool[] | undefined,
   toolDefinitions: LCTool[] | undefined,
@@ -115,20 +103,22 @@ export function computeToolSchemaTokens(
   clientOptions: ClientOptions | undefined,
   tokenCounter: TokenCounter,
 ): number {
-  const { schemas } = collectToolData(tools, toolDefinitions);
-  let toolTokens = 0;
-  for (const schema of schemas) {
-    toolTokens += tokenCounter(new SystemMessage(schema));
+  const schemas = collectToolSchemas(tools, toolDefinitions);
+  let rawTokens = 0;
+  for (const json of schemas.values()) {
+    rawTokens += tokenCounter(new SystemMessage(json));
   }
   const multiplier = getToolTokenMultiplier(provider, clientOptions);
-  return Math.ceil(toolTokens * multiplier);
+  return Math.ceil(rawTokens * multiplier);
 }
 
 /**
- * Returns cached tool schema tokens or computes them on miss.
+ * Returns tool schema tokens, using per-tool caching to avoid redundant
+ * token counting. Each tool's raw (pre-multiplier) token count is cached
+ * individually by name, so adding/removing a tool only requires computing
+ * the new one. The provider-specific multiplier is applied to the sum.
+ *
  * Returns 0 if there are no tools.
- * Single pass over tool arrays: builds fingerprint and serialized schemas
- * together, then only runs the token counter if the cache misses.
  */
 export async function getOrComputeToolTokens({
   tools,
@@ -143,45 +133,51 @@ export async function getOrComputeToolTokens({
   clientOptions?: ClientOptions;
   tokenCounter: TokenCounter;
 }): Promise<number> {
-  const { names, schemas } = collectToolData(tools, toolDefinitions);
-  if (names.length === 0) {
+  const schemas = collectToolSchemas(tools, toolDefinitions);
+  if (schemas.size === 0) {
     return 0;
   }
 
-  const fingerprint = names.join(',') + '|' + names.length;
-  const multiplier = getToolTokenMultiplier(provider, clientOptions);
-  const multiplierKey = multiplier === ANTHROPIC_TOOL_TOKEN_MULTIPLIER ? 'anthropic' : 'default';
-  const cacheKey = `${provider}:${multiplierKey}:${fingerprint}`;
-
+  let cache: Keyv | undefined;
   try {
-    const cache = getCache();
-    const cached = (await cache.get(cacheKey)) as number | undefined;
-    if (cached != null && cached > 0) {
-      return cached;
-    }
+    cache = getCache();
   } catch (err) {
-    logger.debug('[toolTokens] Cache read failed, computing fresh', err);
+    logger.debug('[toolTokens] Cache init failed, computing fresh', err);
   }
 
-  // Inline token count — not delegating to computeToolSchemaTokens to avoid
-  // a second collectToolData pass; schemas are already built above.
-  let toolTokens = 0;
-  for (const schema of schemas) {
-    toolTokens += tokenCounter(new SystemMessage(schema));
-  }
-  const tokens = Math.ceil(toolTokens * multiplier);
+  let rawTotal = 0;
+  const toWrite: Array<{ key: string; value: number }> = [];
 
-  if (tokens > 0) {
-    try {
-      getCache()
-        .set(cacheKey, tokens)
-        .catch((err: unknown) => {
-          logger.debug('[toolTokens] Cache write failed', err);
-        });
-    } catch {
-      // getCache() init failure on write path — non-fatal
+  for (const [name, json] of schemas) {
+    let rawCount: number | undefined;
+
+    if (cache) {
+      try {
+        rawCount = (await cache.get(name)) as number | undefined;
+      } catch {
+        // Cache read failed for this tool — will compute fresh
+      }
+    }
+
+    if (rawCount == null || rawCount <= 0) {
+      rawCount = tokenCounter(new SystemMessage(json));
+      if (rawCount > 0 && cache) {
+        toWrite.push({ key: name, value: rawCount });
+      }
+    }
+
+    rawTotal += rawCount;
+  }
+
+  // Fire-and-forget cache writes for newly computed tools
+  if (cache && toWrite.length > 0) {
+    for (const { key, value } of toWrite) {
+      cache.set(key, value).catch((err: unknown) => {
+        logger.debug(`[toolTokens] Cache write failed for ${key}`, err);
+      });
     }
   }
 
-  return tokens;
+  const multiplier = getToolTokenMultiplier(provider, clientOptions);
+  return Math.ceil(rawTotal * multiplier);
 }

From 8db4f21f97c3518e4cd766ab0b55b47c0b47a219 Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Fri, 3 Apr 2026 12:53:13 -0400
Subject: [PATCH 10/11] fix: scope tool token cache keys by tenantId

Tool definitions can differ per tenant in multi-tenant deployments.
Prefix per-tool cache keys with tenantId when present on the user
object, so tenants don't share stale token counts across different
tool configurations.

Cache key format: "{tenantId}:{toolName}" or "{toolName}" when no
tenant context exists.
---
 packages/api/src/agents/run.ts        | 1 +
 packages/api/src/agents/toolTokens.ts | 9 +++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts
index 805f1e5df8..1b67f2b337 100644
--- a/packages/api/src/agents/run.ts
+++ b/packages/api/src/agents/run.ts
@@ -390,6 +390,7 @@ export async function createRun({
         provider,
         clientOptions: llmConfig,
         tokenCounter,
+        tenantId: user?.tenantId,
       });
     }
 
diff --git a/packages/api/src/agents/toolTokens.ts b/packages/api/src/agents/toolTokens.ts
index 829c03091e..22d88d0285 100644
--- a/packages/api/src/agents/toolTokens.ts
+++ b/packages/api/src/agents/toolTokens.ts
@@ -126,18 +126,22 @@ export async function getOrComputeToolTokens({
   provider,
   clientOptions,
   tokenCounter,
+  tenantId,
 }: {
   tools?: GenericTool[];
   toolDefinitions?: LCTool[];
   provider: Providers;
   clientOptions?: ClientOptions;
   tokenCounter: TokenCounter;
+  tenantId?: string;
 }): Promise<number> {
   const schemas = collectToolSchemas(tools, toolDefinitions);
   if (schemas.size === 0) {
     return 0;
   }
 
+  const keyPrefix = tenantId ? `${tenantId}:` : '';
+
   let cache: Keyv | undefined;
   try {
     cache = getCache();
@@ -149,11 +153,12 @@ export async function getOrComputeToolTokens({
   const toWrite: Array<{ key: string; value: number }> = [];
 
   for (const [name, json] of schemas) {
+    const cacheKey = `${keyPrefix}${name}`;
     let rawCount: number | undefined;
 
     if (cache) {
       try {
-        rawCount = (await cache.get(name)) as number | undefined;
+        rawCount = (await cache.get(cacheKey)) as number | undefined;
       } catch {
         // Cache read failed for this tool — will compute fresh
       }
@@ -162,7 +167,7 @@ export async function getOrComputeToolTokens({
     if (rawCount == null || rawCount <= 0) {
       rawCount = tokenCounter(new SystemMessage(json));
       if (rawCount > 0 && cache) {
-        toWrite.push({ key: name, value: rawCount });
+        toWrite.push({ key: cacheKey, value: rawCount });
       }
     }
 

From e2962f4967af7fa1baa2537a65a9146896a211e9 Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Fri, 3 Apr 2026 14:08:03 -0400
Subject: [PATCH 11/11] fix: include toolType in per-tool cache key

Cache key is now {tenantId}:{name}:{toolType} (or {name}:{toolType}
without tenant). This differentiates builtin/mcp/action tools that
may share a name but have different schemas.

GenericTool entries derive type from the mcp flag; LCTool entries
use the toolType field (defaulting to builtin).

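Also refactors collectToolSchemas to return ToolEntry[] with
pre-built {name}:{toolType} cache keys instead of Map<name, json>;
getOrComputeToolTokens still prepends the tenant prefix. Deduplication
within a single call remains by name only.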
---
 packages/api/src/agents/toolTokens.spec.ts | 151 +++++++++++++--------
 packages/api/src/agents/toolTokens.ts      |  59 ++++----
 2 files changed, 132 insertions(+), 78 deletions(-)

diff --git a/packages/api/src/agents/toolTokens.spec.ts b/packages/api/src/agents/toolTokens.spec.ts
index 27461e0d67..8d324ea58b 100644
--- a/packages/api/src/agents/toolTokens.spec.ts
+++ b/packages/api/src/agents/toolTokens.spec.ts
@@ -37,11 +37,18 @@ function makeTool(name: string, description = `${name} description`): GenericToo
   }) as unknown as GenericTool;
 }
 
-function makeToolDef(name: string, description?: string): LCTool {
+function makeMcpTool(name: string): GenericTool {
+  const tool = makeTool(name) as unknown as Record<string, unknown>;
+  tool.mcp = true;
+  return tool as unknown as GenericTool;
+}
+
+function makeToolDef(name: string, opts?: Partial<LCTool>): LCTool {
   return {
     name,
-    description: description ?? `${name} description`,
-    parameters: { type: 'object', properties: { input: { type: 'string' } } },
+    description: opts?.description ?? `${name} description`,
+    parameters: opts?.parameters ?? { type: 'object', properties: { input: { type: 'string' } } },
+    ...opts,
   };
 }
 
@@ -65,34 +72,45 @@ beforeEach(() => {
 /* ========================================================================= */
 
 describe('collectToolSchemas', () => {
-  it('returns empty map when no tools provided', () => {
-    expect(collectToolSchemas().size).toBe(0);
-    expect(collectToolSchemas([], []).size).toBe(0);
+  it('returns empty array when no tools provided', () => {
+    expect(collectToolSchemas()).toHaveLength(0);
+    expect(collectToolSchemas([], [])).toHaveLength(0);
   });
 
-  it('collects schemas from GenericTool array keyed by name', () => {
-    const tools = [makeTool('alpha'), makeTool('beta')];
-    const schemas = collectToolSchemas(tools);
-    expect(schemas.size).toBe(2);
-    expect(schemas.has('alpha')).toBe(true);
-    expect(schemas.has('beta')).toBe(true);
+  it('collects entries from GenericTool array', () => {
+    const entries = collectToolSchemas([makeTool('alpha'), makeTool('beta')]);
+    expect(entries).toHaveLength(2);
+    expect(entries.map((e) => e.cacheKey)).toEqual(
+      expect.arrayContaining(['alpha:builtin', 'beta:builtin']),
+    );
   });
 
-  it('collects schemas from LCTool definitions', () => {
-    const defs = [makeToolDef('x'), makeToolDef('y')];
-    const schemas = collectToolSchemas(undefined, defs);
-    expect(schemas.size).toBe(2);
-    expect(schemas.has('x')).toBe(true);
-    expect(schemas.has('y')).toBe(true);
+  it('collects entries from LCTool definitions with toolType', () => {
+    const defs = [makeToolDef('x', { toolType: 'mcp' }), makeToolDef('y', { toolType: 'action' })];
+    const entries = collectToolSchemas(undefined, defs);
+    expect(entries).toHaveLength(2);
+    expect(entries[0].cacheKey).toBe('x:mcp');
+    expect(entries[1].cacheKey).toBe('y:action');
+  });
+
+  it('defaults toolType to builtin for LCTool without toolType', () => {
+    const entries = collectToolSchemas(undefined, [makeToolDef('z')]);
+    expect(entries[0].cacheKey).toBe('z:builtin');
+  });
+
+  it('uses mcp type for GenericTool with mcp flag', () => {
+    const entries = collectToolSchemas([makeMcpTool('search')]);
+    expect(entries[0].cacheKey).toBe('search:mcp');
   });
 
   it('deduplicates: GenericTool takes precedence over matching toolDefinition', () => {
     const tools = [makeTool('shared')];
     const defs = [makeToolDef('shared'), makeToolDef('only_def')];
-    const schemas = collectToolSchemas(tools, defs);
-    expect(schemas.size).toBe(2);
-    expect(schemas.has('shared')).toBe(true);
-    expect(schemas.has('only_def')).toBe(true);
+    const entries = collectToolSchemas(tools, defs);
+    expect(entries).toHaveLength(2);
+    const keys = entries.map((e) => e.cacheKey);
+    expect(keys).toContain('shared:builtin');
+    expect(keys).toContain('only_def:builtin');
   });
 });
 
@@ -109,9 +127,8 @@ describe('computeToolSchemaTokens', () => {
   });
 
   it('counts tokens from GenericTool schemas', () => {
-    const tools = [makeTool('test_tool')];
     const result = computeToolSchemaTokens(
-      tools,
+      [makeTool('test_tool')],
       undefined,
       Providers.OPENAI,
       undefined,
@@ -121,10 +138,9 @@ describe('computeToolSchemaTokens', () => {
   });
 
   it('counts tokens from LCTool definitions', () => {
-    const defs = [makeToolDef('test_def')];
     const result = computeToolSchemaTokens(
       undefined,
-      defs,
+      [makeToolDef('test_def')],
       Providers.OPENAI,
       undefined,
       fakeTokenCounter,
@@ -150,7 +166,6 @@ describe('computeToolSchemaTokens', () => {
       undefined,
       fakeTokenCounter,
     );
-
     expect(both).toBe(toolsOnly);
   });
 
@@ -170,19 +185,17 @@ describe('computeToolSchemaTokens', () => {
       undefined,
       fakeTokenCounter,
     );
-
     const expectedRatio = ANTHROPIC_TOOL_TOKEN_MULTIPLIER / DEFAULT_TOOL_TOKEN_MULTIPLIER;
     expect(anthropic / openai).toBeCloseTo(expectedRatio, 1);
   });
 
   it('applies Anthropic multiplier when model name contains "claude"', () => {
     const defs = [makeToolDef('tool')];
-    const clientOptions = { model: 'claude-3-opus' };
     const result = computeToolSchemaTokens(
       undefined,
       defs,
       Providers.OPENAI,
-      clientOptions,
+      { model: 'claude-3-opus' },
       fakeTokenCounter,
     );
     const defaultResult = computeToolSchemaTokens(
@@ -197,12 +210,11 @@ describe('computeToolSchemaTokens', () => {
 
   it('does not apply Anthropic multiplier for Bedrock even with claude model', () => {
     const defs = [makeToolDef('tool')];
-    const clientOptions = { model: 'claude-3-opus' };
     const bedrock = computeToolSchemaTokens(
       undefined,
       defs,
       Providers.BEDROCK,
-      clientOptions,
+      { model: 'claude-3-opus' },
       fakeTokenCounter,
     );
     const defaultResult = computeToolSchemaTokens(
@@ -212,7 +224,6 @@ describe('computeToolSchemaTokens', () => {
       undefined,
       fakeTokenCounter,
     );
-
     expect(bedrock).toBe(defaultResult);
   });
 });
@@ -239,8 +250,8 @@ describe('getOrComputeToolTokens', () => {
     });
 
     expect(result).toBeGreaterThan(0);
-    expect(mockCacheStore.has('tool_a')).toBe(true);
-    expect(mockCacheStore.has('tool_b')).toBe(true);
+    expect(mockCacheStore.has('tool_a:builtin')).toBe(true);
+    expect(mockCacheStore.has('tool_b:builtin')).toBe(true);
     expect(mockCacheStore.size).toBe(2);
   });
 
@@ -282,7 +293,6 @@ describe('getOrComputeToolTokens', () => {
     });
 
     expect(openai).not.toBe(anthropic);
-    // Only one cache entry — raw count is provider-agnostic
     expect(mockCacheStore.size).toBe(1);
   });
 
@@ -302,17 +312,63 @@ describe('getOrComputeToolTokens', () => {
       tokenCounter: counter,
     });
 
-    // Only one new tokenCounter call for tool_b
     expect(counter.mock.calls.length).toBe(callsAfterFirst + 1);
     expect(mockCacheStore.size).toBe(2);
   });
 
+  it('scopes cache keys by tenantId when provided', async () => {
+    const defs = [makeToolDef('tool')];
+
+    await getOrComputeToolTokens({
+      toolDefinitions: defs,
+      provider: Providers.OPENAI,
+      tokenCounter: fakeTokenCounter,
+      tenantId: 'tenant_123',
+    });
+
+    expect(mockCacheStore.has('tenant_123:tool:builtin')).toBe(true);
+  });
+
+  it('separates cache entries for different tenants', async () => {
+    const defs = [makeToolDef('tool')];
+
+    const t1 = await getOrComputeToolTokens({
+      toolDefinitions: defs,
+      provider: Providers.OPENAI,
+      tokenCounter: fakeTokenCounter,
+      tenantId: 'tenant_1',
+    });
+
+    const t2 = await getOrComputeToolTokens({
+      toolDefinitions: defs,
+      provider: Providers.OPENAI,
+      tokenCounter: fakeTokenCounter,
+      tenantId: 'tenant_2',
+    });
+
+    expect(t1).toBe(t2);
+    expect(mockCacheStore.has('tenant_1:tool:builtin')).toBe(true);
+    expect(mockCacheStore.has('tenant_2:tool:builtin')).toBe(true);
+    expect(mockCacheStore.size).toBe(2);
+  });
+
+  it('caches mcp tools with mcp type in key', async () => {
+    const defs = [makeToolDef('search', { toolType: 'mcp' })];
+
+    await getOrComputeToolTokens({
+      toolDefinitions: defs,
+      provider: Providers.OPENAI,
+      tokenCounter: fakeTokenCounter,
+    });
+
+    expect(mockCacheStore.has('search:mcp')).toBe(true);
+  });
+
   it('falls back to compute when cache read throws', async () => {
     mockGet.mockRejectedValueOnce(new Error('Redis down'));
 
-    const defs = [makeToolDef('tool')];
     const result = await getOrComputeToolTokens({
-      toolDefinitions: defs,
+      toolDefinitions: [makeToolDef('tool')],
       provider: Providers.OPENAI,
       tokenCounter: fakeTokenCounter,
     });
@@ -324,9 +380,8 @@ describe('getOrComputeToolTokens', () => {
   it('does not throw when cache write fails', async () => {
     mockSet.mockRejectedValueOnce(new Error('Redis write error'));
 
-    const defs = [makeToolDef('tool_write_fail')];
     const result = await getOrComputeToolTokens({
-      toolDefinitions: defs,
+      toolDefinitions: [makeToolDef('tool_write_fail')],
       provider: Providers.OPENAI,
       tokenCounter: fakeTokenCounter,
     });
@@ -335,20 +390,6 @@ describe('getOrComputeToolTokens', () => {
     expect(mockSet).toHaveBeenCalled();
   });
 
-  it('uses GenericTool tools for per-tool caching', async () => {
-    const tools = [makeTool('alpha'), makeTool('beta')];
-
-    const result = await getOrComputeToolTokens({
-      tools,
-      provider: Providers.OPENAI,
-      tokenCounter: fakeTokenCounter,
-    });
-
-    expect(result).toBeGreaterThan(0);
-    expect(mockCacheStore.has('alpha')).toBe(true);
-    expect(mockCacheStore.has('beta')).toBe(true);
-  });
-
   it('matches computeToolSchemaTokens output for same inputs', async () => {
     const defs = [makeToolDef('a'), makeToolDef('b'), makeToolDef('c')];
 
diff --git a/packages/api/src/agents/toolTokens.ts b/packages/api/src/agents/toolTokens.ts
index 22d88d0285..4a95b07600 100644
--- a/packages/api/src/agents/toolTokens.ts
+++ b/packages/api/src/agents/toolTokens.ts
@@ -13,6 +13,11 @@ import type { Keyv } from 'keyv';
 
 import { standardCache } from '~/cache';
 
+interface ToolEntry {
+  cacheKey: string;
+  json: string;
+}
+
 /** Module-level cache instance, lazily initialized. */
 let toolTokenCache: Keyv | undefined;
 
@@ -61,35 +66,43 @@ function serializeToolDef(def: LCTool): string {
 }
 
 /**
- * Builds a map of tool name → serialized schema JSON. Deduplicates: a tool
- * present in `tools` (with a schema) takes precedence over a matching
- * `toolDefinitions` entry, mirroring AgentContext.calculateInstructionTokens().
+ * Builds a list of tool entries with cache keys and serialized schemas.
+ * Deduplicates: a tool present in `tools` (with a schema) takes precedence
+ * over a matching `toolDefinitions` entry.
+ *
+ * Each cache key is `{name}:{toolType}` so that builtin/mcp/action tools sharing
+ * a name stay distinct. LCTool entries use `toolType` (default `builtin`);
+ * GenericTool entries are `mcp` when their `mcp` flag is true, else `builtin`.
  */
-export function collectToolSchemas(
-  tools?: GenericTool[],
-  toolDefinitions?: LCTool[],
-): Map<string, string> {
-  const schemas = new Map<string, string>();
+export function collectToolSchemas(tools?: GenericTool[], toolDefinitions?: LCTool[]): ToolEntry[] {
+  const seen = new Set<string>();
+  const entries: ToolEntry[] = [];
 
   if (tools) {
     for (const tool of tools) {
       const result = serializeGenericTool(tool);
-      if (result && result.name) {
-        schemas.set(result.name, result.json);
+      if (!result || !result.name) {
+        continue;
       }
+      seen.add(result.name);
+      const toolType =
+        (tool as unknown as Record<string, unknown>).mcp === true ? 'mcp' : 'builtin';
+      entries.push({ cacheKey: `${result.name}:${toolType}`, json: result.json });
     }
   }
 
   if (toolDefinitions) {
     for (const def of toolDefinitions) {
-      if (!def.name || schemas.has(def.name)) {
+      if (!def.name || seen.has(def.name)) {
         continue;
       }
-      schemas.set(def.name, serializeToolDef(def));
+      seen.add(def.name);
+      const toolType = def.toolType ?? 'builtin';
+      entries.push({ cacheKey: `${def.name}:${toolType}`, json: serializeToolDef(def) });
     }
   }
 
-  return schemas;
+  return entries;
 }
 
 /**
@@ -103,9 +116,9 @@ export function computeToolSchemaTokens(
   clientOptions: ClientOptions | undefined,
   tokenCounter: TokenCounter,
 ): number {
-  const schemas = collectToolSchemas(tools, toolDefinitions);
+  const entries = collectToolSchemas(tools, toolDefinitions);
   let rawTokens = 0;
-  for (const json of schemas.values()) {
+  for (const { json } of entries) {
     rawTokens += tokenCounter(new SystemMessage(json));
   }
   const multiplier = getToolTokenMultiplier(provider, clientOptions);
@@ -115,8 +128,8 @@ export function computeToolSchemaTokens(
 /**
  * Returns tool schema tokens, using per-tool caching to avoid redundant
  * token counting. Each tool's raw (pre-multiplier) token count is cached
- * individually by name, so adding/removing a tool only requires computing
- * the new one. The provider-specific multiplier is applied to the sum.
+ * individually, keyed by `{tenantId}:{name}:{toolType}` (or `{name}:{toolType}`
+ * without tenant). The provider-specific multiplier is applied to the sum.
  *
  * Returns 0 if there are no tools.
  */
@@ -135,8 +148,8 @@ export async function getOrComputeToolTokens({
   tokenCounter: TokenCounter;
   tenantId?: string;
 }): Promise<number> {
-  const schemas = collectToolSchemas(tools, toolDefinitions);
-  if (schemas.size === 0) {
+  const entries = collectToolSchemas(tools, toolDefinitions);
+  if (entries.length === 0) {
     return 0;
   }
 
@@ -152,13 +165,13 @@ export async function getOrComputeToolTokens({
   let rawTotal = 0;
   const toWrite: Array<{ key: string; value: number }> = [];
 
-  for (const [name, json] of schemas) {
-    const cacheKey = `${keyPrefix}${name}`;
+  for (const { cacheKey, json } of entries) {
+    const fullKey = `${keyPrefix}${cacheKey}`;
     let rawCount: number | undefined;
 
     if (cache) {
       try {
-        rawCount = (await cache.get(cacheKey)) as number | undefined;
+        rawCount = (await cache.get(fullKey)) as number | undefined;
       } catch {
         // Cache read failed for this tool — will compute fresh
       }
@@ -167,7 +180,7 @@ export async function getOrComputeToolTokens({
     if (rawCount == null || rawCount <= 0) {
       rawCount = tokenCounter(new SystemMessage(json));
       if (rawCount > 0 && cache) {
-        toWrite.push({ key: cacheKey, value: rawCount });
+        toWrite.push({ key: fullKey, value: rawCount });
       }
     }