feat: cache tool schema token counts to avoid redundant recalculation

Add time-based caching (30min TTL) for tool schema token counts using
the existing Keyv/Redis infrastructure. Cache is keyed by provider and
a lightweight fingerprint (sorted tool names + count), so agents sharing
the same tool set share the cached value.

New utility module (toolTokens.ts) provides reusable functions:
- getToolFingerprint: stable fingerprint from tool names
- computeToolSchemaTokens: mirrors AgentContext.calculateInstructionTokens
- getOrComputeToolTokens: cache lookup with compute-on-miss

In createRun, buildAgentContext is now async with Promise.all for
parallel cache lookups in multi-agent runs. Pre-computed tokens are
passed via AgentInputs.toolSchemaTokens, skipping calculateInstructionTokens
in @librechat/agents entirely on cache hit.
This commit is contained in:
Danny Avila 2026-03-24 13:36:10 -04:00
parent 611a1ef5dc
commit d18d34d9cf
4 changed files with 193 additions and 5 deletions

View file

@ -56,6 +56,7 @@ const namespaces = {
CacheKeys.ADMIN_OAUTH_EXCHANGE,
Time.THIRTY_SECONDS,
),
[CacheKeys.TOOL_TOKENS]: standardCache(CacheKeys.TOOL_TOKENS, Time.THIRTY_MINUTES),
};
/**

View file

@ -18,6 +18,7 @@ import type { BaseMessage } from '@langchain/core/messages';
import type { IUser } from '@librechat/data-schemas';
import type * as t from '~/types';
import { resolveHeaders, createSafeUser } from '~/utils/env';
import { getOrComputeToolTokens } from './toolTokens';
/** Expected shape of JSON tool search results */
interface ToolSearchJsonResult {
@ -296,7 +297,7 @@ export async function createRun({
: new Set<string>();
const agentInputs: AgentInputs[] = [];
const buildAgentContext = (agent: RunAgent) => {
const buildAgentContext = async (agent: RunAgent): Promise<AgentInputs> => {
const provider =
(providerEndpointMap[
agent.provider as keyof typeof providerEndpointMap
@ -381,11 +382,24 @@ export async function createRun({
agent.maxContextTokens,
);
/** Resolve cached or computed tool schema tokens */
let toolSchemaTokens: number | undefined;
if (tokenCounter) {
toolSchemaTokens = await getOrComputeToolTokens({
tools: agent.tools,
toolDefinitions,
provider,
clientOptions: llmConfig,
tokenCounter,
});
}
const reasoningKey = getReasoningKey(provider, llmConfig, agent.endpoint);
const agentInput: AgentInputs = {
provider,
reasoningKey,
toolDefinitions,
toolSchemaTokens,
agentId: agent.id,
tools: agent.tools,
clientOptions: llmConfig,
@ -401,12 +415,11 @@ export async function createRun({
contextPruningConfig: summarization.contextPruning,
maxToolResultChars: agent.maxToolResultChars,
};
agentInputs.push(agentInput);
return agentInput;
};
for (const agent of agents) {
buildAgentContext(agent);
}
const resolvedInputs = await Promise.all(agents.map(buildAgentContext));
agentInputs.push(...resolvedInputs);
const graphConfig: RunConfig['graphConfig'] = {
signal,

View file

@ -0,0 +1,169 @@
import { SystemMessage } from '@langchain/core/messages';
import {
Providers,
toJsonSchema,
ANTHROPIC_TOOL_TOKEN_MULTIPLIER,
DEFAULT_TOOL_TOKEN_MULTIPLIER,
} from '@librechat/agents';
import { CacheKeys, Time } from 'librechat-data-provider';
import { standardCache } from '~/cache';
import type { Keyv } from 'keyv';
import type { GenericTool, LCTool, TokenCounter, ClientOptions } from '@librechat/agents';
/** Module-level cache instance, lazily initialized. */
let toolTokenCache: Keyv | undefined;

/**
 * Returns the shared tool-token cache, creating it on first access.
 * Lazy initialization avoids touching the cache backend at import time.
 */
function getCache(): Keyv {
  toolTokenCache ??= standardCache(CacheKeys.TOOL_TOKENS, Time.THIRTY_MINUTES);
  return toolTokenCache;
}
/**
* Builds a lightweight fingerprint from tool names.
* Sorted and deduplicated to ensure stability regardless of tool ordering.
*/
/**
 * Builds a lightweight fingerprint from tool names.
 * Names are deduplicated and sorted so the result is stable regardless of
 * tool ordering across agents/requests.
 *
 * @param tools - Instantiated tools; each contributes its `name` property when it is a non-empty string
 * @param toolDefinitions - Raw tool definitions contributing their `name`
 * @returns `"name1,name2,...|count"` over the unique sorted names, or `''` when no names are found
 */
export function getToolFingerprint(tools?: GenericTool[], toolDefinitions?: LCTool[]): string {
  const collected: string[] = [];

  for (const tool of tools ?? []) {
    const candidate = (tool as unknown as Record<string, unknown>).name;
    if (typeof candidate === 'string' && candidate) {
      collected.push(candidate);
    }
  }

  for (const def of toolDefinitions ?? []) {
    if (def.name) {
      collected.push(def.name);
    }
  }

  if (collected.length === 0) {
    return '';
  }

  const unique = [...new Set(collected)].sort();
  return `${unique.join(',')}|${unique.length}`;
}
/**
* Determines the provider-specific token multiplier for tool schemas.
*/
/**
 * Determines the provider-specific token multiplier for tool schemas.
 * Anthropic is detected either by provider or by an `anthropic`/`claude`
 * substring in the model name; Bedrock is always treated as non-Anthropic here.
 */
function getToolTokenMultiplier(provider: Providers, clientOptions?: ClientOptions): number {
  if (provider === Providers.BEDROCK) {
    return DEFAULT_TOOL_TOKEN_MULTIPLIER;
  }
  const model = String((clientOptions as { model?: string } | undefined)?.model ?? '');
  const anthropicLike = provider === Providers.ANTHROPIC || /anthropic|claude/i.test(model);
  return anthropicLike ? ANTHROPIC_TOOL_TOKEN_MULTIPLIER : DEFAULT_TOOL_TOKEN_MULTIPLIER;
}
/**
* Computes tool schema tokens from scratch using the provided token counter.
* Mirrors the logic in AgentContext.calculateInstructionTokens().
*/
/**
 * Computes tool schema tokens from scratch using the provided token counter.
 * Mirrors the logic in AgentContext.calculateInstructionTokens():
 * structured tools carrying a `schema` object are serialized via `toJsonSchema`,
 * then any remaining definitions (not already counted by name) are serialized
 * in an OpenAI-style function envelope. The raw count is scaled by a
 * provider-specific multiplier and rounded up.
 *
 * @param tools - Instantiated tools; only those with an object `schema` are counted
 * @param toolDefinitions - Raw definitions counted unless their name was already seen
 * @param provider - Provider used to pick the token multiplier
 * @param clientOptions - Client options; the model name feeds Anthropic detection
 * @param tokenCounter - Callback producing a token count for a message
 * @returns Ceiling of the multiplied token total (0 when nothing is countable)
 */
export function computeToolSchemaTokens(
  tools: GenericTool[] | undefined,
  toolDefinitions: LCTool[] | undefined,
  provider: Providers,
  clientOptions: ClientOptions | undefined,
  tokenCounter: TokenCounter,
): number {
  const seen = new Set<string>();
  let rawCount = 0;

  for (const tool of tools ?? []) {
    const record = tool as unknown as Record<string, unknown>;
    // Only structured tools with an actual schema object contribute here.
    if (record.schema == null || typeof record.schema !== 'object') {
      continue;
    }
    const toolName = (record.name as string | undefined) ?? '';
    const jsonSchema = toJsonSchema(
      record.schema,
      toolName,
      (record.description as string | undefined) ?? '',
    );
    rawCount += tokenCounter(new SystemMessage(JSON.stringify(jsonSchema)));
    if (toolName) {
      seen.add(toolName);
    }
  }

  for (const def of toolDefinitions ?? []) {
    // Skip definitions already counted via their structured-tool schema.
    if (seen.has(def.name)) {
      continue;
    }
    const schema = {
      type: 'function',
      function: {
        name: def.name,
        description: def.description ?? '',
        parameters: def.parameters ?? {},
      },
    };
    rawCount += tokenCounter(new SystemMessage(JSON.stringify(schema)));
  }

  return Math.ceil(rawCount * getToolTokenMultiplier(provider, clientOptions));
}
/**
* Returns cached tool schema tokens if the fingerprint matches,
* otherwise computes them, caches the result (fire-and-forget), and returns.
*
* Returns 0 if there are no tools (no caching needed).
*/
/**
 * Returns cached tool schema tokens for this provider + tool fingerprint,
 * otherwise computes them, persists the result fire-and-forget, and returns it.
 *
 * Returns 0 immediately when no tool names are present (nothing to cache).
 * A computed value of 0 is never written, so empty-but-named tool sets are
 * recomputed on each call rather than cached.
 */
export async function getOrComputeToolTokens({
  tools,
  toolDefinitions,
  provider,
  clientOptions,
  tokenCounter,
}: {
  tools?: GenericTool[];
  toolDefinitions?: LCTool[];
  provider: Providers;
  clientOptions?: ClientOptions;
  tokenCounter: TokenCounter;
}): Promise<number> {
  const fingerprint = getToolFingerprint(tools, toolDefinitions);
  if (fingerprint === '') {
    return 0;
  }

  const cache = getCache();
  const cacheKey = `${provider}:${fingerprint}`;
  const hit = (await cache.get(cacheKey)) as number | undefined;
  if (hit != null && hit > 0) {
    return hit;
  }

  const computed = computeToolSchemaTokens(
    tools,
    toolDefinitions,
    provider,
    clientOptions,
    tokenCounter,
  );
  if (computed > 0) {
    // Fire-and-forget write — don't block the run on cache persistence.
    void cache.set(cacheKey, computed).catch(() => {
      /* swallow cache write errors */
    });
  }
  return computed;
}

View file

@ -1529,6 +1529,11 @@ export enum CacheKeys {
* Key for admin panel OAuth exchange codes (one-time-use, short TTL).
*/
ADMIN_OAUTH_EXCHANGE = 'ADMIN_OAUTH_EXCHANGE',
/**
* Key for cached tool schema token counts.
* Keyed by provider + tool fingerprint to avoid redundant token counting.
*/
TOOL_TOKENS = 'TOOL_TOKENS',
}
/**