fix: address review findings for tool token caching

- Wrap cache.get() in try/catch so Redis failures fall through to compute instead of killing the agent run (F1)
- Include multiplier classification in cache key to prevent collisions between Anthropic and non-Anthropic models sharing a provider (F2)
- Remove dead TOOL_TOKENS entry from getLogStores.js; toolTokens.ts manages its own Keyv instance (F4)
- Add logger.debug for cache read/write failures (F7)
- Export toolTokens from agents barrel (F8)
- Fix import ordering (F9)
- Remove intermediate agentInputs array (F10)
- Remove narrating comments (F11)
2026-04-07 00:15:23 +02:00 · 2026-04-01 22:40:13 -04:00 · 2026-04-01 22:40:13 -04:00 · dc088d19fb
commit dc088d19fb
parent d18d34d9cf
4 changed files with 23 additions and 29 deletions
--- a/api/cache/getLogStores.js
+++ b/api/cache/getLogStores.js
@ -56,7 +56,6 @@ const namespaces = {
    CacheKeys.ADMIN_OAUTH_EXCHANGE,
    Time.THIRTY_SECONDS,
  ),
-  [CacheKeys.TOOL_TOKENS]: standardCache(CacheKeys.TOOL_TOKENS, Time.THIRTY_MINUTES),
 };

 /**
--- a/packages/api/src/agents/index.ts
+++ b/packages/api/src/agents/index.ts
@ -19,3 +19,4 @@ export * from './tools';
 export * from './validation';
 export * from './added';
 export * from './load';
+export * from './toolTokens';
--- a/packages/api/src/agents/run.ts
+++ b/packages/api/src/agents/run.ts
@ -296,7 +296,6 @@ export async function createRun({
      ? extractDiscoveredToolsFromHistory(messages)
      : new Set<string>();

-  const agentInputs: AgentInputs[] = [];
  const buildAgentContext = async (agent: RunAgent): Promise<AgentInputs> => {
    const provider =
      (providerEndpointMap[
@ -382,7 +381,6 @@ export async function createRun({
      agent.maxContextTokens,
    );

-    /** Resolve cached or computed tool schema tokens */
    let toolSchemaTokens: number | undefined;
    if (tokenCounter) {
      toolSchemaTokens = await getOrComputeToolTokens({
@ -418,8 +416,7 @@ export async function createRun({
    return agentInput;
  };

-  const resolvedInputs = await Promise.all(agents.map(buildAgentContext));
-  agentInputs.push(...resolvedInputs);
+  const agentInputs = await Promise.all(agents.map(buildAgentContext));

  const graphConfig: RunConfig['graphConfig'] = {
    signal,
--- a/packages/api/src/agents/toolTokens.ts
+++ b/packages/api/src/agents/toolTokens.ts
@ -6,9 +6,12 @@ import {
  DEFAULT_TOOL_TOKEN_MULTIPLIER,
 } from '@librechat/agents';
 import { CacheKeys, Time } from 'librechat-data-provider';
-import { standardCache } from '~/cache';
-import type { Keyv } from 'keyv';
+
 import type { GenericTool, LCTool, TokenCounter, ClientOptions } from '@librechat/agents';
+import type { Keyv } from 'keyv';
+
+import { logger } from '@librechat/data-schemas';
+import { standardCache } from '~/cache';

 /** Module-level cache instance, lazily initialized. */
 let toolTokenCache: Keyv | undefined;
@ -20,10 +23,6 @@ function getCache(): Keyv {
  return toolTokenCache;
 }

-/**
- * Builds a lightweight fingerprint from tool names.
- * Sorted and deduplicated to ensure stability regardless of tool ordering.
- */
 export function getToolFingerprint(tools?: GenericTool[], toolDefinitions?: LCTool[]): string {
  const names = new Set<string>();

@ -52,9 +51,6 @@ export function getToolFingerprint(tools?: GenericTool[], toolDefinitions?: LCTo
  return sorted.join(',') + '|' + sorted.length;
 }

-/**
- * Determines the provider-specific token multiplier for tool schemas.
- */
 function getToolTokenMultiplier(provider: Providers, clientOptions?: ClientOptions): number {
  const isAnthropic =
    provider !== Providers.BEDROCK &&
@ -65,10 +61,6 @@ function getToolTokenMultiplier(provider: Providers, clientOptions?: ClientOptio
  return isAnthropic ? ANTHROPIC_TOOL_TOKEN_MULTIPLIER : DEFAULT_TOOL_TOKEN_MULTIPLIER;
 }

-/**
- * Computes tool schema tokens from scratch using the provided token counter.
- * Mirrors the logic in AgentContext.calculateInstructionTokens().
- */
 export function computeToolSchemaTokens(
  tools: GenericTool[] | undefined,
  toolDefinitions: LCTool[] | undefined,
@ -119,10 +111,10 @@ export function computeToolSchemaTokens(
 }

 /**
- * Returns cached tool schema tokens if the fingerprint matches,
- * otherwise computes them, caches the result (fire-and-forget), and returns.
- *
- * Returns 0 if there are no tools (no caching needed).
+ * Returns cached tool schema tokens or computes them on miss.
+ * Returns 0 if there are no tools.
+ * Cache errors are non-fatal — falls through to compute on read failure,
+ * logs on write failure.
 */
 export async function getOrComputeToolTokens({
  tools,
@ -142,12 +134,18 @@ export async function getOrComputeToolTokens({
    return 0;
  }

-  const cacheKey = `${provider}:${fingerprint}`;
+  const multiplier = getToolTokenMultiplier(provider, clientOptions);
+  const multiplierKey = multiplier === ANTHROPIC_TOOL_TOKEN_MULTIPLIER ? 'anthropic' : 'default';
+  const cacheKey = `${provider}:${multiplierKey}:${fingerprint}`;
  const cache = getCache();

-  const cached = (await cache.get(cacheKey)) as number | undefined;
-  if (cached != null && cached > 0) {
-    return cached;
+  try {
+    const cached = (await cache.get(cacheKey)) as number | undefined;
+    if (cached != null && cached > 0) {
+      return cached;
+    }
+  } catch (err) {
+    logger.debug('[toolTokens] Cache read failed, computing fresh', err);
  }

  const tokens = computeToolSchemaTokens(
@ -159,9 +157,8 @@ export async function getOrComputeToolTokens({
  );

  if (tokens > 0) {
-    /** Fire-and-forget write — don't block the run on cache persistence */
-    cache.set(cacheKey, tokens).catch(() => {
-      /* swallow cache write errors */
+    cache.set(cacheKey, tokens).catch((err: unknown) => {
+      logger.debug('[toolTokens] Cache write failed', err);
    });
  }