From d18d34d9cf7e5ff3087ae12bccf14cdefc1e4465 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Tue, 24 Mar 2026 13:36:10 -0400 Subject: [PATCH 01/11] feat: cache tool schema token counts to avoid redundant recalculation Add time-based caching (30min TTL) for tool schema token counts using the existing Keyv/Redis infrastructure. Cache is keyed by provider and a lightweight fingerprint (sorted tool names + count), so agents sharing the same tool set share the cached value. New utility module (toolTokens.ts) provides reusable functions: - getToolFingerprint: stable fingerprint from tool names - computeToolSchemaTokens: mirrors AgentContext.calculateInstructionTokens - getOrComputeToolTokens: cache lookup with compute-on-miss In createRun, buildAgentContext is now async with Promise.all for parallel cache lookups in multi-agent runs. Pre-computed tokens are passed via AgentInputs.toolSchemaTokens, skipping calculateInstructionTokens in @librechat/agents entirely on cache hit. --- api/cache/getLogStores.js | 1 + packages/api/src/agents/run.ts | 23 +++- packages/api/src/agents/toolTokens.ts | 169 ++++++++++++++++++++++++++ packages/data-provider/src/config.ts | 5 + 4 files changed, 193 insertions(+), 5 deletions(-) create mode 100644 packages/api/src/agents/toolTokens.ts diff --git a/api/cache/getLogStores.js b/api/cache/getLogStores.js index 70eb681e53..21dedde7ec 100644 --- a/api/cache/getLogStores.js +++ b/api/cache/getLogStores.js @@ -56,6 +56,7 @@ const namespaces = { CacheKeys.ADMIN_OAUTH_EXCHANGE, Time.THIRTY_SECONDS, ), + [CacheKeys.TOOL_TOKENS]: standardCache(CacheKeys.TOOL_TOKENS, Time.THIRTY_MINUTES), }; /** diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index b6b5e6a14d..d0d8582819 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -18,6 +18,7 @@ import type { BaseMessage } from '@langchain/core/messages'; import type { IUser } from '@librechat/data-schemas'; import type * as t from '~/types'; import { resolveHeaders, createSafeUser } from '~/utils/env'; +import { getOrComputeToolTokens } from './toolTokens'; /** Expected shape of JSON tool search results */ interface ToolSearchJsonResult { @@ -296,7 +297,7 @@ export async function createRun({ : new Set(); const agentInputs: AgentInputs[] = []; - const buildAgentContext = (agent: RunAgent) => { + const buildAgentContext = async (agent: RunAgent): Promise => { const provider = (providerEndpointMap[ agent.provider as keyof typeof providerEndpointMap @@ -381,11 +382,24 @@ export async function createRun({ agent.maxContextTokens, ); + /** Resolve cached or computed tool schema tokens */ + let toolSchemaTokens: number | undefined; + if (tokenCounter) { + toolSchemaTokens = await getOrComputeToolTokens({ + tools: agent.tools, + toolDefinitions, + provider, + clientOptions: llmConfig, + tokenCounter, + }); + } + const reasoningKey = getReasoningKey(provider, llmConfig, agent.endpoint); const agentInput: AgentInputs = { provider, reasoningKey, toolDefinitions, + toolSchemaTokens, agentId: agent.id, tools: agent.tools, clientOptions: llmConfig, @@ -401,12 +415,11 @@ export async function createRun({ contextPruningConfig: summarization.contextPruning, maxToolResultChars: agent.maxToolResultChars, }; - agentInputs.push(agentInput); + return agentInput; }; - for (const agent of agents) { - buildAgentContext(agent); - } + const resolvedInputs = await Promise.all(agents.map(buildAgentContext)); + agentInputs.push(...resolvedInputs); const graphConfig: RunConfig['graphConfig'] = { signal, diff --git a/packages/api/src/agents/toolTokens.ts b/packages/api/src/agents/toolTokens.ts new file mode 100644 index 0000000000..98afd31500 --- /dev/null +++ b/packages/api/src/agents/toolTokens.ts @@ -0,0 +1,169 @@ +import { SystemMessage } from '@langchain/core/messages'; +import { + Providers, + toJsonSchema, + ANTHROPIC_TOOL_TOKEN_MULTIPLIER, + DEFAULT_TOOL_TOKEN_MULTIPLIER, +} from '@librechat/agents'; +import { CacheKeys, Time } from 'librechat-data-provider'; +import { standardCache } from '~/cache'; +import type { Keyv } from 'keyv'; +import type { GenericTool, LCTool, TokenCounter, ClientOptions } from '@librechat/agents'; + +/** Module-level cache instance, lazily initialized. */ +let toolTokenCache: Keyv | undefined; + +function getCache(): Keyv { + if (!toolTokenCache) { + toolTokenCache = standardCache(CacheKeys.TOOL_TOKENS, Time.THIRTY_MINUTES); + } + return toolTokenCache; +} + +/** + * Builds a lightweight fingerprint from tool names. + * Sorted and deduplicated to ensure stability regardless of tool ordering. + */ +export function getToolFingerprint(tools?: GenericTool[], toolDefinitions?: LCTool[]): string { + const names = new Set(); + + if (tools) { + for (const tool of tools) { + const name = (tool as unknown as Record).name; + if (typeof name === 'string' && name) { + names.add(name); + } + } + } + + if (toolDefinitions) { + for (const def of toolDefinitions) { + if (def.name) { + names.add(def.name); + } + } + } + + if (names.size === 0) { + return ''; + } + + const sorted = Array.from(names).sort(); + return sorted.join(',') + '|' + sorted.length; +} + +/** + * Determines the provider-specific token multiplier for tool schemas. + */ +function getToolTokenMultiplier(provider: Providers, clientOptions?: ClientOptions): number { + const isAnthropic = + provider !== Providers.BEDROCK && + (provider === Providers.ANTHROPIC || + /anthropic|claude/i.test( + String((clientOptions as { model?: string } | undefined)?.model ?? ''), + )); + return isAnthropic ? ANTHROPIC_TOOL_TOKEN_MULTIPLIER : DEFAULT_TOOL_TOKEN_MULTIPLIER; +} + +/** + * Computes tool schema tokens from scratch using the provided token counter. + * Mirrors the logic in AgentContext.calculateInstructionTokens(). + */ +export function computeToolSchemaTokens( + tools: GenericTool[] | undefined, + toolDefinitions: LCTool[] | undefined, + provider: Providers, + clientOptions: ClientOptions | undefined, + tokenCounter: TokenCounter, +): number { + let toolTokens = 0; + const countedToolNames = new Set(); + + if (tools && tools.length > 0) { + for (const tool of tools) { + const genericTool = tool as unknown as Record; + if (genericTool.schema != null && typeof genericTool.schema === 'object') { + const toolName = (genericTool.name as string | undefined) ?? ''; + const jsonSchema = toJsonSchema( + genericTool.schema, + toolName, + (genericTool.description as string | undefined) ?? '', + ); + toolTokens += tokenCounter(new SystemMessage(JSON.stringify(jsonSchema))); + if (toolName) { + countedToolNames.add(toolName); + } + } + } + } + + if (toolDefinitions && toolDefinitions.length > 0) { + for (const def of toolDefinitions) { + if (countedToolNames.has(def.name)) { + continue; + } + const schema = { + type: 'function', + function: { + name: def.name, + description: def.description ?? '', + parameters: def.parameters ?? {}, + }, + }; + toolTokens += tokenCounter(new SystemMessage(JSON.stringify(schema))); + } + } + + const multiplier = getToolTokenMultiplier(provider, clientOptions); + return Math.ceil(toolTokens * multiplier); +} + +/** + * Returns cached tool schema tokens if the fingerprint matches, + * otherwise computes them, caches the result (fire-and-forget), and returns. + * + * Returns 0 if there are no tools (no caching needed). + */ +export async function getOrComputeToolTokens({ + tools, + toolDefinitions, + provider, + clientOptions, + tokenCounter, +}: { + tools?: GenericTool[]; + toolDefinitions?: LCTool[]; + provider: Providers; + clientOptions?: ClientOptions; + tokenCounter: TokenCounter; +}): Promise { + const fingerprint = getToolFingerprint(tools, toolDefinitions); + if (!fingerprint) { + return 0; + } + + const cacheKey = `${provider}:${fingerprint}`; + const cache = getCache(); + + const cached = (await cache.get(cacheKey)) as number | undefined; + if (cached != null && cached > 0) { + return cached; + } + + const tokens = computeToolSchemaTokens( + tools, + toolDefinitions, + provider, + clientOptions, + tokenCounter, + ); + + if (tokens > 0) { + /** Fire-and-forget write — don't block the run on cache persistence */ + cache.set(cacheKey, tokens).catch(() => { + /* swallow cache write errors */ + }); + } + + return tokens; +} diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index ae3f5b9560..f4429f6547 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -1529,6 +1529,11 @@ export enum CacheKeys { * Key for admin panel OAuth exchange codes (one-time-use, short TTL). */ ADMIN_OAUTH_EXCHANGE = 'ADMIN_OAUTH_EXCHANGE', + /** + * Key for cached tool schema token counts. + * Keyed by provider + tool fingerprint to avoid redundant token counting. + */ + TOOL_TOKENS = 'TOOL_TOKENS', } /** From dc088d19fbb8dbf8f9679a0157b08366e1f9e3c6 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Wed, 1 Apr 2026 22:40:13 -0400 Subject: [PATCH 02/11] fix: address review findings for tool token caching - Wrap cache.get() in try/catch so Redis failures fall through to compute instead of killing the agent run (F1) - Include multiplier classification in cache key to prevent collisions between Anthropic and non-Anthropic models sharing a provider (F2) - Remove dead TOOL_TOKENS entry from getLogStores.js; toolTokens.ts manages its own Keyv instance (F4) - Add logger.debug for cache read/write failures (F7) - Export toolTokens from agents barrel (F8) - Fix import ordering (F9) - Remove intermediate agentInputs array (F10) - Remove narrating comments (F11) --- api/cache/getLogStores.js | 1 - packages/api/src/agents/index.ts | 1 + packages/api/src/agents/run.ts | 5 +-- packages/api/src/agents/toolTokens.ts | 45 +++++++++++++-------------- 4 files changed, 23 insertions(+), 29 deletions(-) diff --git a/api/cache/getLogStores.js b/api/cache/getLogStores.js index 21dedde7ec..70eb681e53 100644 --- a/api/cache/getLogStores.js +++ b/api/cache/getLogStores.js @@ -56,7 +56,6 @@ const namespaces = { CacheKeys.ADMIN_OAUTH_EXCHANGE, Time.THIRTY_SECONDS, ), - [CacheKeys.TOOL_TOKENS]: standardCache(CacheKeys.TOOL_TOKENS, Time.THIRTY_MINUTES), }; /** diff --git a/packages/api/src/agents/index.ts b/packages/api/src/agents/index.ts index 53f7f60a93..fbc46bfb3e 100644 --- a/packages/api/src/agents/index.ts +++ b/packages/api/src/agents/index.ts @@ -19,3 +19,4 @@ export * from './tools'; export * from './validation'; export * from './added'; export * from './load'; +export * from './toolTokens'; diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index d0d8582819..a728ed38bf 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -296,7 +296,6 @@ export async function createRun({ ? extractDiscoveredToolsFromHistory(messages) : new Set(); - const agentInputs: AgentInputs[] = []; const buildAgentContext = async (agent: RunAgent): Promise => { const provider = (providerEndpointMap[ @@ -382,7 +381,6 @@ export async function createRun({ agent.maxContextTokens, ); - /** Resolve cached or computed tool schema tokens */ let toolSchemaTokens: number | undefined; if (tokenCounter) { toolSchemaTokens = await getOrComputeToolTokens({ @@ -418,8 +416,7 @@ export async function createRun({ return agentInput; }; - const resolvedInputs = await Promise.all(agents.map(buildAgentContext)); - agentInputs.push(...resolvedInputs); + const agentInputs = await Promise.all(agents.map(buildAgentContext)); const graphConfig: RunConfig['graphConfig'] = { signal, diff --git a/packages/api/src/agents/toolTokens.ts b/packages/api/src/agents/toolTokens.ts index 98afd31500..ee558740a7 100644 --- a/packages/api/src/agents/toolTokens.ts +++ b/packages/api/src/agents/toolTokens.ts @@ -6,9 +6,12 @@ import { DEFAULT_TOOL_TOKEN_MULTIPLIER, } from '@librechat/agents'; import { CacheKeys, Time } from 'librechat-data-provider'; -import { standardCache } from '~/cache'; -import type { Keyv } from 'keyv'; + import type { GenericTool, LCTool, TokenCounter, ClientOptions } from '@librechat/agents'; +import type { Keyv } from 'keyv'; + +import { logger } from '@librechat/data-schemas'; +import { standardCache } from '~/cache'; /** Module-level cache instance, lazily initialized. */ let toolTokenCache: Keyv | undefined; @@ -20,10 +23,6 @@ function getCache(): Keyv { return toolTokenCache; } -/** - * Builds a lightweight fingerprint from tool names. - * Sorted and deduplicated to ensure stability regardless of tool ordering. - */ export function getToolFingerprint(tools?: GenericTool[], toolDefinitions?: LCTool[]): string { const names = new Set(); @@ -52,9 +51,6 @@ export function getToolFingerprint(tools?: GenericTool[], toolDefinitions?: LCTo return sorted.join(',') + '|' + sorted.length; } -/** - * Determines the provider-specific token multiplier for tool schemas. - */ function getToolTokenMultiplier(provider: Providers, clientOptions?: ClientOptions): number { const isAnthropic = provider !== Providers.BEDROCK && @@ -65,10 +61,6 @@ function getToolTokenMultiplier(provider: Providers, clientOptions?: ClientOptio return isAnthropic ? ANTHROPIC_TOOL_TOKEN_MULTIPLIER : DEFAULT_TOOL_TOKEN_MULTIPLIER; } -/** - * Computes tool schema tokens from scratch using the provided token counter. - * Mirrors the logic in AgentContext.calculateInstructionTokens(). - */ export function computeToolSchemaTokens( tools: GenericTool[] | undefined, toolDefinitions: LCTool[] | undefined, @@ -119,10 +111,10 @@ export function computeToolSchemaTokens( } /** - * Returns cached tool schema tokens if the fingerprint matches, - * otherwise computes them, caches the result (fire-and-forget), and returns. - * - * Returns 0 if there are no tools (no caching needed). + * Returns cached tool schema tokens or computes them on miss. + * Returns 0 if there are no tools. + * Cache errors are non-fatal — falls through to compute on read failure, + * logs on write failure. */ export async function getOrComputeToolTokens({ tools, @@ -142,12 +134,18 @@ export async function getOrComputeToolTokens({ return 0; } - const cacheKey = `${provider}:${fingerprint}`; + const multiplier = getToolTokenMultiplier(provider, clientOptions); + const multiplierKey = multiplier === ANTHROPIC_TOOL_TOKEN_MULTIPLIER ? 'anthropic' : 'default'; + const cacheKey = `${provider}:${multiplierKey}:${fingerprint}`; const cache = getCache(); - const cached = (await cache.get(cacheKey)) as number | undefined; - if (cached != null && cached > 0) { - return cached; + try { + const cached = (await cache.get(cacheKey)) as number | undefined; + if (cached != null && cached > 0) { + return cached; + } + } catch (err) { + logger.debug('[toolTokens] Cache read failed, computing fresh', err); } const tokens = computeToolSchemaTokens( @@ -159,9 +157,8 @@ export async function getOrComputeToolTokens({ ); if (tokens > 0) { - /** Fire-and-forget write — don't block the run on cache persistence */ - cache.set(cacheKey, tokens).catch(() => { - /* swallow cache write errors */ + cache.set(cacheKey, tokens).catch((err: unknown) => { + logger.debug('[toolTokens] Cache write failed', err); }); } From 749666503c00913fc2e230a26856a622d3296fe4 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Wed, 1 Apr 2026 22:45:29 -0400 Subject: [PATCH 03/11] fix: use Promise.allSettled for buildAgentContext to prevent single-agent failure from killing the entire run --- packages/api/src/agents/run.ts | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index a728ed38bf..a4d0324b7a 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -15,6 +15,7 @@ import type { } from '@librechat/agents'; import type { Agent, SummarizationConfig } from 'librechat-data-provider'; import type { BaseMessage } from '@langchain/core/messages'; +import { logger } from '@librechat/data-schemas'; import type { IUser } from '@librechat/data-schemas'; import type * as t from '~/types'; import { resolveHeaders, createSafeUser } from '~/utils/env'; @@ -416,7 +417,16 @@ export async function createRun({ return agentInput; }; - const agentInputs = await Promise.all(agents.map(buildAgentContext)); + const settled = await Promise.allSettled(agents.map(buildAgentContext)); + const agentInputs: AgentInputs[] = []; + for (let i = 0; i < settled.length; i++) { + const result = settled[i]; + if (result.status === 'fulfilled') { + agentInputs.push(result.value); + } else { + logger.error(`[createRun] buildAgentContext failed for agent ${agents[i].id}`, result.reason); + } + } const graphConfig: RunConfig['graphConfig'] = { signal, From 90d32b691e7035a49e5c2ca0c7e4cb50904ea43b Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Wed, 1 Apr 2026 22:49:05 -0400 Subject: [PATCH 04/11] refactor: single-pass collectToolData to avoid redundant tool iteration Extract collectToolData() that builds both the fingerprint names and serialized schemas in one pass over tools + toolDefinitions. getOrComputeToolTokens uses the pre-collected schemas directly on cache miss instead of re-looping. getToolFingerprint and computeToolSchemaTokens delegate to the same shared function for standalone use. --- packages/api/src/agents/toolTokens.ts | 159 ++++++++++++++------------ 1 file changed, 84 insertions(+), 75 deletions(-) diff --git a/packages/api/src/agents/toolTokens.ts b/packages/api/src/agents/toolTokens.ts index ee558740a7..55938d42eb 100644 --- a/packages/api/src/agents/toolTokens.ts +++ b/packages/api/src/agents/toolTokens.ts @@ -23,34 +23,6 @@ function getCache(): Keyv { return toolTokenCache; } -export function getToolFingerprint(tools?: GenericTool[], toolDefinitions?: LCTool[]): string { - const names = new Set(); - - if (tools) { - for (const tool of tools) { - const name = (tool as unknown as Record).name; - if (typeof name === 'string' && name) { - names.add(name); - } - } - } - - if (toolDefinitions) { - for (const def of toolDefinitions) { - if (def.name) { - names.add(def.name); - } - } - } - - if (names.size === 0) { - return ''; - } - - const sorted = Array.from(names).sort(); - return sorted.join(',') + '|' + sorted.length; -} - function getToolTokenMultiplier(provider: Providers, clientOptions?: ClientOptions): number { const isAnthropic = provider !== Providers.BEDROCK && @@ -61,6 +33,77 @@ function getToolTokenMultiplier(provider: Providers, clientOptions?: ClientOptio return isAnthropic ? ANTHROPIC_TOOL_TOKEN_MULTIPLIER : DEFAULT_TOOL_TOKEN_MULTIPLIER; } +/** + * Single pass over tools and toolDefinitions. Collects deduplicated sorted + * tool names (for fingerprint) and pre-serialized schemas (for token + * counting on cache miss), mirroring the dedup logic in + * AgentContext.calculateInstructionTokens(). + */ +function collectToolData( + tools?: GenericTool[], + toolDefinitions?: LCTool[], +): { names: string[]; schemas: string[] } { + const nameSet = new Set(); + const countedNames = new Set(); + const schemas: string[] = []; + + if (tools) { + for (const tool of tools) { + const genericTool = tool as unknown as Record; + const toolName = (genericTool.name as string | undefined) ?? ''; + if (toolName) { + nameSet.add(toolName); + } + if (genericTool.schema != null && typeof genericTool.schema === 'object') { + schemas.push( + JSON.stringify( + toJsonSchema( + genericTool.schema, + toolName, + (genericTool.description as string | undefined) ?? '', + ), + ), + ); + if (toolName) { + countedNames.add(toolName); + } + } + } + } + + if (toolDefinitions) { + for (const def of toolDefinitions) { + if (def.name) { + nameSet.add(def.name); + } + if (countedNames.has(def.name)) { + continue; + } + schemas.push( + JSON.stringify({ + type: 'function', + function: { + name: def.name, + description: def.description ?? '', + parameters: def.parameters ?? {}, + }, + }), + ); + } + } + + const names = nameSet.size > 0 ? Array.from(nameSet).sort() : []; + return { names, schemas }; +} + +export function getToolFingerprint(tools?: GenericTool[], toolDefinitions?: LCTool[]): string { + const { names } = collectToolData(tools, toolDefinitions); + if (names.length === 0) { + return ''; + } + return names.join(',') + '|' + names.length; +} + export function computeToolSchemaTokens( tools: GenericTool[] | undefined, toolDefinitions: LCTool[] | undefined, @@ -68,44 +111,11 @@ export function computeToolSchemaTokens( clientOptions: ClientOptions | undefined, tokenCounter: TokenCounter, ): number { + const { schemas } = collectToolData(tools, toolDefinitions); let toolTokens = 0; - const countedToolNames = new Set(); - - if (tools && tools.length > 0) { - for (const tool of tools) { - const genericTool = tool as unknown as Record; - if (genericTool.schema != null && typeof genericTool.schema === 'object') { - const toolName = (genericTool.name as string | undefined) ?? ''; - const jsonSchema = toJsonSchema( - genericTool.schema, - toolName, - (genericTool.description as string | undefined) ?? '', - ); - toolTokens += tokenCounter(new SystemMessage(JSON.stringify(jsonSchema))); - if (toolName) { - countedToolNames.add(toolName); - } - } - } + for (const schema of schemas) { + toolTokens += tokenCounter(new SystemMessage(schema)); } - - if (toolDefinitions && toolDefinitions.length > 0) { - for (const def of toolDefinitions) { - if (countedToolNames.has(def.name)) { - continue; - } - const schema = { - type: 'function', - function: { - name: def.name, - description: def.description ?? '', - parameters: def.parameters ?? {}, - }, - }; - toolTokens += tokenCounter(new SystemMessage(JSON.stringify(schema))); - } - } - const multiplier = getToolTokenMultiplier(provider, clientOptions); return Math.ceil(toolTokens * multiplier); } @@ -113,8 +123,8 @@ export function computeToolSchemaTokens( /** * Returns cached tool schema tokens or computes them on miss. * Returns 0 if there are no tools. - * Cache errors are non-fatal — falls through to compute on read failure, - * logs on write failure. + * Single pass over tool arrays: builds fingerprint and serialized schemas + * together, then only runs the token counter if the cache misses. */ export async function getOrComputeToolTokens({ tools, @@ -129,11 +139,12 @@ export async function getOrComputeToolTokens({ clientOptions?: ClientOptions; tokenCounter: TokenCounter; }): Promise { - const fingerprint = getToolFingerprint(tools, toolDefinitions); - if (!fingerprint) { + const { names, schemas } = collectToolData(tools, toolDefinitions); + if (names.length === 0) { return 0; } + const fingerprint = names.join(',') + '|' + names.length; const multiplier = getToolTokenMultiplier(provider, clientOptions); const multiplierKey = multiplier === ANTHROPIC_TOOL_TOKEN_MULTIPLIER ? 'anthropic' : 'default'; const cacheKey = `${provider}:${multiplierKey}:${fingerprint}`; @@ -148,13 +159,11 @@ export async function getOrComputeToolTokens({ logger.debug('[toolTokens] Cache read failed, computing fresh', err); } - const tokens = computeToolSchemaTokens( - tools, - toolDefinitions, - provider, - clientOptions, - tokenCounter, - ); + let toolTokens = 0; + for (const schema of schemas) { + toolTokens += tokenCounter(new SystemMessage(schema)); + } + const tokens = Math.ceil(toolTokens * multiplier); if (tokens > 0) { cache.set(cacheKey, tokens).catch((err: unknown) => { From 80727a0c8697ca742f71e5fdb72ebf310e7741b2 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Wed, 1 Apr 2026 22:52:49 -0400 Subject: [PATCH 05/11] test: add comprehensive tests for toolTokens utility module Covers all three exported functions: getToolFingerprint: - empty inputs, sorted output, dedup across tools/defs, ordering stability computeToolSchemaTokens: - empty inputs, GenericTool schemas, LCTool definitions, deduplication, Anthropic multiplier (provider, model name, Bedrock exclusion) getOrComputeToolTokens: - empty inputs, cache miss compute+write, cache hit (no recompute), per-provider separation, shared cache across agents, recompute on tool change, cache read failure fallback, cache write failure resilience, GenericTool fingerprint verification --- packages/api/src/agents/toolTokens.spec.ts | 377 +++++++++++++++++++++ 1 file changed, 377 insertions(+) create mode 100644 packages/api/src/agents/toolTokens.spec.ts diff --git a/packages/api/src/agents/toolTokens.spec.ts b/packages/api/src/agents/toolTokens.spec.ts new file mode 100644 index 0000000000..f5f0db870e --- /dev/null +++ b/packages/api/src/agents/toolTokens.spec.ts @@ -0,0 +1,377 @@ +import { z } from 'zod'; +import { SystemMessage } from '@langchain/core/messages'; +import { DynamicStructuredTool } from '@langchain/core/tools'; +import { + Providers, + ANTHROPIC_TOOL_TOKEN_MULTIPLIER, + DEFAULT_TOOL_TOKEN_MULTIPLIER, +} from '@librechat/agents'; +import type { GenericTool, LCTool, TokenCounter } from '@librechat/agents'; +import { getToolFingerprint, computeToolSchemaTokens, getOrComputeToolTokens } from './toolTokens'; + +/* ---------- Mock standardCache to use a plain Map (no Redis) ---------- */ +const mockCacheStore = new Map(); +jest.mock('~/cache', () => ({ + standardCache: jest.fn(() => ({ + get: jest.fn((key: string) => Promise.resolve(mockCacheStore.get(key))), + set: jest.fn((key: string, value: unknown) => { + mockCacheStore.set(key, value); + return Promise.resolve(true); + }), + })), +})); + +jest.mock('@librechat/data-schemas', () => ({ + logger: { debug: jest.fn(), error: jest.fn(), warn: jest.fn(), info: jest.fn() }, +})); + +/* ---------- Helpers ---------- */ + +function makeTool(name: string, description = `${name} description`): GenericTool { + return new DynamicStructuredTool({ + name, + description, + schema: z.object({ input: z.string().optional() }), + func: async () => 'ok', + }) as unknown as GenericTool; +} + +function makeToolDef(name: string, description?: string): LCTool { + return { + name, + description: description ?? `${name} description`, + parameters: { type: 'object', properties: { input: { type: 'string' } } }, + }; +} + +/** Token counter that returns the string length of message content (deterministic). */ +const fakeTokenCounter: TokenCounter = (msg) => { + const content = typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content); + return content.length; +}; + +beforeEach(() => { + mockCacheStore.clear(); +}); + +/* ========================================================================= */ +/* getToolFingerprint */ +/* ========================================================================= */ + +describe('getToolFingerprint', () => { + it('returns empty string when no tools or definitions provided', () => { + expect(getToolFingerprint()).toBe(''); + expect(getToolFingerprint([], [])).toBe(''); + }); + + it('returns sorted names with count from GenericTool array', () => { + const tools = [makeTool('beta'), makeTool('alpha')]; + expect(getToolFingerprint(tools)).toBe('alpha,beta|2'); + }); + + it('returns sorted names with count from LCTool definitions', () => { + const defs = [makeToolDef('zulu'), makeToolDef('alpha')]; + expect(getToolFingerprint(undefined, defs)).toBe('alpha,zulu|2'); + }); + + it('deduplicates names across tools and toolDefinitions', () => { + const tools = [makeTool('shared'), makeTool('only_tool')]; + const defs = [makeToolDef('shared'), makeToolDef('only_def')]; + expect(getToolFingerprint(tools, defs)).toBe('only_def,only_tool,shared|3'); + }); + + it('is stable regardless of input ordering', () => { + const a = getToolFingerprint([makeTool('x'), makeTool('a'), makeTool('m')]); + const b = getToolFingerprint([makeTool('m'), makeTool('x'), makeTool('a')]); + expect(a).toBe(b); + expect(a).toBe('a,m,x|3'); + }); +}); + +/* ========================================================================= */ +/* computeToolSchemaTokens */ +/* ========================================================================= */ + +describe('computeToolSchemaTokens', () => { + it('returns 0 when no tools provided', () => { + expect( + computeToolSchemaTokens(undefined, undefined, Providers.OPENAI, undefined, fakeTokenCounter), + ).toBe(0); + expect(computeToolSchemaTokens([], [], Providers.OPENAI, undefined, fakeTokenCounter)).toBe(0); + }); + + it('counts tokens from GenericTool schemas', () => { + const tools = [makeTool('test_tool')]; + const result = computeToolSchemaTokens( + tools, + undefined, + Providers.OPENAI, + undefined, + fakeTokenCounter, + ); + expect(result).toBeGreaterThan(0); + }); + + it('counts tokens from LCTool definitions', () => { + const defs = [makeToolDef('test_def')]; + const result = computeToolSchemaTokens( + undefined, + defs, + Providers.OPENAI, + undefined, + fakeTokenCounter, + ); + expect(result).toBeGreaterThan(0); + }); + + it('deduplicates: tool counted from tools array is skipped in toolDefinitions', () => { + const tools = [makeTool('shared')]; + const defs = [makeToolDef('shared')]; + + const toolsOnly = computeToolSchemaTokens( + tools, + undefined, + Providers.OPENAI, + undefined, + fakeTokenCounter, + ); + const both = computeToolSchemaTokens( + tools, + defs, + Providers.OPENAI, + undefined, + fakeTokenCounter, + ); + + expect(both).toBe(toolsOnly); + }); + + it('applies Anthropic multiplier for Anthropic provider', () => { + const defs = [makeToolDef('tool')]; + const openai = computeToolSchemaTokens( + undefined, + defs, + Providers.OPENAI, + undefined, + fakeTokenCounter, + ); + const anthropic = computeToolSchemaTokens( + undefined, + defs, + Providers.ANTHROPIC, + undefined, + fakeTokenCounter, + ); + + const expectedRatio = ANTHROPIC_TOOL_TOKEN_MULTIPLIER / DEFAULT_TOOL_TOKEN_MULTIPLIER; + expect(anthropic / openai).toBeCloseTo(expectedRatio, 1); + }); + + it('applies Anthropic multiplier when model name contains "claude"', () => { + const defs = [makeToolDef('tool')]; + const clientOptions = { model: 'claude-3-opus' }; + const result = computeToolSchemaTokens( + undefined, + defs, + Providers.OPENAI, + clientOptions, + fakeTokenCounter, + ); + + const defaultResult = computeToolSchemaTokens( + undefined, + defs, + Providers.OPENAI, + undefined, + fakeTokenCounter, + ); + expect(result).toBeGreaterThan(defaultResult); + }); + + it('does not apply Anthropic multiplier for Bedrock even with claude model', () => { + const defs = [makeToolDef('tool')]; + const clientOptions = { model: 'claude-3-opus' }; + const bedrock = computeToolSchemaTokens( + undefined, + defs, + Providers.BEDROCK, + clientOptions, + fakeTokenCounter, + ); + const defaultResult = computeToolSchemaTokens( + undefined, + defs, + Providers.OPENAI, + undefined, + fakeTokenCounter, + ); + + expect(bedrock).toBe(defaultResult); + }); +}); + +/* ========================================================================= */ +/* getOrComputeToolTokens */ +/* ========================================================================= */ + +describe('getOrComputeToolTokens', () => { + it('returns 0 when no tools provided', async () => { + const result = await getOrComputeToolTokens({ + provider: Providers.OPENAI, + tokenCounter: fakeTokenCounter, + }); + expect(result).toBe(0); + }); + + it('computes and caches tokens on first call', async () => { + const defs = [makeToolDef('tool_a'), makeToolDef('tool_b')]; + const result = await getOrComputeToolTokens({ + toolDefinitions: defs, + provider: Providers.OPENAI, + tokenCounter: fakeTokenCounter, + }); + + expect(result).toBeGreaterThan(0); + expect(mockCacheStore.size).toBe(1); + + const cachedValue = Array.from(mockCacheStore.values())[0]; + expect(cachedValue).toBe(result); + }); + + it('returns cached value on second call without recomputing', async () => { + const defs = [makeToolDef('tool_a')]; + const counter = jest.fn(fakeTokenCounter); + + const first = await getOrComputeToolTokens({ + toolDefinitions: defs, + provider: Providers.OPENAI, + tokenCounter: counter, + }); + + const callCountAfterFirst = counter.mock.calls.length; + + const second = await getOrComputeToolTokens({ + toolDefinitions: defs, + provider: Providers.OPENAI, + tokenCounter: counter, + }); + + expect(second).toBe(first); + expect(counter.mock.calls.length).toBe(callCountAfterFirst); + }); + + it('caches separately for different providers with different multipliers', async () => { + const defs = [makeToolDef('tool')]; + + const openai = await getOrComputeToolTokens({ + toolDefinitions: defs, + provider: Providers.OPENAI, + tokenCounter: fakeTokenCounter, + }); + + const anthropic = await getOrComputeToolTokens({ + toolDefinitions: defs, + provider: Providers.ANTHROPIC, + tokenCounter: fakeTokenCounter, + }); + + expect(openai).not.toBe(anthropic); + expect(mockCacheStore.size).toBe(2); + }); + + it('shares cache for same provider+tools across calls with different agents', async () => { + const defs = [makeToolDef('shared_tool')]; + + const first = await getOrComputeToolTokens({ + toolDefinitions: defs, + provider: Providers.OPENAI, + tokenCounter: fakeTokenCounter, + }); + + const second = await getOrComputeToolTokens({ + toolDefinitions: defs, + provider: Providers.OPENAI, + tokenCounter: fakeTokenCounter, + }); + + expect(first).toBe(second); + expect(mockCacheStore.size).toBe(1); + }); + + it('recomputes when tool set changes', async () => { + const first = await getOrComputeToolTokens({ + toolDefinitions: [makeToolDef('tool_a')], + provider: Providers.OPENAI, + tokenCounter: fakeTokenCounter, + }); + + const second = await getOrComputeToolTokens({ + toolDefinitions: [makeToolDef('tool_a'), makeToolDef('tool_b')], + provider: Providers.OPENAI, + tokenCounter: fakeTokenCounter, + }); + + expect(second).not.toBe(first); + expect(second).toBeGreaterThan(first); + expect(mockCacheStore.size).toBe(2); + }); + + it('falls back to compute when cache read throws', async () => { + const { standardCache } = jest.requireMock('~/cache') as { standardCache: jest.Mock }; + const failingCache = { + get: jest.fn(() => Promise.reject(new Error('Redis down'))), + set: jest.fn(() => Promise.resolve(true)), + }; + standardCache.mockReturnValueOnce(failingCache); + + /** Reset the module-level cache so it picks up the failing mock */ + jest.resetModules(); + const { getOrComputeToolTokens: freshGetOrCompute } = await import('./toolTokens'); + + const defs = [makeToolDef('tool')]; + const result = await freshGetOrCompute({ + toolDefinitions: defs, + provider: Providers.OPENAI, + tokenCounter: fakeTokenCounter, + }); + + expect(result).toBeGreaterThan(0); + }); + + it('does not throw when cache write fails', async () => { + const { standardCache } = jest.requireMock('~/cache') as { standardCache: jest.Mock }; + const writeFailCache = { + get: jest.fn(() => Promise.resolve(undefined)), + set: jest.fn(() => Promise.reject(new Error('Redis write error'))), + }; + standardCache.mockReturnValueOnce(writeFailCache); + + jest.resetModules(); + const { getOrComputeToolTokens: freshGetOrCompute } = await import('./toolTokens'); + + const defs = [makeToolDef('tool')]; + const result = await freshGetOrCompute({ + toolDefinitions: defs, + provider: Providers.OPENAI, + tokenCounter: fakeTokenCounter, + }); + + expect(result).toBeGreaterThan(0); + }); + + it('uses GenericTool tools for fingerprint and token counting', async () => { + const tools = [makeTool('alpha'), makeTool('beta')]; + + const result = await getOrComputeToolTokens({ + tools, + provider: Providers.OPENAI, + tokenCounter: fakeTokenCounter, + }); + + expect(result).toBeGreaterThan(0); + expect(mockCacheStore.size).toBe(1); + + const key = Array.from(mockCacheStore.keys())[0]; + expect(key).toContain('alpha,beta|2'); + }); +}); From af2cbbcc54c76c7837b86737777c1347627508df Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Wed, 1 Apr 2026 22:58:14 -0400 Subject: [PATCH 06/11] fix: guard against empty/partial agent initialization failures - Throw if all agents fail to initialize (empty agentInputs) - Throw if any agent fails in a routed multi-agent run (partial graph) - Move getCache() inside try/catch so cache init errors are non-fatal - Add inline comment explaining deliberate non-delegation of token loop - Fix import ordering in run.ts and toolTokens.ts --- packages/api/src/agents/run.ts | 19 ++++++++++++++++- packages/api/src/agents/toolTokens.ts | 30 +++++++++++++++++++-------- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index a4d0324b7a..ae9062d9a9 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -1,5 +1,6 @@ import { Run, Providers, Constants } from '@librechat/agents'; import { providerEndpointMap, KnownEndpoints } from 'librechat-data-provider'; +import { logger } from '@librechat/data-schemas'; import type { SummarizationConfig as AgentSummarizationConfig, MultiAgentGraphConfig, @@ -15,7 +16,6 @@ import type { } from '@librechat/agents'; import type { Agent, SummarizationConfig } from 'librechat-data-provider'; import type { BaseMessage } from '@langchain/core/messages'; -import { logger } from '@librechat/data-schemas'; import type { IUser } from '@librechat/data-schemas'; import type * as t from '~/types'; import { resolveHeaders, createSafeUser } from '~/utils/env'; @@ -428,6 +428,23 @@ export async function createRun({ } } + if (agentInputs.length === 0) { + throw new Error( + `[createRun] All ${agents.length} agent(s) failed to initialize; cannot create run`, + ); + } + + const hasEdges = (agents[0].edges?.length ?? 0) > 0; + if (agentInputs.length < agents.length && hasEdges) { + const failedIds = agents + .filter((_, i) => settled[i].status === 'rejected') + .map((a) => a.id) + .join(', '); + throw new Error( + `[createRun] Agent(s) [${failedIds}] failed in a routed multi-agent run; cannot proceed with partial graph`, + ); + } + const graphConfig: RunConfig['graphConfig'] = { signal, agents: agentInputs, diff --git a/packages/api/src/agents/toolTokens.ts b/packages/api/src/agents/toolTokens.ts index 55938d42eb..6752b0cdb0 100644 --- a/packages/api/src/agents/toolTokens.ts +++ b/packages/api/src/agents/toolTokens.ts @@ -6,11 +6,11 @@ import { DEFAULT_TOOL_TOKEN_MULTIPLIER, } from '@librechat/agents'; import { CacheKeys, Time } from 'librechat-data-provider'; +import { logger } from '@librechat/data-schemas'; import type { GenericTool, LCTool, TokenCounter, ClientOptions } from '@librechat/agents'; import type { Keyv } from 'keyv'; -import { logger } from '@librechat/data-schemas'; import { standardCache } from '~/cache'; /** Module-level cache instance, lazily initialized. */ @@ -34,10 +34,14 @@ function getToolTokenMultiplier(provider: Providers, clientOptions?: ClientOptio } /** - * Single pass over tools and toolDefinitions. Collects deduplicated sorted - * tool names (for fingerprint) and pre-serialized schemas (for token - * counting on cache miss), mirroring the dedup logic in - * AgentContext.calculateInstructionTokens(). + * Single pass over tools and toolDefinitions. Collects: + * - `names`: deduplicated, sorted tool names for fingerprinting. + * - `schemas`: pre-serialized JSON strings for token counting. + * + * `nameSet` tracks all tool names (for the fingerprint). `countedNames` + * tracks which tools contributed a schema from the `tools` array — a + * toolDefinition whose name is in `countedNames` is skipped to avoid + * double-counting, mirroring AgentContext.calculateInstructionTokens(). */ function collectToolData( tools?: GenericTool[], @@ -148,9 +152,9 @@ export async function getOrComputeToolTokens({ const multiplier = getToolTokenMultiplier(provider, clientOptions); const multiplierKey = multiplier === ANTHROPIC_TOOL_TOKEN_MULTIPLIER ? 'anthropic' : 'default'; const cacheKey = `${provider}:${multiplierKey}:${fingerprint}`; - const cache = getCache(); try { + const cache = getCache(); const cached = (await cache.get(cacheKey)) as number | undefined; if (cached != null && cached > 0) { return cached; @@ -159,6 +163,8 @@ export async function getOrComputeToolTokens({ logger.debug('[toolTokens] Cache read failed, computing fresh', err); } + // Inline token count — not delegating to computeToolSchemaTokens to avoid + // a second collectToolData pass; schemas are already built above. let toolTokens = 0; for (const schema of schemas) { toolTokens += tokenCounter(new SystemMessage(schema)); @@ -166,9 +172,15 @@ export async function getOrComputeToolTokens({ const tokens = Math.ceil(toolTokens * multiplier); if (tokens > 0) { - cache.set(cacheKey, tokens).catch((err: unknown) => { - logger.debug('[toolTokens] Cache write failed', err); - }); + try { + getCache() + .set(cacheKey, tokens) + .catch((err: unknown) => { + logger.debug('[toolTokens] Cache write failed', err); + }); + } catch { + // getCache() init failure on write path — non-fatal + } } return tokens; From 54d1a36101b3dd7941a0c8144413e728dbb22de2 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Wed, 1 Apr 2026 22:59:24 -0400 Subject: [PATCH 07/11] chore: import order --- packages/api/src/agents/run.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index ae9062d9a9..805f1e5df8 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -1,6 +1,6 @@ +import { logger } from '@librechat/data-schemas'; import { Run, Providers, Constants } from '@librechat/agents'; import { providerEndpointMap, KnownEndpoints } from 'librechat-data-provider'; -import { logger } from '@librechat/data-schemas'; import type { SummarizationConfig as AgentSummarizationConfig, MultiAgentGraphConfig, From 06eb21c6537b1bea53d9273fef04c969f00724fa Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Wed, 1 Apr 2026 23:07:56 -0400 Subject: [PATCH 08/11] fix: repair broken cache-error tests and fix import ordering - Hoist mockGet/mockSet as module-level jest.fn() instances so mockRejectedValueOnce targets the actual mock consumed by the module. Previously jest.resetModules() created a new mock instance, so the cache-error tests were silently testing the normal miss path. - Reset mock implementations in beforeEach for test isolation. - Fix section 1 import order in toolTokens.ts (shortest to longest). - Add blank-line separators between import sections in spec file. --- packages/api/src/agents/toolTokens.spec.ts | 54 +++++++++------------- packages/api/src/agents/toolTokens.ts | 4 +- 2 files changed, 24 insertions(+), 34 deletions(-) diff --git a/packages/api/src/agents/toolTokens.spec.ts b/packages/api/src/agents/toolTokens.spec.ts index f5f0db870e..cc678bd514 100644 --- a/packages/api/src/agents/toolTokens.spec.ts +++ b/packages/api/src/agents/toolTokens.spec.ts @@ -1,24 +1,25 @@ import { z } from 'zod'; -import { SystemMessage } from '@langchain/core/messages'; import { DynamicStructuredTool } from '@langchain/core/tools'; import { Providers, ANTHROPIC_TOOL_TOKEN_MULTIPLIER, DEFAULT_TOOL_TOKEN_MULTIPLIER, } from '@librechat/agents'; + import type { GenericTool, LCTool, TokenCounter } from '@librechat/agents'; + import { getToolFingerprint, computeToolSchemaTokens, getOrComputeToolTokens } from './toolTokens'; -/* ---------- Mock standardCache to use a plain Map (no Redis) ---------- */ +/* ---------- Mock standardCache with hoisted get/set for per-test overrides ---------- */ const mockCacheStore = new Map(); +const mockGet = jest.fn((key: string) => Promise.resolve(mockCacheStore.get(key))); +const mockSet = jest.fn((key: string, value: unknown) => { + mockCacheStore.set(key, value); + return Promise.resolve(true); +}); + jest.mock('~/cache', () => ({ - standardCache: jest.fn(() => ({ - get: jest.fn((key: string) => Promise.resolve(mockCacheStore.get(key))), - set: jest.fn((key: string, value: unknown) => { - mockCacheStore.set(key, value); - return Promise.resolve(true); - }), - })), + standardCache: jest.fn(() => ({ get: mockGet, set: mockSet })), })); jest.mock('@librechat/data-schemas', () => ({ @@ -52,6 +53,11 @@ const fakeTokenCounter: TokenCounter = (msg) => { beforeEach(() => { mockCacheStore.clear(); + mockGet.mockImplementation((key: string) => Promise.resolve(mockCacheStore.get(key))); + mockSet.mockImplementation((key: string, value: unknown) => { + mockCacheStore.set(key, value); + return Promise.resolve(true); + }); }); /* ========================================================================= */ @@ -177,7 +183,6 @@ describe('computeToolSchemaTokens', () => { clientOptions, fakeTokenCounter, ); - const defaultResult = computeToolSchemaTokens( undefined, defs, @@ -317,46 +322,31 @@ describe('getOrComputeToolTokens', () => { }); it('falls back to compute when cache read throws', async () => { - const { standardCache } = jest.requireMock('~/cache') as { standardCache: jest.Mock }; - const failingCache = { - get: jest.fn(() => Promise.reject(new Error('Redis down'))), - set: jest.fn(() => Promise.resolve(true)), - }; - standardCache.mockReturnValueOnce(failingCache); - - /** Reset the module-level cache so it picks up the failing mock */ - jest.resetModules(); - const { getOrComputeToolTokens: freshGetOrCompute } = await import('./toolTokens'); + mockGet.mockRejectedValueOnce(new Error('Redis down')); const defs = [makeToolDef('tool')]; - const result = await freshGetOrCompute({ + const result = await getOrComputeToolTokens({ toolDefinitions: defs, provider: Providers.OPENAI, tokenCounter: fakeTokenCounter, }); expect(result).toBeGreaterThan(0); + expect(mockGet).toHaveBeenCalled(); }); it('does not throw when cache write fails', async () => { - const { standardCache } = jest.requireMock('~/cache') as { standardCache: jest.Mock }; - const writeFailCache = { - get: jest.fn(() => Promise.resolve(undefined)), - set: jest.fn(() => Promise.reject(new Error('Redis write error'))), - }; - standardCache.mockReturnValueOnce(writeFailCache); + mockSet.mockRejectedValueOnce(new Error('Redis write error')); - jest.resetModules(); - const { getOrComputeToolTokens: freshGetOrCompute } = await import('./toolTokens'); - - const defs = [makeToolDef('tool')]; - const result = await freshGetOrCompute({ + const defs = [makeToolDef('tool_write_fail')]; + const result = await getOrComputeToolTokens({ toolDefinitions: defs, provider: Providers.OPENAI, tokenCounter: fakeTokenCounter, }); expect(result).toBeGreaterThan(0); + expect(mockSet).toHaveBeenCalled(); }); it('uses GenericTool tools for fingerprint and token counting', async () => { diff --git a/packages/api/src/agents/toolTokens.ts b/packages/api/src/agents/toolTokens.ts index 6752b0cdb0..e8a7104020 100644 --- a/packages/api/src/agents/toolTokens.ts +++ b/packages/api/src/agents/toolTokens.ts @@ -1,12 +1,12 @@ +import { logger } from '@librechat/data-schemas'; import { SystemMessage } from '@langchain/core/messages'; +import { CacheKeys, Time } from 'librechat-data-provider'; import { Providers, toJsonSchema, ANTHROPIC_TOOL_TOKEN_MULTIPLIER, DEFAULT_TOOL_TOKEN_MULTIPLIER, } from '@librechat/agents'; -import { CacheKeys, Time } from 'librechat-data-provider'; -import { logger } from '@librechat/data-schemas'; import type { GenericTool, LCTool, TokenCounter, ClientOptions } from '@librechat/agents'; import type { Keyv } from 'keyv'; From df99e33543dd3fce42d2a073ef9e1aaf8f35e37a Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Wed, 1 Apr 2026 23:15:40 -0400 Subject: [PATCH 09/11] refactor: per-tool token count caching instead of per-tool-set Cache each tool's raw (pre-multiplier) token count individually by name, rather than caching the total for an entire tool set by fingerprint. This means: - Adding/removing a tool only requires computing the new one - Agents sharing some but not all tools benefit from shared cache hits - The provider-specific multiplier is applied fresh to the sum each time (trivial cost), so raw counts are provider-agnostic and maximally shared Replace collectToolData/getToolFingerprint with collectToolSchemas which returns a Map. getOrComputeToolTokens iterates the map, checking cache per tool and only running tokenCounter on misses. --- packages/api/src/agents/toolTokens.spec.ts | 122 +++++++------- packages/api/src/agents/toolTokens.ts | 184 ++++++++++----------- 2 files changed, 153 insertions(+), 153 deletions(-) diff --git a/packages/api/src/agents/toolTokens.spec.ts b/packages/api/src/agents/toolTokens.spec.ts index cc678bd514..27461e0d67 100644 --- a/packages/api/src/agents/toolTokens.spec.ts +++ b/packages/api/src/agents/toolTokens.spec.ts @@ -8,7 +8,7 @@ import { import type { GenericTool, LCTool, TokenCounter } from '@librechat/agents'; -import { getToolFingerprint, computeToolSchemaTokens, getOrComputeToolTokens } from './toolTokens'; +import { collectToolSchemas, computeToolSchemaTokens, getOrComputeToolTokens } from './toolTokens'; /* ---------- Mock standardCache with hoisted get/set for per-test overrides ---------- */ const mockCacheStore = new Map(); @@ -61,36 +61,38 @@ beforeEach(() => { }); /* ========================================================================= */ -/* getToolFingerprint */ +/* collectToolSchemas */ /* ========================================================================= */ -describe('getToolFingerprint', () => { - it('returns empty string when no tools or definitions provided', () => { - expect(getToolFingerprint()).toBe(''); - expect(getToolFingerprint([], [])).toBe(''); +describe('collectToolSchemas', () => { + it('returns empty map when no tools provided', () => { + expect(collectToolSchemas().size).toBe(0); + expect(collectToolSchemas([], []).size).toBe(0); }); - it('returns sorted names with count from GenericTool array', () => { - const tools = [makeTool('beta'), makeTool('alpha')]; - expect(getToolFingerprint(tools)).toBe('alpha,beta|2'); + it('collects schemas from GenericTool array keyed by name', () => { + const tools = [makeTool('alpha'), makeTool('beta')]; + const schemas = collectToolSchemas(tools); + expect(schemas.size).toBe(2); + expect(schemas.has('alpha')).toBe(true); + expect(schemas.has('beta')).toBe(true); }); - it('returns sorted names with count from LCTool definitions', () => { - const defs = [makeToolDef('zulu'), makeToolDef('alpha')]; - expect(getToolFingerprint(undefined, defs)).toBe('alpha,zulu|2'); + it('collects schemas from LCTool definitions', () => { + const defs = [makeToolDef('x'), makeToolDef('y')]; + const schemas = collectToolSchemas(undefined, defs); + expect(schemas.size).toBe(2); + expect(schemas.has('x')).toBe(true); + expect(schemas.has('y')).toBe(true); }); - it('deduplicates names across tools and toolDefinitions', () => { - const tools = [makeTool('shared'), makeTool('only_tool')]; + it('deduplicates: GenericTool takes precedence over matching toolDefinition', () => { + const tools = [makeTool('shared')]; const defs = [makeToolDef('shared'), makeToolDef('only_def')]; - expect(getToolFingerprint(tools, defs)).toBe('only_def,only_tool,shared|3'); - }); - - it('is stable regardless of input ordering', () => { - const a = getToolFingerprint([makeTool('x'), makeTool('a'), makeTool('m')]); - const b = getToolFingerprint([makeTool('m'), makeTool('x'), makeTool('a')]); - expect(a).toBe(b); - expect(a).toBe('a,m,x|3'); + const schemas = collectToolSchemas(tools, defs); + expect(schemas.size).toBe(2); + expect(schemas.has('shared')).toBe(true); + expect(schemas.has('only_def')).toBe(true); }); }); @@ -228,7 +230,7 @@ describe('getOrComputeToolTokens', () => { expect(result).toBe(0); }); - it('computes and caches tokens on first call', async () => { + it('computes and caches each tool individually on first call', async () => { const defs = [makeToolDef('tool_a'), makeToolDef('tool_b')]; const result = await getOrComputeToolTokens({ toolDefinitions: defs, @@ -237,13 +239,12 @@ describe('getOrComputeToolTokens', () => { }); expect(result).toBeGreaterThan(0); - expect(mockCacheStore.size).toBe(1); - - const cachedValue = Array.from(mockCacheStore.values())[0]; - expect(cachedValue).toBe(result); + expect(mockCacheStore.has('tool_a')).toBe(true); + expect(mockCacheStore.has('tool_b')).toBe(true); + expect(mockCacheStore.size).toBe(2); }); - it('returns cached value on second call without recomputing', async () => { + it('uses cached per-tool values on second call without recomputing', async () => { const defs = [makeToolDef('tool_a')]; const counter = jest.fn(fakeTokenCounter); @@ -265,7 +266,7 @@ describe('getOrComputeToolTokens', () => { expect(counter.mock.calls.length).toBe(callCountAfterFirst); }); - it('caches separately for different providers with different multipliers', async () => { + it('applies different multipliers for different providers on same cached raw counts', async () => { const defs = [makeToolDef('tool')]; const openai = await getOrComputeToolTokens({ @@ -281,43 +282,28 @@ describe('getOrComputeToolTokens', () => { }); expect(openai).not.toBe(anthropic); - expect(mockCacheStore.size).toBe(2); - }); - - it('shares cache for same provider+tools across calls with different agents', async () => { - const defs = [makeToolDef('shared_tool')]; - - const first = await getOrComputeToolTokens({ - toolDefinitions: defs, - provider: Providers.OPENAI, - tokenCounter: fakeTokenCounter, - }); - - const second = await getOrComputeToolTokens({ - toolDefinitions: defs, - provider: Providers.OPENAI, - tokenCounter: fakeTokenCounter, - }); - - expect(first).toBe(second); + // Only one cache entry — raw count is provider-agnostic expect(mockCacheStore.size).toBe(1); }); - it('recomputes when tool set changes', async () => { - const first = await getOrComputeToolTokens({ + it('only computes new tools when tool set grows', async () => { + const counter = jest.fn(fakeTokenCounter); + + await getOrComputeToolTokens({ toolDefinitions: [makeToolDef('tool_a')], provider: Providers.OPENAI, - tokenCounter: fakeTokenCounter, + tokenCounter: counter, }); + const callsAfterFirst = counter.mock.calls.length; - const second = await getOrComputeToolTokens({ + await getOrComputeToolTokens({ toolDefinitions: [makeToolDef('tool_a'), makeToolDef('tool_b')], provider: Providers.OPENAI, - tokenCounter: fakeTokenCounter, + tokenCounter: counter, }); - expect(second).not.toBe(first); - expect(second).toBeGreaterThan(first); + // Only one new tokenCounter call for tool_b + expect(counter.mock.calls.length).toBe(callsAfterFirst + 1); expect(mockCacheStore.size).toBe(2); }); @@ -349,7 +335,7 @@ describe('getOrComputeToolTokens', () => { expect(mockSet).toHaveBeenCalled(); }); - it('uses GenericTool tools for fingerprint and token counting', async () => { + it('uses GenericTool tools for per-tool caching', async () => { const tools = [makeTool('alpha'), makeTool('beta')]; const result = await getOrComputeToolTokens({ @@ -359,9 +345,27 @@ describe('getOrComputeToolTokens', () => { }); expect(result).toBeGreaterThan(0); - expect(mockCacheStore.size).toBe(1); + expect(mockCacheStore.has('alpha')).toBe(true); + expect(mockCacheStore.has('beta')).toBe(true); + }); - const key = Array.from(mockCacheStore.keys())[0]; - expect(key).toContain('alpha,beta|2'); + it('matches computeToolSchemaTokens output for same inputs', async () => { + const defs = [makeToolDef('a'), makeToolDef('b'), makeToolDef('c')]; + + const cached = await getOrComputeToolTokens({ + toolDefinitions: defs, + provider: Providers.OPENAI, + tokenCounter: fakeTokenCounter, + }); + + const direct = computeToolSchemaTokens( + undefined, + defs, + Providers.OPENAI, + undefined, + fakeTokenCounter, + ); + + expect(cached).toBe(direct); }); }); diff --git a/packages/api/src/agents/toolTokens.ts b/packages/api/src/agents/toolTokens.ts index e8a7104020..829c03091e 100644 --- a/packages/api/src/agents/toolTokens.ts +++ b/packages/api/src/agents/toolTokens.ts @@ -23,7 +23,7 @@ function getCache(): Keyv { return toolTokenCache; } -function getToolTokenMultiplier(provider: Providers, clientOptions?: ClientOptions): number { +export function getToolTokenMultiplier(provider: Providers, clientOptions?: ClientOptions): number { const isAnthropic = provider !== Providers.BEDROCK && (provider === Providers.ANTHROPIC || @@ -33,81 +33,69 @@ function getToolTokenMultiplier(provider: Providers, clientOptions?: ClientOptio return isAnthropic ? ANTHROPIC_TOOL_TOKEN_MULTIPLIER : DEFAULT_TOOL_TOKEN_MULTIPLIER; } +/** Serializes a GenericTool to a JSON string for token counting. Returns null if no schema. */ +function serializeGenericTool(tool: GenericTool): { name: string; json: string } | null { + const genericTool = tool as unknown as Record; + const toolName = (genericTool.name as string | undefined) ?? ''; + if (genericTool.schema == null || typeof genericTool.schema !== 'object') { + return null; + } + const jsonSchema = toJsonSchema( + genericTool.schema, + toolName, + (genericTool.description as string | undefined) ?? '', + ); + return { name: toolName, json: JSON.stringify(jsonSchema) }; +} + +/** Serializes an LCTool definition to a JSON string for token counting. */ +function serializeToolDef(def: LCTool): string { + return JSON.stringify({ + type: 'function', + function: { + name: def.name, + description: def.description ?? '', + parameters: def.parameters ?? {}, + }, + }); +} + /** - * Single pass over tools and toolDefinitions. Collects: - * - `names`: deduplicated, sorted tool names for fingerprinting. - * - `schemas`: pre-serialized JSON strings for token counting. - * - * `nameSet` tracks all tool names (for the fingerprint). `countedNames` - * tracks which tools contributed a schema from the `tools` array — a - * toolDefinition whose name is in `countedNames` is skipped to avoid - * double-counting, mirroring AgentContext.calculateInstructionTokens(). + * Builds a map of tool name → serialized schema JSON. Deduplicates: a tool + * present in `tools` (with a schema) takes precedence over a matching + * `toolDefinitions` entry, mirroring AgentContext.calculateInstructionTokens(). */ -function collectToolData( +export function collectToolSchemas( tools?: GenericTool[], toolDefinitions?: LCTool[], -): { names: string[]; schemas: string[] } { - const nameSet = new Set(); - const countedNames = new Set(); - const schemas: string[] = []; +): Map { + const schemas = new Map(); if (tools) { for (const tool of tools) { - const genericTool = tool as unknown as Record; - const toolName = (genericTool.name as string | undefined) ?? ''; - if (toolName) { - nameSet.add(toolName); - } - if (genericTool.schema != null && typeof genericTool.schema === 'object') { - schemas.push( - JSON.stringify( - toJsonSchema( - genericTool.schema, - toolName, - (genericTool.description as string | undefined) ?? '', - ), - ), - ); - if (toolName) { - countedNames.add(toolName); - } + const result = serializeGenericTool(tool); + if (result && result.name) { + schemas.set(result.name, result.json); } } } if (toolDefinitions) { for (const def of toolDefinitions) { - if (def.name) { - nameSet.add(def.name); - } - if (countedNames.has(def.name)) { + if (!def.name || schemas.has(def.name)) { continue; } - schemas.push( - JSON.stringify({ - type: 'function', - function: { - name: def.name, - description: def.description ?? '', - parameters: def.parameters ?? {}, - }, - }), - ); + schemas.set(def.name, serializeToolDef(def)); } } - const names = nameSet.size > 0 ? Array.from(nameSet).sort() : []; - return { names, schemas }; -} - -export function getToolFingerprint(tools?: GenericTool[], toolDefinitions?: LCTool[]): string { - const { names } = collectToolData(tools, toolDefinitions); - if (names.length === 0) { - return ''; - } - return names.join(',') + '|' + names.length; + return schemas; } +/** + * Computes tool schema tokens from scratch using the provided token counter. + * Mirrors the logic in AgentContext.calculateInstructionTokens(). + */ export function computeToolSchemaTokens( tools: GenericTool[] | undefined, toolDefinitions: LCTool[] | undefined, @@ -115,20 +103,22 @@ export function computeToolSchemaTokens( clientOptions: ClientOptions | undefined, tokenCounter: TokenCounter, ): number { - const { schemas } = collectToolData(tools, toolDefinitions); - let toolTokens = 0; - for (const schema of schemas) { - toolTokens += tokenCounter(new SystemMessage(schema)); + const schemas = collectToolSchemas(tools, toolDefinitions); + let rawTokens = 0; + for (const json of schemas.values()) { + rawTokens += tokenCounter(new SystemMessage(json)); } const multiplier = getToolTokenMultiplier(provider, clientOptions); - return Math.ceil(toolTokens * multiplier); + return Math.ceil(rawTokens * multiplier); } /** - * Returns cached tool schema tokens or computes them on miss. + * Returns tool schema tokens, using per-tool caching to avoid redundant + * token counting. Each tool's raw (pre-multiplier) token count is cached + * individually by name, so adding/removing a tool only requires computing + * the new one. The provider-specific multiplier is applied to the sum. + * * Returns 0 if there are no tools. - * Single pass over tool arrays: builds fingerprint and serialized schemas - * together, then only runs the token counter if the cache misses. */ export async function getOrComputeToolTokens({ tools, @@ -143,45 +133,51 @@ export async function getOrComputeToolTokens({ clientOptions?: ClientOptions; tokenCounter: TokenCounter; }): Promise { - const { names, schemas } = collectToolData(tools, toolDefinitions); - if (names.length === 0) { + const schemas = collectToolSchemas(tools, toolDefinitions); + if (schemas.size === 0) { return 0; } - const fingerprint = names.join(',') + '|' + names.length; - const multiplier = getToolTokenMultiplier(provider, clientOptions); - const multiplierKey = multiplier === ANTHROPIC_TOOL_TOKEN_MULTIPLIER ? 'anthropic' : 'default'; - const cacheKey = `${provider}:${multiplierKey}:${fingerprint}`; - + let cache: Keyv | undefined; try { - const cache = getCache(); - const cached = (await cache.get(cacheKey)) as number | undefined; - if (cached != null && cached > 0) { - return cached; - } + cache = getCache(); } catch (err) { - logger.debug('[toolTokens] Cache read failed, computing fresh', err); + logger.debug('[toolTokens] Cache init failed, computing fresh', err); } - // Inline token count — not delegating to computeToolSchemaTokens to avoid - // a second collectToolData pass; schemas are already built above. - let toolTokens = 0; - for (const schema of schemas) { - toolTokens += tokenCounter(new SystemMessage(schema)); - } - const tokens = Math.ceil(toolTokens * multiplier); + let rawTotal = 0; + const toWrite: Array<{ key: string; value: number }> = []; - if (tokens > 0) { - try { - getCache() - .set(cacheKey, tokens) - .catch((err: unknown) => { - logger.debug('[toolTokens] Cache write failed', err); - }); - } catch { - // getCache() init failure on write path — non-fatal + for (const [name, json] of schemas) { + let rawCount: number | undefined; + + if (cache) { + try { + rawCount = (await cache.get(name)) as number | undefined; + } catch { + // Cache read failed for this tool — will compute fresh + } + } + + if (rawCount == null || rawCount <= 0) { + rawCount = tokenCounter(new SystemMessage(json)); + if (rawCount > 0 && cache) { + toWrite.push({ key: name, value: rawCount }); + } + } + + rawTotal += rawCount; + } + + // Fire-and-forget cache writes for newly computed tools + if (cache && toWrite.length > 0) { + for (const { key, value } of toWrite) { + cache.set(key, value).catch((err: unknown) => { + logger.debug(`[toolTokens] Cache write failed for ${key}`, err); + }); } } - return tokens; + const multiplier = getToolTokenMultiplier(provider, clientOptions); + return Math.ceil(rawTotal * multiplier); } From 8db4f21f97c3518e4cd766ab0b55b47c0b47a219 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Fri, 3 Apr 2026 12:53:13 -0400 Subject: [PATCH 10/11] fix: scope tool token cache keys by tenantId Tool definitions can differ per tenant in multi-tenant deployments. Prefix per-tool cache keys with tenantId when present on the user object, so tenants don't share stale token counts across different tool configurations. Cache key format: "{tenantId}:{toolName}" or "{toolName}" when no tenant context exists. --- packages/api/src/agents/run.ts | 1 + packages/api/src/agents/toolTokens.ts | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index 805f1e5df8..1b67f2b337 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -390,6 +390,7 @@ export async function createRun({ provider, clientOptions: llmConfig, tokenCounter, + tenantId: user?.tenantId, }); } diff --git a/packages/api/src/agents/toolTokens.ts b/packages/api/src/agents/toolTokens.ts index 829c03091e..22d88d0285 100644 --- a/packages/api/src/agents/toolTokens.ts +++ b/packages/api/src/agents/toolTokens.ts @@ -126,18 +126,22 @@ export async function getOrComputeToolTokens({ provider, clientOptions, tokenCounter, + tenantId, }: { tools?: GenericTool[]; toolDefinitions?: LCTool[]; provider: Providers; clientOptions?: ClientOptions; tokenCounter: TokenCounter; + tenantId?: string; }): Promise { const schemas = collectToolSchemas(tools, toolDefinitions); if (schemas.size === 0) { return 0; } + const keyPrefix = tenantId ? `${tenantId}:` : ''; + let cache: Keyv | undefined; try { cache = getCache(); @@ -149,11 +153,12 @@ export async function getOrComputeToolTokens({ const toWrite: Array<{ key: string; value: number }> = []; for (const [name, json] of schemas) { + const cacheKey = `${keyPrefix}${name}`; let rawCount: number | undefined; if (cache) { try { - rawCount = (await cache.get(name)) as number | undefined; + rawCount = (await cache.get(cacheKey)) as number | undefined; } catch { // Cache read failed for this tool — will compute fresh } @@ -162,7 +167,7 @@ export async function getOrComputeToolTokens({ if (rawCount == null || rawCount <= 0) { rawCount = tokenCounter(new SystemMessage(json)); if (rawCount > 0 && cache) { - toWrite.push({ key: name, value: rawCount }); + toWrite.push({ key: cacheKey, value: rawCount }); } } From e2962f4967af7fa1baa2537a65a9146896a211e9 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Fri, 3 Apr 2026 14:08:03 -0400 Subject: [PATCH 11/11] fix: include toolType in per-tool cache key Cache key is now {tenantId}:{name}:{toolType} (or {name}:{toolType} without tenant). This differentiates builtin/mcp/action tools that may share a name but have different schemas. GenericTool entries derive type from the mcp flag; LCTool entries use the toolType field (defaulting to builtin). Also refactors collectToolSchemas to return ToolEntry[] with pre-built cache keys instead of Map. --- packages/api/src/agents/toolTokens.spec.ts | 151 +++++++++++++-------- packages/api/src/agents/toolTokens.ts | 59 ++++---- 2 files changed, 132 insertions(+), 78 deletions(-) diff --git a/packages/api/src/agents/toolTokens.spec.ts b/packages/api/src/agents/toolTokens.spec.ts index 27461e0d67..8d324ea58b 100644 --- a/packages/api/src/agents/toolTokens.spec.ts +++ b/packages/api/src/agents/toolTokens.spec.ts @@ -37,11 +37,18 @@ function makeTool(name: string, description = `${name} description`): GenericToo }) as unknown as GenericTool; } -function makeToolDef(name: string, description?: string): LCTool { +function makeMcpTool(name: string): GenericTool { + const tool = makeTool(name) as unknown as Record; + tool.mcp = true; + return tool as unknown as GenericTool; +} + +function makeToolDef(name: string, opts?: Partial): LCTool { return { name, - description: description ?? `${name} description`, - parameters: { type: 'object', properties: { input: { type: 'string' } } }, + description: opts?.description ?? `${name} description`, + parameters: opts?.parameters ?? { type: 'object', properties: { input: { type: 'string' } } }, + ...opts, }; } @@ -65,34 +72,45 @@ beforeEach(() => { /* ========================================================================= */ describe('collectToolSchemas', () => { - it('returns empty map when no tools provided', () => { - expect(collectToolSchemas().size).toBe(0); - expect(collectToolSchemas([], []).size).toBe(0); + it('returns empty array when no tools provided', () => { + expect(collectToolSchemas()).toHaveLength(0); + expect(collectToolSchemas([], [])).toHaveLength(0); }); - it('collects schemas from GenericTool array keyed by name', () => { - const tools = [makeTool('alpha'), makeTool('beta')]; - const schemas = collectToolSchemas(tools); - expect(schemas.size).toBe(2); - expect(schemas.has('alpha')).toBe(true); - expect(schemas.has('beta')).toBe(true); + it('collects entries from GenericTool array', () => { + const entries = collectToolSchemas([makeTool('alpha'), makeTool('beta')]); + expect(entries).toHaveLength(2); + expect(entries.map((e) => e.cacheKey)).toEqual( + expect.arrayContaining(['alpha:builtin', 'beta:builtin']), + ); }); - it('collects schemas from LCTool definitions', () => { - const defs = [makeToolDef('x'), makeToolDef('y')]; - const schemas = collectToolSchemas(undefined, defs); - expect(schemas.size).toBe(2); - expect(schemas.has('x')).toBe(true); - expect(schemas.has('y')).toBe(true); + it('collects entries from LCTool definitions with toolType', () => { + const defs = [makeToolDef('x', { toolType: 'mcp' }), makeToolDef('y', { toolType: 'action' })]; + const entries = collectToolSchemas(undefined, defs); + expect(entries).toHaveLength(2); + expect(entries[0].cacheKey).toBe('x:mcp'); + expect(entries[1].cacheKey).toBe('y:action'); + }); + + it('defaults toolType to builtin for LCTool without toolType', () => { + const entries = collectToolSchemas(undefined, [makeToolDef('z')]); + expect(entries[0].cacheKey).toBe('z:builtin'); + }); + + it('uses mcp type for GenericTool with mcp flag', () => { + const entries = collectToolSchemas([makeMcpTool('search')]); + expect(entries[0].cacheKey).toBe('search:mcp'); }); it('deduplicates: GenericTool takes precedence over matching toolDefinition', () => { const tools = [makeTool('shared')]; const defs = [makeToolDef('shared'), makeToolDef('only_def')]; - const schemas = collectToolSchemas(tools, defs); - expect(schemas.size).toBe(2); - expect(schemas.has('shared')).toBe(true); - expect(schemas.has('only_def')).toBe(true); + const entries = collectToolSchemas(tools, defs); + expect(entries).toHaveLength(2); + const keys = entries.map((e) => e.cacheKey); + expect(keys).toContain('shared:builtin'); + expect(keys).toContain('only_def:builtin'); }); }); @@ -109,9 +127,8 @@ describe('computeToolSchemaTokens', () => { }); it('counts tokens from GenericTool schemas', () => { - const tools = [makeTool('test_tool')]; const result = computeToolSchemaTokens( - tools, + [makeTool('test_tool')], undefined, Providers.OPENAI, undefined, @@ -121,10 +138,9 @@ describe('computeToolSchemaTokens', () => { }); it('counts tokens from LCTool definitions', () => { - const defs = [makeToolDef('test_def')]; const result = computeToolSchemaTokens( undefined, - defs, + [makeToolDef('test_def')], Providers.OPENAI, undefined, fakeTokenCounter, @@ -150,7 +166,6 @@ describe('computeToolSchemaTokens', () => { undefined, fakeTokenCounter, ); - expect(both).toBe(toolsOnly); }); @@ -170,19 +185,17 @@ describe('computeToolSchemaTokens', () => { undefined, fakeTokenCounter, ); - const expectedRatio = ANTHROPIC_TOOL_TOKEN_MULTIPLIER / DEFAULT_TOOL_TOKEN_MULTIPLIER; expect(anthropic / openai).toBeCloseTo(expectedRatio, 1); }); it('applies Anthropic multiplier when model name contains "claude"', () => { const defs = [makeToolDef('tool')]; - const clientOptions = { model: 'claude-3-opus' }; const result = computeToolSchemaTokens( undefined, defs, Providers.OPENAI, - clientOptions, + { model: 'claude-3-opus' }, fakeTokenCounter, ); const defaultResult = computeToolSchemaTokens( @@ -197,12 +210,11 @@ describe('computeToolSchemaTokens', () => { it('does not apply Anthropic multiplier for Bedrock even with claude model', () => { const defs = [makeToolDef('tool')]; - const clientOptions = { model: 'claude-3-opus' }; const bedrock = computeToolSchemaTokens( undefined, defs, Providers.BEDROCK, - clientOptions, + { model: 'claude-3-opus' }, fakeTokenCounter, ); const defaultResult = computeToolSchemaTokens( @@ -212,7 +224,6 @@ describe('computeToolSchemaTokens', () => { undefined, fakeTokenCounter, ); - expect(bedrock).toBe(defaultResult); }); }); @@ -239,8 +250,8 @@ describe('getOrComputeToolTokens', () => { }); expect(result).toBeGreaterThan(0); - expect(mockCacheStore.has('tool_a')).toBe(true); - expect(mockCacheStore.has('tool_b')).toBe(true); + expect(mockCacheStore.has('tool_a:builtin')).toBe(true); + expect(mockCacheStore.has('tool_b:builtin')).toBe(true); expect(mockCacheStore.size).toBe(2); }); @@ -282,7 +293,6 @@ describe('getOrComputeToolTokens', () => { }); expect(openai).not.toBe(anthropic); - // Only one cache entry — raw count is provider-agnostic expect(mockCacheStore.size).toBe(1); }); @@ -302,17 +312,63 @@ describe('getOrComputeToolTokens', () => { tokenCounter: counter, }); - // Only one new tokenCounter call for tool_b expect(counter.mock.calls.length).toBe(callsAfterFirst + 1); expect(mockCacheStore.size).toBe(2); }); + it('scopes cache keys by tenantId when provided', async () => { + const defs = [makeToolDef('tool')]; + + await getOrComputeToolTokens({ + toolDefinitions: defs, + provider: Providers.OPENAI, + tokenCounter: fakeTokenCounter, + tenantId: 'tenant_123', + }); + + expect(mockCacheStore.has('tenant_123:tool:builtin')).toBe(true); + }); + + it('separates cache entries for different tenants', async () => { + const defs = [makeToolDef('tool')]; + + const t1 = await getOrComputeToolTokens({ + toolDefinitions: defs, + provider: Providers.OPENAI, + tokenCounter: fakeTokenCounter, + tenantId: 'tenant_1', + }); + + const t2 = await getOrComputeToolTokens({ + toolDefinitions: defs, + provider: Providers.OPENAI, + tokenCounter: fakeTokenCounter, + tenantId: 'tenant_2', + }); + + expect(t1).toBe(t2); + expect(mockCacheStore.has('tenant_1:tool:builtin')).toBe(true); + expect(mockCacheStore.has('tenant_2:tool:builtin')).toBe(true); + expect(mockCacheStore.size).toBe(2); + }); + + it('caches mcp tools with mcp type in key', async () => { + const defs = [makeToolDef('search', { toolType: 'mcp' })]; + + await getOrComputeToolTokens({ + toolDefinitions: defs, + provider: Providers.OPENAI, + tokenCounter: fakeTokenCounter, + }); + + expect(mockCacheStore.has('search:mcp')).toBe(true); + }); + it('falls back to compute when cache read throws', async () => { mockGet.mockRejectedValueOnce(new Error('Redis down')); - const defs = [makeToolDef('tool')]; const result = await getOrComputeToolTokens({ - toolDefinitions: defs, + toolDefinitions: [makeToolDef('tool')], provider: Providers.OPENAI, tokenCounter: fakeTokenCounter, }); @@ -324,9 +380,8 @@ describe('getOrComputeToolTokens', () => { it('does not throw when cache write fails', async () => { mockSet.mockRejectedValueOnce(new Error('Redis write error')); - const defs = [makeToolDef('tool_write_fail')]; const result = await getOrComputeToolTokens({ - toolDefinitions: defs, + toolDefinitions: [makeToolDef('tool_write_fail')], provider: Providers.OPENAI, tokenCounter: fakeTokenCounter, }); @@ -335,20 +390,6 @@ describe('getOrComputeToolTokens', () => { expect(mockSet).toHaveBeenCalled(); }); - it('uses GenericTool tools for per-tool caching', async () => { - const tools = [makeTool('alpha'), makeTool('beta')]; - - const result = await getOrComputeToolTokens({ - tools, - provider: Providers.OPENAI, - tokenCounter: fakeTokenCounter, - }); - - expect(result).toBeGreaterThan(0); - expect(mockCacheStore.has('alpha')).toBe(true); - expect(mockCacheStore.has('beta')).toBe(true); - }); - it('matches computeToolSchemaTokens output for same inputs', async () => { const defs = [makeToolDef('a'), makeToolDef('b'), makeToolDef('c')]; diff --git a/packages/api/src/agents/toolTokens.ts b/packages/api/src/agents/toolTokens.ts index 22d88d0285..4a95b07600 100644 --- a/packages/api/src/agents/toolTokens.ts +++ b/packages/api/src/agents/toolTokens.ts @@ -13,6 +13,11 @@ import type { Keyv } from 'keyv'; import { standardCache } from '~/cache'; +interface ToolEntry { + cacheKey: string; + json: string; +} + /** Module-level cache instance, lazily initialized. */ let toolTokenCache: Keyv | undefined; @@ -61,35 +66,43 @@ function serializeToolDef(def: LCTool): string { } /** - * Builds a map of tool name → serialized schema JSON. Deduplicates: a tool - * present in `tools` (with a schema) takes precedence over a matching - * `toolDefinitions` entry, mirroring AgentContext.calculateInstructionTokens(). + * Builds a list of tool entries with cache keys and serialized schemas. + * Deduplicates: a tool present in `tools` (with a schema) takes precedence + * over a matching `toolDefinitions` entry. + * + * Cache key includes toolType when available (from LCTool) to differentiate + * builtin/mcp/action tools that may share a name. + * GenericTool entries use the `mcp` flag when present. */ -export function collectToolSchemas( - tools?: GenericTool[], - toolDefinitions?: LCTool[], -): Map { - const schemas = new Map(); +export function collectToolSchemas(tools?: GenericTool[], toolDefinitions?: LCTool[]): ToolEntry[] { + const seen = new Set(); + const entries: ToolEntry[] = []; if (tools) { for (const tool of tools) { const result = serializeGenericTool(tool); - if (result && result.name) { - schemas.set(result.name, result.json); + if (!result || !result.name) { + continue; } + seen.add(result.name); + const toolType = + (tool as unknown as Record).mcp === true ? 'mcp' : 'builtin'; + entries.push({ cacheKey: `${result.name}:${toolType}`, json: result.json }); } } if (toolDefinitions) { for (const def of toolDefinitions) { - if (!def.name || schemas.has(def.name)) { + if (!def.name || seen.has(def.name)) { continue; } - schemas.set(def.name, serializeToolDef(def)); + seen.add(def.name); + const toolType = def.toolType ?? 'builtin'; + entries.push({ cacheKey: `${def.name}:${toolType}`, json: serializeToolDef(def) }); } } - return schemas; + return entries; } /** @@ -103,9 +116,9 @@ export function computeToolSchemaTokens( clientOptions: ClientOptions | undefined, tokenCounter: TokenCounter, ): number { - const schemas = collectToolSchemas(tools, toolDefinitions); + const entries = collectToolSchemas(tools, toolDefinitions); let rawTokens = 0; - for (const json of schemas.values()) { + for (const { json } of entries) { rawTokens += tokenCounter(new SystemMessage(json)); } const multiplier = getToolTokenMultiplier(provider, clientOptions); @@ -115,8 +128,8 @@ export function computeToolSchemaTokens( /** * Returns tool schema tokens, using per-tool caching to avoid redundant * token counting. Each tool's raw (pre-multiplier) token count is cached - * individually by name, so adding/removing a tool only requires computing - * the new one. The provider-specific multiplier is applied to the sum. + * individually, keyed by `{tenantId}:{name}:{toolType}` (or `{name}:{toolType}` + * without tenant). The provider-specific multiplier is applied to the sum. * * Returns 0 if there are no tools. */ @@ -135,8 +148,8 @@ export async function getOrComputeToolTokens({ tokenCounter: TokenCounter; tenantId?: string; }): Promise { - const schemas = collectToolSchemas(tools, toolDefinitions); - if (schemas.size === 0) { + const entries = collectToolSchemas(tools, toolDefinitions); + if (entries.length === 0) { return 0; } @@ -152,13 +165,13 @@ export async function getOrComputeToolTokens({ let rawTotal = 0; const toWrite: Array<{ key: string; value: number }> = []; - for (const [name, json] of schemas) { - const cacheKey = `${keyPrefix}${name}`; + for (const { cacheKey, json } of entries) { + const fullKey = `${keyPrefix}${cacheKey}`; let rawCount: number | undefined; if (cache) { try { - rawCount = (await cache.get(cacheKey)) as number | undefined; + rawCount = (await cache.get(fullKey)) as number | undefined; } catch { // Cache read failed for this tool — will compute fresh } @@ -167,7 +180,7 @@ export async function getOrComputeToolTokens({ if (rawCount == null || rawCount <= 0) { rawCount = tokenCounter(new SystemMessage(json)); if (rawCount > 0 && cache) { - toWrite.push({ key: cacheKey, value: rawCount }); + toWrite.push({ key: fullKey, value: rawCount }); } }