feat: cache tool schema token counts to avoid redundant recalculation

Add time-based caching (30min TTL) for tool schema token counts using
the existing Keyv/Redis infrastructure. Cache is keyed by provider and
a lightweight fingerprint (sorted tool names + count), so agents sharing
the same tool set share the cached value.

New utility module (toolTokens.ts) provides reusable functions:
- getToolFingerprint: stable fingerprint from tool names
- computeToolSchemaTokens: mirrors AgentContext.calculateInstructionTokens
- getOrComputeToolTokens: cache lookup with compute-on-miss

In createRun, buildAgentContext is now async with Promise.all for
parallel cache lookups in multi-agent runs. Pre-computed tokens are
passed via AgentInputs.toolSchemaTokens, skipping calculateInstructionTokens
in @librechat/agents entirely on cache hit.
This commit is contained in:
Danny Avila 2026-03-24 13:36:10 -04:00
parent 611a1ef5dc
commit d18d34d9cf
4 changed files with 193 additions and 5 deletions

View file

@ -56,6 +56,7 @@ const namespaces = {
CacheKeys.ADMIN_OAUTH_EXCHANGE,
Time.THIRTY_SECONDS,
),
[CacheKeys.TOOL_TOKENS]: standardCache(CacheKeys.TOOL_TOKENS, Time.THIRTY_MINUTES),
};
/**

View file

@ -18,6 +18,7 @@ import type { BaseMessage } from '@langchain/core/messages';
import type { IUser } from '@librechat/data-schemas';
import type * as t from '~/types';
import { resolveHeaders, createSafeUser } from '~/utils/env';
import { getOrComputeToolTokens } from './toolTokens';
/** Expected shape of JSON tool search results */
interface ToolSearchJsonResult {
@ -296,7 +297,7 @@ export async function createRun({
: new Set<string>();
const agentInputs: AgentInputs[] = [];
const buildAgentContext = (agent: RunAgent) => {
const buildAgentContext = async (agent: RunAgent): Promise<AgentInputs> => {
const provider =
(providerEndpointMap[
agent.provider as keyof typeof providerEndpointMap
@ -381,11 +382,24 @@ export async function createRun({
agent.maxContextTokens,
);
/** Resolve cached or computed tool schema tokens */
let toolSchemaTokens: number | undefined;
if (tokenCounter) {
toolSchemaTokens = await getOrComputeToolTokens({
tools: agent.tools,
toolDefinitions,
provider,
clientOptions: llmConfig,
tokenCounter,
});
}
const reasoningKey = getReasoningKey(provider, llmConfig, agent.endpoint);
const agentInput: AgentInputs = {
provider,
reasoningKey,
toolDefinitions,
toolSchemaTokens,
agentId: agent.id,
tools: agent.tools,
clientOptions: llmConfig,
@ -401,12 +415,11 @@ export async function createRun({
contextPruningConfig: summarization.contextPruning,
maxToolResultChars: agent.maxToolResultChars,
};
agentInputs.push(agentInput);
return agentInput;
};
for (const agent of agents) {
buildAgentContext(agent);
}
const resolvedInputs = await Promise.all(agents.map(buildAgentContext));
agentInputs.push(...resolvedInputs);
const graphConfig: RunConfig['graphConfig'] = {
signal,

View file

@ -0,0 +1,169 @@
import { SystemMessage } from '@langchain/core/messages';
import {
Providers,
toJsonSchema,
ANTHROPIC_TOOL_TOKEN_MULTIPLIER,
DEFAULT_TOOL_TOKEN_MULTIPLIER,
} from '@librechat/agents';
import { CacheKeys, Time } from 'librechat-data-provider';
import { standardCache } from '~/cache';
import type { Keyv } from 'keyv';
import type { GenericTool, LCTool, TokenCounter, ClientOptions } from '@librechat/agents';
/** Module-level cache instance, lazily initialized. */
let toolTokenCache: Keyv | undefined;

/**
 * Returns the shared tool-token cache, creating it on first access.
 * Lazy initialization avoids touching the cache backend at import time.
 */
function getCache(): Keyv {
  toolTokenCache ??= standardCache(CacheKeys.TOOL_TOKENS, Time.THIRTY_MINUTES);
  return toolTokenCache;
}
/**
* Builds a lightweight fingerprint from tool names.
* Sorted and deduplicated to ensure stability regardless of tool ordering.
*/
/**
 * Builds a lightweight fingerprint from tool names.
 * Names are deduplicated and sorted so the result is stable regardless of
 * tool ordering across agents/requests.
 *
 * @param tools - Instantiated tools; each contributes its `name` property when it is a non-empty string
 * @param toolDefinitions - Raw tool definitions contributing their `name`
 * @returns `"name1,name2,...|count"` over the unique sorted names, or `''` when no names are found
 */
export function getToolFingerprint(tools?: GenericTool[], toolDefinitions?: LCTool[]): string {
  const collected: string[] = [];

  for (const tool of tools ?? []) {
    const candidate = (tool as unknown as Record<string, unknown>).name;
    if (typeof candidate === 'string' && candidate) {
      collected.push(candidate);
    }
  }

  for (const def of toolDefinitions ?? []) {
    if (def.name) {
      collected.push(def.name);
    }
  }

  if (collected.length === 0) {
    return '';
  }

  const unique = [...new Set(collected)].sort();
  return `${unique.join(',')}|${unique.length}`;
}
/**
* Determines the provider-specific token multiplier for tool schemas.
*/
/**
 * Determines the provider-specific token multiplier for tool schemas.
 * Anthropic is detected either by provider or by an `anthropic`/`claude`
 * substring in the model name; Bedrock is always treated as non-Anthropic here.
 */
function getToolTokenMultiplier(provider: Providers, clientOptions?: ClientOptions): number {
  if (provider === Providers.BEDROCK) {
    return DEFAULT_TOOL_TOKEN_MULTIPLIER;
  }
  const model = String((clientOptions as { model?: string } | undefined)?.model ?? '');
  const anthropicLike = provider === Providers.ANTHROPIC || /anthropic|claude/i.test(model);
  return anthropicLike ? ANTHROPIC_TOOL_TOKEN_MULTIPLIER : DEFAULT_TOOL_TOKEN_MULTIPLIER;
}
/**
* Computes tool schema tokens from scratch using the provided token counter.
* Mirrors the logic in AgentContext.calculateInstructionTokens().
*/
/**
 * Computes tool schema tokens from scratch using the provided token counter.
 * Mirrors the logic in AgentContext.calculateInstructionTokens():
 * structured tools carrying a `schema` object are serialized via `toJsonSchema`,
 * then any remaining definitions (not already counted by name) are serialized
 * in an OpenAI-style function envelope. The raw count is scaled by a
 * provider-specific multiplier and rounded up.
 *
 * @param tools - Instantiated tools; only those with an object `schema` are counted
 * @param toolDefinitions - Raw definitions counted unless their name was already seen
 * @param provider - Provider used to pick the token multiplier
 * @param clientOptions - Client options; the model name feeds Anthropic detection
 * @param tokenCounter - Callback producing a token count for a message
 * @returns Ceiling of the multiplied token total (0 when nothing is countable)
 */
export function computeToolSchemaTokens(
  tools: GenericTool[] | undefined,
  toolDefinitions: LCTool[] | undefined,
  provider: Providers,
  clientOptions: ClientOptions | undefined,
  tokenCounter: TokenCounter,
): number {
  const seen = new Set<string>();
  let rawCount = 0;

  for (const tool of tools ?? []) {
    const record = tool as unknown as Record<string, unknown>;
    // Only structured tools with an actual schema object contribute here.
    if (record.schema == null || typeof record.schema !== 'object') {
      continue;
    }
    const toolName = (record.name as string | undefined) ?? '';
    const jsonSchema = toJsonSchema(
      record.schema,
      toolName,
      (record.description as string | undefined) ?? '',
    );
    rawCount += tokenCounter(new SystemMessage(JSON.stringify(jsonSchema)));
    if (toolName) {
      seen.add(toolName);
    }
  }

  for (const def of toolDefinitions ?? []) {
    // Skip definitions already counted via their structured-tool schema.
    if (seen.has(def.name)) {
      continue;
    }
    const schema = {
      type: 'function',
      function: {
        name: def.name,
        description: def.description ?? '',
        parameters: def.parameters ?? {},
      },
    };
    rawCount += tokenCounter(new SystemMessage(JSON.stringify(schema)));
  }

  return Math.ceil(rawCount * getToolTokenMultiplier(provider, clientOptions));
}
/**
* Returns cached tool schema tokens if the fingerprint matches,
* otherwise computes them, caches the result (fire-and-forget), and returns.
*
* Returns 0 if there are no tools (no caching needed).
*/
/**
 * Returns cached tool schema tokens for this provider + tool fingerprint,
 * otherwise computes them, persists the result fire-and-forget, and returns it.
 *
 * Returns 0 immediately when no tool names are present (nothing to cache).
 * A computed value of 0 is never written, so empty-but-named tool sets are
 * recomputed on each call rather than cached.
 */
export async function getOrComputeToolTokens({
  tools,
  toolDefinitions,
  provider,
  clientOptions,
  tokenCounter,
}: {
  tools?: GenericTool[];
  toolDefinitions?: LCTool[];
  provider: Providers;
  clientOptions?: ClientOptions;
  tokenCounter: TokenCounter;
}): Promise<number> {
  const fingerprint = getToolFingerprint(tools, toolDefinitions);
  if (fingerprint === '') {
    return 0;
  }

  const cache = getCache();
  const cacheKey = `${provider}:${fingerprint}`;
  const hit = (await cache.get(cacheKey)) as number | undefined;
  if (hit != null && hit > 0) {
    return hit;
  }

  const computed = computeToolSchemaTokens(
    tools,
    toolDefinitions,
    provider,
    clientOptions,
    tokenCounter,
  );
  if (computed > 0) {
    // Fire-and-forget write — don't block the run on cache persistence.
    void cache.set(cacheKey, computed).catch(() => {
      /* swallow cache write errors */
    });
  }
  return computed;
}

View file

@ -1529,6 +1529,11 @@ export enum CacheKeys {
* Key for admin panel OAuth exchange codes (one-time-use, short TTL).
*/
ADMIN_OAUTH_EXCHANGE = 'ADMIN_OAUTH_EXCHANGE',
/**
* Key for cached tool schema token counts.
* Keyed by provider + tool fingerprint to avoid redundant token counting.
*/
TOOL_TOKENS = 'TOOL_TOKENS',
}
/**