mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-04-07 08:25:23 +02:00
feat: cache tool schema token counts to avoid redundant recalculation
Add time-based caching (30-minute TTL) for tool schema token counts using the existing Keyv/Redis infrastructure. The cache is keyed by provider and a lightweight fingerprint (sorted tool names + count), so agents sharing the same tool set share the cached value.

A new utility module (toolTokens.ts) provides reusable functions:
- getToolFingerprint: stable fingerprint from tool names
- computeToolSchemaTokens: mirrors AgentContext.calculateInstructionTokens
- getOrComputeToolTokens: cache lookup with compute-on-miss

In createRun, buildAgentContext is now async and is invoked through Promise.all so that cache lookups run in parallel in multi-agent runs. Pre-computed tokens are passed via AgentInputs.toolSchemaTokens, skipping calculateInstructionTokens in @librechat/agents entirely on cache hit.
This commit is contained in:
parent
611a1ef5dc
commit
d18d34d9cf
4 changed files with 193 additions and 5 deletions
1
api/cache/getLogStores.js
vendored
1
api/cache/getLogStores.js
vendored
|
|
@ -56,6 +56,7 @@ const namespaces = {
|
|||
CacheKeys.ADMIN_OAUTH_EXCHANGE,
|
||||
Time.THIRTY_SECONDS,
|
||||
),
|
||||
[CacheKeys.TOOL_TOKENS]: standardCache(CacheKeys.TOOL_TOKENS, Time.THIRTY_MINUTES),
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ import type { BaseMessage } from '@langchain/core/messages';
|
|||
import type { IUser } from '@librechat/data-schemas';
|
||||
import type * as t from '~/types';
|
||||
import { resolveHeaders, createSafeUser } from '~/utils/env';
|
||||
import { getOrComputeToolTokens } from './toolTokens';
|
||||
|
||||
/** Expected shape of JSON tool search results */
|
||||
interface ToolSearchJsonResult {
|
||||
|
|
@ -296,7 +297,7 @@ export async function createRun({
|
|||
: new Set<string>();
|
||||
|
||||
const agentInputs: AgentInputs[] = [];
|
||||
const buildAgentContext = (agent: RunAgent) => {
|
||||
const buildAgentContext = async (agent: RunAgent): Promise<AgentInputs> => {
|
||||
const provider =
|
||||
(providerEndpointMap[
|
||||
agent.provider as keyof typeof providerEndpointMap
|
||||
|
|
@ -381,11 +382,24 @@ export async function createRun({
|
|||
agent.maxContextTokens,
|
||||
);
|
||||
|
||||
/** Resolve cached or computed tool schema tokens */
|
||||
let toolSchemaTokens: number | undefined;
|
||||
if (tokenCounter) {
|
||||
toolSchemaTokens = await getOrComputeToolTokens({
|
||||
tools: agent.tools,
|
||||
toolDefinitions,
|
||||
provider,
|
||||
clientOptions: llmConfig,
|
||||
tokenCounter,
|
||||
});
|
||||
}
|
||||
|
||||
const reasoningKey = getReasoningKey(provider, llmConfig, agent.endpoint);
|
||||
const agentInput: AgentInputs = {
|
||||
provider,
|
||||
reasoningKey,
|
||||
toolDefinitions,
|
||||
toolSchemaTokens,
|
||||
agentId: agent.id,
|
||||
tools: agent.tools,
|
||||
clientOptions: llmConfig,
|
||||
|
|
@ -401,12 +415,11 @@ export async function createRun({
|
|||
contextPruningConfig: summarization.contextPruning,
|
||||
maxToolResultChars: agent.maxToolResultChars,
|
||||
};
|
||||
agentInputs.push(agentInput);
|
||||
return agentInput;
|
||||
};
|
||||
|
||||
for (const agent of agents) {
|
||||
buildAgentContext(agent);
|
||||
}
|
||||
const resolvedInputs = await Promise.all(agents.map(buildAgentContext));
|
||||
agentInputs.push(...resolvedInputs);
|
||||
|
||||
const graphConfig: RunConfig['graphConfig'] = {
|
||||
signal,
|
||||
|
|
|
|||
169
packages/api/src/agents/toolTokens.ts
Normal file
169
packages/api/src/agents/toolTokens.ts
Normal file
|
|
@ -0,0 +1,169 @@
|
|||
import { SystemMessage } from '@langchain/core/messages';
|
||||
import {
|
||||
Providers,
|
||||
toJsonSchema,
|
||||
ANTHROPIC_TOOL_TOKEN_MULTIPLIER,
|
||||
DEFAULT_TOOL_TOKEN_MULTIPLIER,
|
||||
} from '@librechat/agents';
|
||||
import { CacheKeys, Time } from 'librechat-data-provider';
|
||||
import { standardCache } from '~/cache';
|
||||
import type { Keyv } from 'keyv';
|
||||
import type { GenericTool, LCTool, TokenCounter, ClientOptions } from '@librechat/agents';
|
||||
|
||||
/** Module-level cache instance, lazily initialized. */
|
||||
let toolTokenCache: Keyv | undefined;
|
||||
|
||||
/**
 * Returns the module-wide tool-token cache, creating it on first use.
 *
 * The instance is built by `standardCache` for the `CacheKeys.TOOL_TOKENS`
 * namespace with `Time.THIRTY_MINUTES`, then memoized in `toolTokenCache`
 * so every later call reuses the same object.
 */
function getCache(): Keyv {
  toolTokenCache ??= standardCache(CacheKeys.TOOL_TOKENS, Time.THIRTY_MINUTES);
  return toolTokenCache;
}
|
||||
|
||||
/**
 * Derives a compact, order-independent fingerprint for a set of tools.
 *
 * Names are gathered from both `tools` (any entry exposing a non-empty string
 * `name`) and `toolDefinitions` (any entry with a truthy `name`), deduplicated,
 * and sorted, so the result does not depend on input ordering or on a tool
 * appearing in both lists.
 *
 * @param tools - Instantiated tools; entries without a usable `name` are ignored.
 * @param toolDefinitions - Tool definitions; entries without a `name` are ignored.
 * @returns `"<name1>,<name2>,...|<count>"`, or an empty string when no names were found.
 */
export function getToolFingerprint(tools?: GenericTool[], toolDefinitions?: LCTool[]): string {
  const uniqueNames = new Set<string>();

  for (const entry of tools ?? []) {
    const candidate = (entry as unknown as Record<string, unknown>).name;
    if (typeof candidate === 'string' && candidate) {
      uniqueNames.add(candidate);
    }
  }

  for (const definition of toolDefinitions ?? []) {
    if (definition.name) {
      uniqueNames.add(definition.name);
    }
  }

  if (uniqueNames.size === 0) {
    return '';
  }

  const ordered = [...uniqueNames].sort();
  return `${ordered.join(',')}|${ordered.length}`;
}
|
||||
|
||||
/**
 * Picks the multiplier applied to raw tool schema token counts.
 *
 * - Bedrock always uses the default multiplier, regardless of the model name.
 * - The Anthropic provider always uses the Anthropic multiplier.
 * - Any other provider uses the Anthropic multiplier only when the configured
 *   model name contains "anthropic" or "claude" (case-insensitive).
 *
 * @param provider - Provider the agent runs against.
 * @param clientOptions - Client options; only its optional `model` field is read.
 * @returns `ANTHROPIC_TOOL_TOKEN_MULTIPLIER` or `DEFAULT_TOOL_TOKEN_MULTIPLIER`.
 */
function getToolTokenMultiplier(provider: Providers, clientOptions?: ClientOptions): number {
  if (provider === Providers.BEDROCK) {
    return DEFAULT_TOOL_TOKEN_MULTIPLIER;
  }
  if (provider === Providers.ANTHROPIC) {
    return ANTHROPIC_TOOL_TOKEN_MULTIPLIER;
  }

  const modelName = String((clientOptions as { model?: string } | undefined)?.model ?? '');
  if (/anthropic|claude/i.test(modelName)) {
    return ANTHROPIC_TOOL_TOKEN_MULTIPLIER;
  }
  return DEFAULT_TOOL_TOKEN_MULTIPLIER;
}
|
||||
|
||||
/**
 * Counts the tokens consumed by tool schemas, without consulting any cache.
 * Intended to mirror AgentContext.calculateInstructionTokens().
 *
 * Each tool in `tools` that carries an object `schema` is converted with
 * `toJsonSchema` and counted. Each entry in `toolDefinitions` is counted as an
 * OpenAI-style `{ type: 'function', function: {...} }` payload, unless a tool
 * with the same name was already counted. The raw sum is scaled by the
 * provider-specific multiplier and rounded up.
 *
 * @param tools - Instantiated tools; entries without an object `schema` are skipped.
 * @param toolDefinitions - Tool definitions to count when not already covered by `tools`.
 * @param provider - Provider used to select the multiplier.
 * @param clientOptions - Client options used to select the multiplier.
 * @param tokenCounter - Function returning the token count of a message.
 * @returns The scaled token total, rounded up to an integer.
 */
export function computeToolSchemaTokens(
  tools: GenericTool[] | undefined,
  toolDefinitions: LCTool[] | undefined,
  provider: Providers,
  clientOptions: ClientOptions | undefined,
  tokenCounter: TokenCounter,
): number {
  /** Serializes a payload and counts it as a system message. */
  const countPayload = (payload: unknown): number =>
    tokenCounter(new SystemMessage(JSON.stringify(payload)));

  const alreadyCounted = new Set<string>();
  let rawTotal = 0;

  for (const entry of tools ?? []) {
    const record = entry as unknown as Record<string, unknown>;
    const { schema } = record;
    if (schema == null || typeof schema !== 'object') {
      continue;
    }
    const name = (record.name as string | undefined) ?? '';
    const description = (record.description as string | undefined) ?? '';
    rawTotal += countPayload(toJsonSchema(schema, name, description));
    if (name) {
      alreadyCounted.add(name);
    }
  }

  for (const definition of toolDefinitions ?? []) {
    /** A tool instance with this name was already counted above. */
    if (alreadyCounted.has(definition.name)) {
      continue;
    }
    rawTotal += countPayload({
      type: 'function',
      function: {
        name: definition.name,
        description: definition.description ?? '',
        parameters: definition.parameters ?? {},
      },
    });
  }

  return Math.ceil(rawTotal * getToolTokenMultiplier(provider, clientOptions));
}
|
||||
|
||||
/**
 * Resolves the tool schema token count for a tool set, preferring a cached
 * value and computing (then caching) it on a miss.
 *
 * The cache key combines the provider, the token multiplier selected for this
 * provider/model, and the tool-name fingerprint. The multiplier is part of the
 * key because the stored value is already scaled by it, and the multiplier can
 * differ between models served through the same provider.
 *
 * Cache access is best-effort in both directions: a failed read falls through
 * to a fresh computation, and a failed write is ignored (fire-and-forget), so
 * cache problems never fail the run.
 *
 * @param params.tools - Instantiated tools for the agent.
 * @param params.toolDefinitions - Tool definitions for the agent.
 * @param params.provider - Provider the agent runs against.
 * @param params.clientOptions - Client options; used to select the multiplier.
 * @param params.tokenCounter - Function returning the token count of a message.
 * @returns The token count, or `0` when there are no named tools (nothing is cached).
 */
export async function getOrComputeToolTokens({
  tools,
  toolDefinitions,
  provider,
  clientOptions,
  tokenCounter,
}: {
  tools?: GenericTool[];
  toolDefinitions?: LCTool[];
  provider: Providers;
  clientOptions?: ClientOptions;
  tokenCounter: TokenCounter;
}): Promise<number> {
  const fingerprint = getToolFingerprint(tools, toolDefinitions);
  if (!fingerprint) {
    return 0;
  }

  /** Scope the entry by multiplier so differently-scaled counts never share a key. */
  const multiplier = getToolTokenMultiplier(provider, clientOptions);
  const cacheKey = `${provider}:${multiplier}:${fingerprint}`;
  const cache = getCache();

  let cached: unknown;
  try {
    cached = await cache.get(cacheKey);
  } catch {
    /** A cache read failure must not fail the run; treat it as a miss. */
    cached = undefined;
  }

  /** Validate at runtime: the store may hold anything under this key. */
  if (typeof cached === 'number' && Number.isFinite(cached) && cached > 0) {
    return cached;
  }

  const tokens = computeToolSchemaTokens(
    tools,
    toolDefinitions,
    provider,
    clientOptions,
    tokenCounter,
  );

  if (tokens > 0) {
    /** Fire-and-forget write — don't block the run on cache persistence */
    cache.set(cacheKey, tokens).catch(() => {
      /* swallow cache write errors */
    });
  }

  return tokens;
}
|
||||
|
|
@ -1529,6 +1529,11 @@ export enum CacheKeys {
|
|||
* Key for admin panel OAuth exchange codes (one-time-use, short TTL).
|
||||
*/
|
||||
ADMIN_OAUTH_EXCHANGE = 'ADMIN_OAUTH_EXCHANGE',
|
||||
/**
|
||||
* Key for cached tool schema token counts.
|
||||
* Keyed by provider + tool fingerprint to avoid redundant token counting.
|
||||
*/
|
||||
TOOL_TOKENS = 'TOOL_TOKENS',
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue