LibreChat/packages/api/src/endpoints/anthropic/llm.ts
Danny Avila 2a50c372ef
Some checks are pending
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Waiting to run
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Waiting to run
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Waiting to run
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Blocked by required conditions
🪙 refactor: Collected Usage & Anthropic Prompt Caching (#11319)
* 🔧 refactor: Improve token calculation in AgentClient.recordCollectedUsage

- Updated the token calculation logic to sum output tokens directly from all entries, addressing issues with negative values in parallel execution scenarios.
- Added comments for clarity on the usage of input tokens and output tokens.
- Introduced a new test file for comprehensive testing of the recordCollectedUsage function, covering various execution scenarios including sequential and parallel processing, cache token handling, and model fallback logic.

* 🔧 refactor: Anthropic `promptCache` handling in LLM configuration

* 🔧 test: Add comprehensive test for cache token handling in recordCollectedUsage

- Introduced a new test case to validate the handling of cache tokens across multiple tool calls in the recordCollectedUsage function.
- Ensured correct calculations for input and output tokens, including scenarios with cache creation and reading.
- Verified the expected interactions with token spending methods to enhance the robustness of the token management logic.
2026-01-12 23:02:08 -05:00

264 lines
8.9 KiB
TypeScript

import { Dispatcher, ProxyAgent } from 'undici';
import { logger } from '@librechat/data-schemas';
import { AnthropicClientOptions } from '@librechat/agents';
import { anthropicSettings, removeNullishValues, AuthKeys } from 'librechat-data-provider';
import type {
AnthropicLLMConfigResult,
AnthropicConfigOptions,
AnthropicCredentials,
} from '~/types/anthropic';
import { checkPromptCacheSupport, getClaudeHeaders, configureReasoning } from './helpers';
import {
createAnthropicVertexClient,
isAnthropicVertexCredentials,
getVertexDeploymentName,
} from './vertex';
/**
 * Normalizes the credentials argument into an `AnthropicCredentials` object.
 *
 * - A string containing a JSON *object* is parsed and returned as-is.
 * - Any other string (not JSON at all, or JSON that is not a plain object such
 *   as `"null"`, `"12345"`, `"true"` or `"[1]"`) is treated as a plain API key
 *   and wrapped under `AuthKeys.ANTHROPIC_API_KEY`.
 * - An object is returned unchanged.
 * - Anything else (e.g. `undefined`) yields an empty object.
 *
 * @param credentials - API key string, JSON-encoded credentials, or a credentials object.
 * @returns A credentials object that is always safe to index into.
 */
function parseCredentials(
  credentials: string | AnthropicCredentials | undefined,
): AnthropicCredentials {
  if (typeof credentials !== 'string') {
    return credentials && typeof credentials === 'object' ? credentials : {};
  }

  try {
    const parsed: unknown = JSON.parse(credentials);
    /**
     * `JSON.parse` also succeeds for primitives, `null` and arrays. Returning those
     * would hand callers a non-object (indexing `null` throws), so only a plain
     * object is accepted as structured credentials.
     */
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      return parsed as AnthropicCredentials;
    }
  } catch {
    // Not valid JSON: fall through and treat the string as a plain API key
  }

  logger.debug('[Anthropic] Credentials not JSON, treating as API key');
  return { [AuthKeys.ANTHROPIC_API_KEY]: credentials };
}
/**
 * Names of the parameters that are routed straight onto the Anthropic client
 * config. `getLLMConfig` consults this set when applying `defaultParams` and
 * `addParams`; keys that are not listed here are left untouched.
 */
export const knownAnthropicParams = new Set<string>([
  // Model selection and sampling
  'model',
  'temperature',
  'topP',
  'topK',
  // Output length and stop conditions
  'maxTokens',
  'maxOutputTokens',
  'stopSequences',
  'stop',
  // Transport / client behaviour
  'stream',
  'apiKey',
  'maxRetries',
  'timeout',
  'anthropicVersion',
  'anthropicApiUrl',
  'defaultHeaders',
]);
/**
 * Fills gaps in `target` from `defaults`, mutating `target` in place.
 * A default is copied only when the current value is strictly `undefined`;
 * existing values, including `null`, `0`, `''` and `false`, are kept.
 * @param target - Object that receives the missing values
 * @param defaults - Fallback values keyed by parameter name
 */
function applyDefaultParams(target: Record<string, unknown>, defaults: Record<string, unknown>) {
  Object.entries(defaults).forEach(([name, fallback]) => {
    if (target[name] !== undefined) {
      return;
    }
    target[name] = fallback;
  });
}
/**
 * Generates configuration options for creating an Anthropic language model (LLM) instance.
 *
 * The caller's `options.modelOptions` object is read but never modified, so the same
 * options object can safely be passed to this function more than once.
 *
 * Parameter precedence, lowest to highest: built-in defaults and `modelOptions`,
 * then `defaultParams` (only fills values that are still `undefined`), then
 * `addParams` (overrides), then `dropParams` (removes).
 *
 * @param credentials - The API key for authentication with Anthropic, or credentials object for Vertex AI.
 * @param options - Additional options for configuring the LLM; `options.modelOptions` is required.
 * @returns The tools to enable plus the LLM configuration with null and undefined values removed.
 * @throws Error if `options.modelOptions` is missing.
 * @throws Error if the credentials contain neither an Anthropic API key nor Vertex AI credentials.
 */
function getLLMConfig(
  credentials: string | AnthropicCredentials | undefined,
  options: AnthropicConfigOptions = {},
): AnthropicLLMConfigResult {
  if (!options.modelOptions) {
    throw new Error('No modelOptions provided');
  }

  /**
   * Split the "system" options from the parameters that are merged into the request.
   * Rest destructuring leaves the caller's object intact; deleting the keys in place
   * would make a second call with the same object silently fall back to the defaults.
   */
  const { thinking, promptCache, thinkingBudget, ...modelOptions } = options.modelOptions;
  const systemOptions = {
    thinking: thinking ?? anthropicSettings.thinking.default,
    promptCache: promptCache ?? anthropicSettings.promptCache.default,
    thinkingBudget: thinkingBudget ?? anthropicSettings.thinkingBudget.default,
  };

  const defaultOptions = {
    model: anthropicSettings.model.default,
    stream: true,
  };
  const mergedOptions = Object.assign(defaultOptions, modelOptions);

  let enableWebSearch = mergedOptions.web_search;

  let requestOptions: AnthropicClientOptions & { stream?: boolean } = {
    model: mergedOptions.model,
    stream: mergedOptions.stream,
    temperature: mergedOptions.temperature,
    stopSequences: mergedOptions.stop,
    maxTokens:
      mergedOptions.maxOutputTokens || anthropicSettings.maxOutputTokens.reset(mergedOptions.model),
    clientOptions: {},
    invocationKwargs: {
      metadata: {
        user_id: mergedOptions.user,
      },
    },
  };

  const creds = parseCredentials(credentials);
  const apiKey = creds[AuthKeys.ANTHROPIC_API_KEY] ?? null;

  if (isAnthropicVertexCredentials(creds)) {
    // Vertex AI configuration - use custom client with optional YAML config
    // Map the visible model name to the actual deployment name for Vertex AI
    const deploymentName = getVertexDeploymentName(
      requestOptions.model ?? '',
      options.vertexConfig,
    );
    requestOptions.model = deploymentName;

    // `requestOptions` is read lazily, so the client sees the final `clientOptions`
    requestOptions.createClient = () =>
      createAnthropicVertexClient(creds, requestOptions.clientOptions, options.vertexOptions);
  } else if (apiKey) {
    // Direct API configuration
    requestOptions.apiKey = apiKey;
  } else {
    throw new Error(
      'Invalid credentials provided. Please provide either a valid Anthropic API key or service account credentials for Vertex AI.',
    );
  }

  requestOptions = configureReasoning(requestOptions, systemOptions);

  /** `topP`/`topK` are withheld only for claude-3.7 models that ended up with `thinking` configured */
  const isClaude37 = /claude-3[-.]7/.test(mergedOptions.model);
  if (!isClaude37 || requestOptions.thinking == null) {
    requestOptions.topP = mergedOptions.topP;
    requestOptions.topK = mergedOptions.topK;
  }

  const supportsCacheControl =
    systemOptions.promptCache === true && checkPromptCacheSupport(requestOptions.model ?? '');

  /** Pass promptCache boolean for downstream cache_control application */
  if (supportsCacheControl) {
    (requestOptions as Record<string, unknown>).promptCache = true;
  }

  const headers = getClaudeHeaders(requestOptions.model ?? '', supportsCacheControl);
  if (headers && requestOptions.clientOptions) {
    requestOptions.clientOptions.defaultHeaders = headers;
  }

  if (options.proxy && requestOptions.clientOptions) {
    const proxyAgent = new ProxyAgent(options.proxy);
    requestOptions.clientOptions.fetchOptions = {
      dispatcher: proxyAgent,
    };
  }

  if (options.reverseProxyUrl && requestOptions.clientOptions) {
    requestOptions.clientOptions.baseURL = options.reverseProxyUrl;
    requestOptions.anthropicApiUrl = options.reverseProxyUrl;
  }

  /** Handle defaultParams first - only process Anthropic-native params if undefined */
  if (options.defaultParams && typeof options.defaultParams === 'object') {
    for (const [key, value] of Object.entries(options.defaultParams)) {
      /** Handle web_search separately - don't add to config */
      if (key === 'web_search') {
        if (enableWebSearch === undefined && typeof value === 'boolean') {
          enableWebSearch = value;
        }
        continue;
      }

      if (knownAnthropicParams.has(key)) {
        /** Route known Anthropic params to requestOptions only if undefined */
        applyDefaultParams(requestOptions as Record<string, unknown>, { [key]: value });
      }
      /** Leave other params for transform to handle - they might be OpenAI params */
    }
  }

  /** Handle addParams - can override defaultParams */
  if (options.addParams && typeof options.addParams === 'object') {
    for (const [key, value] of Object.entries(options.addParams)) {
      /** Handle web_search separately - don't add to config */
      if (key === 'web_search') {
        if (typeof value === 'boolean') {
          enableWebSearch = value;
        }
        continue;
      }

      if (knownAnthropicParams.has(key)) {
        /** Route known Anthropic params to requestOptions */
        (requestOptions as Record<string, unknown>)[key] = value;
      }
      /** Leave other params for transform to handle - they might be OpenAI params */
    }
  }

  /** Handle dropParams - only drop from Anthropic config */
  if (options.dropParams && Array.isArray(options.dropParams)) {
    for (const param of options.dropParams) {
      if (param === 'web_search') {
        enableWebSearch = false;
        continue;
      }

      if (param in requestOptions) {
        delete requestOptions[param as keyof AnthropicClientOptions];
      }
      if (requestOptions.invocationKwargs && param in requestOptions.invocationKwargs) {
        delete (requestOptions.invocationKwargs as Record<string, unknown>)[param];
      }
    }
  }

  const tools = [];

  if (enableWebSearch) {
    tools.push({
      type: 'web_search_20250305',
      name: 'web_search',
    });

    if (isAnthropicVertexCredentials(creds)) {
      if (!requestOptions.clientOptions) {
        requestOptions.clientOptions = {};
      }

      /**
       * NOTE(review): this replaces any `anthropic-beta` value already present in
       * `defaultHeaders` instead of appending to it - confirm that is intended for Vertex AI.
       */
      requestOptions.clientOptions.defaultHeaders = {
        ...requestOptions.clientOptions.defaultHeaders,
        'anthropic-beta': 'web-search-2025-03-05',
      };
    }
  }

  return {
    tools,
    llmConfig: removeNullishValues(
      requestOptions as Record<string, unknown>,
    ) as AnthropicClientOptions & { clientOptions?: { fetchOptions?: { dispatcher: Dispatcher } } },
  };
}

export { getLLMConfig };