Mirror of https://github.com/danny-avila/LibreChat.git
Synced 2025-12-23 11:50:14 +01:00
✨ feat: Anthropic Agents Prompt Caching & UI Accessibility Enhancements (#6045)
* chore: remove auto-focus for now
* refactor: move react-hook-form Controller Logic to AgentSelect from AgentPanel
* fix: a11y focus issue with AgentSelect by never replacing it in its component tree
* fix: maintain ComboBox focus and force re-render on agent ID change in AgentPanel
* chore: `gemini-2.0-flash-lite-preview-02-05` (deprecated)
* refactor: extract cache control logic and headers configuration to helper functions in AnthropicClient
* feat: anthropic agents prompt caching
* chore: bump @librechat/agents and related dependencies
* fix: typo
This commit is contained in:
parent d3d7d11ea8
commit e14df5956a

12 changed files with 4460 additions and 1477 deletions
The excerpt below shows the changes to the AnthropicClient class.
@@ -17,6 +17,11 @@ const {
   parseParamFromPrompt,
   createContextHandlers,
 } = require('./prompts');
+const {
+  getClaudeHeaders,
+  configureReasoning,
+  checkPromptCacheSupport,
+} = require('~/server/services/Endpoints/anthropic/helpers');
 const { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } = require('~/utils');
 const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
 const Tokenizer = require('~/server/services/Tokenizer');
@@ -101,8 +106,7 @@ class AnthropicClient extends BaseClient {
     const modelMatch = matchModelName(this.modelOptions.model, EModelEndpoint.anthropic);
     this.isClaude3 = modelMatch.includes('claude-3');
     this.isLegacyOutput = !modelMatch.includes('claude-3-5-sonnet');
-    this.supportsCacheControl =
-      this.options.promptCache && this.checkPromptCacheSupport(modelMatch);
+    this.supportsCacheControl = this.options.promptCache && checkPromptCacheSupport(modelMatch);
 
     if (
       this.isLegacyOutput &&
@@ -174,26 +178,9 @@ class AnthropicClient extends BaseClient {
       options.baseURL = this.options.reverseProxyUrl;
     }
 
-    if (
-      this.supportsCacheControl &&
-      requestOptions?.model &&
-      requestOptions.model.includes('claude-3-5-sonnet')
-    ) {
-      options.defaultHeaders = {
-        'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
-      };
-    } else if (
-      this.supportsCacheControl &&
-      requestOptions?.model &&
-      requestOptions.model.includes('claude-3-7')
-    ) {
-      options.defaultHeaders = {
-        'anthropic-beta': 'output-128k-2025-02-19,prompt-caching-2024-07-31',
-      };
-    } else if (this.supportsCacheControl) {
-      options.defaultHeaders = {
-        'anthropic-beta': 'prompt-caching-2024-07-31',
-      };
-    }
+    const headers = getClaudeHeaders(requestOptions?.model, this.supportsCacheControl);
+    if (headers) {
+      options.defaultHeaders = headers;
+    }
 
     return new Anthropic(options);
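Note: the body of getClaudeHeaders is not part of this excerpt. A plausible sketch, reconstructed directly from the inline branching removed in the hunk above (the actual helper in ~/server/services/Endpoints/anthropic/helpers may differ):

/**
 * Sketch: returns the `anthropic-beta` headers for a model, or undefined.
 * Reconstructed from the removed inline logic; not the verbatim helper.
 * @param {string | undefined} model
 * @param {boolean} supportsCacheControl
 * @returns {Record<string, string> | undefined}
 */
function getClaudeHeaders(model, supportsCacheControl) {
  if (!supportsCacheControl) {
    return undefined;
  }
  if (model?.includes('claude-3-5-sonnet')) {
    return {
      'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
    };
  }
  if (model?.includes('claude-3-7')) {
    return {
      'anthropic-beta': 'output-128k-2025-02-19,prompt-caching-2024-07-31',
    };
  }
  return { 'anthropic-beta': 'prompt-caching-2024-07-31' };
}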
@@ -684,27 +671,6 @@ class AnthropicClient extends BaseClient {
       : await client.completions.create(options);
   }
 
-  /**
-   * @param {string} modelName
-   * @returns {boolean}
-   */
-  checkPromptCacheSupport(modelName) {
-    const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic);
-    if (modelMatch.includes('claude-3-5-sonnet-latest')) {
-      return false;
-    }
-    if (
-      modelMatch === 'claude-3-7-sonnet' ||
-      modelMatch === 'claude-3-5-sonnet' ||
-      modelMatch === 'claude-3-5-haiku' ||
-      modelMatch === 'claude-3-haiku' ||
-      modelMatch === 'claude-3-opus'
-    ) {
-      return true;
-    }
-    return false;
-  }
-
   getMessageMapMethod() {
     /**
      * @param {TMessage} msg
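The method removed above was moved into the shared helpers module (imported in the first hunk) rather than deleted. Assuming the helper keeps the same matching rules, and that matchModelName normalizes dated model names to their family key, the expected behavior looks like this (the model strings below are illustrative):

const { checkPromptCacheSupport } = require('~/server/services/Endpoints/anthropic/helpers');

// Models that normalize to a supported family report cache support.
console.assert(checkPromptCacheSupport('claude-3-5-sonnet-20241022') === true);
console.assert(checkPromptCacheSupport('claude-3-opus-20240229') === true);
// The '-latest' alias of claude-3-5-sonnet is explicitly excluded,
// and anything outside the listed families defaults to false.
console.assert(checkPromptCacheSupport('claude-3-5-sonnet-latest') === false);
console.assert(checkPromptCacheSupport('claude-2.1') === false);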
@@ -761,7 +727,7 @@ class AnthropicClient extends BaseClient {
       topK: top_k,
     } = this.modelOptions;
 
-    const requestOptions = {
+    let requestOptions = {
       model,
       stream: stream || true,
       stop_sequences,
@@ -780,40 +746,10 @@ class AnthropicClient extends BaseClient {
       requestOptions.max_tokens_to_sample = maxOutputTokens || legacy.maxOutputTokens.default;
     }
 
-    if (
-      this.options.thinking &&
-      requestOptions?.model &&
-      requestOptions.model.includes('claude-3-7')
-    ) {
-      requestOptions.thinking = {
-        type: 'enabled',
-      };
-    }
-    if (requestOptions.thinking != null && this.options.thinkingBudget != null) {
-      requestOptions.thinking = {
-        ...requestOptions.thinking,
-        budget_tokens: this.options.thinkingBudget,
-      };
-    }
-    if (
-      requestOptions.thinking != null &&
-      (requestOptions.max_tokens == null ||
-        requestOptions.thinking.budget_tokens > requestOptions.max_tokens)
-    ) {
-      const maxTokens = anthropicSettings.maxOutputTokens.reset(requestOptions.model);
-      requestOptions.max_tokens = requestOptions.max_tokens ?? maxTokens;
-
-      logger.warn(
-        requestOptions.max_tokens === maxTokens
-          ? '[AnthropicClient] max_tokens is not defined while thinking is enabled. Setting max_tokens to model default.'
-          : `[AnthropicClient] thinking budget_tokens (${requestOptions.thinking.budget_tokens}) exceeds max_tokens (${requestOptions.max_tokens}). Adjusting budget_tokens.`,
-      );
-
-      requestOptions.thinking.budget_tokens = Math.min(
-        requestOptions.thinking.budget_tokens,
-        Math.floor(requestOptions.max_tokens * 0.9),
-      );
-    }
+    requestOptions = configureReasoning(requestOptions, {
+      thinking: this.options.thinking,
+      thinkingBudget: this.options.thinkingBudget,
+    });
 
     if (this.systemMessage && this.supportsCacheControl === true) {
       requestOptions.system = [
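Like getClaudeHeaders, configureReasoning is only imported in this excerpt. A minimal sketch under the assumption that it reproduces the inline logic removed above (anthropicSettings comes from the client's existing imports; the real helper may differ, for instance by also emitting the warnings the removed code logged):

/**
 * Sketch: enables Claude "thinking" on supported models and reconciles the
 * token budget with max_tokens. Reconstructed from the removed inline logic.
 */
function configureReasoning(requestOptions, { thinking, thinkingBudget }) {
  // Enable thinking only for claude-3-7 models, as the removed code did.
  if (thinking && requestOptions?.model?.includes('claude-3-7')) {
    requestOptions.thinking = { type: 'enabled' };
  }
  // Apply a user-configured token budget, if any.
  if (requestOptions.thinking != null && thinkingBudget != null) {
    requestOptions.thinking.budget_tokens = thinkingBudget;
  }
  // Reconcile budget_tokens with max_tokens: default max_tokens from the
  // model settings when missing, then clamp the budget to 90% of it.
  if (
    requestOptions.thinking != null &&
    (requestOptions.max_tokens == null ||
      requestOptions.thinking.budget_tokens > requestOptions.max_tokens)
  ) {
    const maxTokens = anthropicSettings.maxOutputTokens.reset(requestOptions.model);
    requestOptions.max_tokens = requestOptions.max_tokens ?? maxTokens;
    requestOptions.thinking.budget_tokens = Math.min(
      requestOptions.thinking.budget_tokens,
      Math.floor(requestOptions.max_tokens * 0.9),
    );
  }
  return requestOptions;
}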
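The final hunk's tail is truncated at `requestOptions.system = [`. With cache control supported, Anthropic's prompt-caching beta expects system content blocks carrying a cache_control marker, so the array plausibly continues along these lines (a sketch of the documented block shape, not the verbatim commit code):

requestOptions.system = [
  {
    type: 'text',
    text: this.systemMessage,
    // Marks this block as a cache breakpoint for the
    // 'prompt-caching-2024-07-31' beta.
    cache_control: { type: 'ephemeral' },
  },
];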