mirror of https://github.com/danny-avila/LibreChat.git (synced 2025-12-31 15:48:51 +01:00)
✨ feat: Anthropic Agents Prompt Caching & UI Accessibility Enhancements (#6045)
* chore: remove auto-focus for now
* refactor: move react-hook-form Controller Logic to AgentSelect from AgentPanel
* fix: a11y focus issue with AgentSelect by never replacing it in its component tree
* fix: maintain ComboBox focus and force re-render on agent ID change in AgentPanel
* chore: `gemini-2.0-flash-lite-preview-02-05` (deprecated)
* refactor: extract cache control logic and headers configuration to helper functions in AnthropicClient
* feat: anthropic agents prompt caching
* chore: bump @librechat/agents and related dependencies
* fix: typo
This commit is contained in:
parent d3d7d11ea8
commit e14df5956a

12 changed files with 4460 additions and 1477 deletions
@@ -17,6 +17,11 @@ const {
   parseParamFromPrompt,
   createContextHandlers,
 } = require('./prompts');
+const {
+  getClaudeHeaders,
+  configureReasoning,
+  checkPromptCacheSupport,
+} = require('~/server/services/Endpoints/anthropic/helpers');
 const { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } = require('~/utils');
 const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
 const Tokenizer = require('~/server/services/Tokenizer');
@@ -101,8 +106,7 @@ class AnthropicClient extends BaseClient {
     const modelMatch = matchModelName(this.modelOptions.model, EModelEndpoint.anthropic);
     this.isClaude3 = modelMatch.includes('claude-3');
     this.isLegacyOutput = !modelMatch.includes('claude-3-5-sonnet');
-    this.supportsCacheControl =
-      this.options.promptCache && this.checkPromptCacheSupport(modelMatch);
+    this.supportsCacheControl = this.options.promptCache && checkPromptCacheSupport(modelMatch);

     if (
       this.isLegacyOutput &&
@@ -174,26 +178,9 @@ class AnthropicClient extends BaseClient {
       options.baseURL = this.options.reverseProxyUrl;
     }

-    if (
-      this.supportsCacheControl &&
-      requestOptions?.model &&
-      requestOptions.model.includes('claude-3-5-sonnet')
-    ) {
-      options.defaultHeaders = {
-        'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
-      };
-    } else if (
-      this.supportsCacheControl &&
-      requestOptions?.model &&
-      requestOptions.model.includes('claude-3-7')
-    ) {
-      options.defaultHeaders = {
-        'anthropic-beta': 'output-128k-2025-02-19,prompt-caching-2024-07-31',
-      };
-    } else if (this.supportsCacheControl) {
-      options.defaultHeaders = {
-        'anthropic-beta': 'prompt-caching-2024-07-31',
-      };
-    }
+    const headers = getClaudeHeaders(requestOptions?.model, this.supportsCacheControl);
+    if (headers) {
+      options.defaultHeaders = headers;
+    }

     return new Anthropic(options);
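Editor's note: the three hard-coded header branches above collapse into a single helper call. A minimal usage sketch of getClaudeHeaders (defined in the new helpers.js further down), with an illustrative model name:

// Returns undefined when caching is unsupported, so defaultHeaders is only
// assigned when a beta header actually applies.
const headers = getClaudeHeaders('claude-3-7-sonnet', true);
// → { 'anthropic-beta': 'output-128k-2025-02-19,prompt-caching-2024-07-31' }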
@@ -684,27 +671,6 @@ class AnthropicClient extends BaseClient {
       : await client.completions.create(options);
   }

-  /**
-   * @param {string} modelName
-   * @returns {boolean}
-   */
-  checkPromptCacheSupport(modelName) {
-    const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic);
-    if (modelMatch.includes('claude-3-5-sonnet-latest')) {
-      return false;
-    }
-    if (
-      modelMatch === 'claude-3-7-sonnet' ||
-      modelMatch === 'claude-3-5-sonnet' ||
-      modelMatch === 'claude-3-5-haiku' ||
-      modelMatch === 'claude-3-haiku' ||
-      modelMatch === 'claude-3-opus'
-    ) {
-      return true;
-    }
-    return false;
-  }
-
   getMessageMapMethod() {
     /**
      * @param {TMessage} msg
@@ -761,7 +727,7 @@ class AnthropicClient extends BaseClient {
       topK: top_k,
     } = this.modelOptions;

-    const requestOptions = {
+    let requestOptions = {
       model,
       stream: stream || true,
       stop_sequences,
@@ -780,40 +746,10 @@ class AnthropicClient extends BaseClient {
       requestOptions.max_tokens_to_sample = maxOutputTokens || legacy.maxOutputTokens.default;
     }

-    if (
-      this.options.thinking &&
-      requestOptions?.model &&
-      requestOptions.model.includes('claude-3-7')
-    ) {
-      requestOptions.thinking = {
-        type: 'enabled',
-      };
-    }
-    if (requestOptions.thinking != null && this.options.thinkingBudget != null) {
-      requestOptions.thinking = {
-        ...requestOptions.thinking,
-        budget_tokens: this.options.thinkingBudget,
-      };
-    }
-    if (
-      requestOptions.thinking != null &&
-      (requestOptions.max_tokens == null ||
-        requestOptions.thinking.budget_tokens > requestOptions.max_tokens)
-    ) {
-      const maxTokens = anthropicSettings.maxOutputTokens.reset(requestOptions.model);
-      requestOptions.max_tokens = requestOptions.max_tokens ?? maxTokens;
-
-      logger.warn(
-        requestOptions.max_tokens === maxTokens
-          ? '[AnthropicClient] max_tokens is not defined while thinking is enabled. Setting max_tokens to model default.'
-          : `[AnthropicClient] thinking budget_tokens (${requestOptions.thinking.budget_tokens}) exceeds max_tokens (${requestOptions.max_tokens}). Adjusting budget_tokens.`,
-      );
-
-      requestOptions.thinking.budget_tokens = Math.min(
-        requestOptions.thinking.budget_tokens,
-        Math.floor(requestOptions.max_tokens * 0.9),
-      );
-    }
+    requestOptions = configureReasoning(requestOptions, {
+      thinking: this.options.thinking,
+      thinkingBudget: this.options.thinkingBudget,
+    });

     if (this.systemMessage && this.supportsCacheControl === true) {
       requestOptions.system = [
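Editor's note: configureReasoning (see helpers.js below) reproduces the inlined thinking logic removed here. A minimal sketch of a call, with illustrative token values:

let requestOptions = { model: 'claude-3-7-sonnet', max_tokens: 8192 };
requestOptions = configureReasoning(requestOptions, {
  thinking: true, // enables thinking for claude-3-7 models
  thinkingBudget: 2000, // below max_tokens, so no clamping or warning
});
// requestOptions.thinking → { type: 'enabled', budget_tokens: 2000 }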
@@ -1,7 +1,7 @@
 /**
  * Anthropic API: Adds cache control to the appropriate user messages in the payload.
- * @param {Array<AnthropicMessage>} messages - The array of message objects.
- * @returns {Array<AnthropicMessage>} - The updated array of message objects with cache control added.
+ * @param {Array<AnthropicMessage | BaseMessage>} messages - The array of message objects.
+ * @returns {Array<AnthropicMessage | BaseMessage>} - The updated array of message objects with cache control added.
  */
 function addCacheControl(messages) {
   if (!Array.isArray(messages) || messages.length < 2) {
@@ -13,7 +13,9 @@ function addCacheControl(messages) {

   for (let i = updatedMessages.length - 1; i >= 0 && userMessagesModified < 2; i--) {
     const message = updatedMessages[i];
-    if (message.role !== 'user') {
+    if (message.getType != null && message.getType() !== 'human') {
+      continue;
+    } else if (message.getType == null && message.role !== 'user') {
       continue;
     }

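Editor's note: the widened check lets addCacheControl handle LangChain BaseMessage instances (used by the agents path) alongside plain Anthropic message objects. The equivalent predicate, extracted here purely for illustration:

// A message counts as a user turn if it is a LangChain message whose
// getType() is 'human', or a plain object whose role is 'user'.
const isUserTurn = (message) =>
  message.getType != null ? message.getType() === 'human' : message.role === 'user';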
@@ -41,11 +41,11 @@
     "@keyv/mongo": "^2.1.8",
     "@keyv/redis": "^2.8.1",
     "@langchain/community": "^0.3.14",
-    "@langchain/core": "^0.3.37",
-    "@langchain/google-genai": "^0.1.7",
-    "@langchain/google-vertexai": "^0.1.8",
+    "@langchain/core": "^0.3.40",
+    "@langchain/google-genai": "^0.1.9",
+    "@langchain/google-vertexai": "^0.2.0",
     "@langchain/textsplitters": "^0.1.0",
-    "@librechat/agents": "^2.1.2",
+    "@librechat/agents": "^2.1.3",
     "@waylaidwanderer/fetch-event-source": "^3.0.1",
     "axios": "1.7.8",
     "bcryptjs": "^2.4.3",
@@ -22,6 +22,7 @@ const {
 } = require('librechat-data-provider');
 const {
   formatMessage,
+  addCacheControl,
   formatAgentMessages,
   formatContentStrings,
   createContextHandlers,
@@ -589,7 +590,7 @@ class AgentClient extends BaseClient {
      * @param {number} [i]
      * @param {TMessageContentParts[]} [contentData]
      */
-    const runAgent = async (agent, messages, i = 0, contentData = []) => {
+    const runAgent = async (agent, _messages, i = 0, contentData = []) => {
       config.configurable.model = agent.model_parameters.model;
       if (i > 0) {
         this.model = agent.model_parameters.model;
@@ -622,12 +623,21 @@ class AgentClient extends BaseClient {
        }

        if (noSystemMessages === true && systemContent?.length) {
-          let latestMessage = messages.pop().content;
+          let latestMessage = _messages.pop().content;
           if (typeof latestMessage !== 'string') {
             latestMessage = latestMessage[0].text;
           }
           latestMessage = [systemContent, latestMessage].join('\n');
-          messages.push(new HumanMessage(latestMessage));
+          _messages.push(new HumanMessage(latestMessage));
         }

+        let messages = _messages;
+        if (
+          agent.model_parameters?.clientOptions?.defaultHeaders?.['anthropic-beta']?.includes(
+            'prompt-caching',
+          )
+        ) {
+          messages = addCacheControl(messages);
+        }
+
         run = await createRun({
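Editor's note: the agents path opts into cache control by inspecting the headers that getLLMConfig attached, rather than re-deriving model support. The condition, restated as a sketch with an explicit string guard added for clarity:

const beta = agent.model_parameters?.clientOptions?.defaultHeaders?.['anthropic-beta'];
const shouldAddCacheControl = typeof beta === 'string' && beta.includes('prompt-caching');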
api/server/services/Endpoints/anthropic/helpers.js (new file, 110 lines)
@@ -0,0 +1,110 @@
+const { EModelEndpoint, anthropicSettings } = require('librechat-data-provider');
+const { matchModelName } = require('~/utils');
+const { logger } = require('~/config');
+
+/**
+ * @param {string} modelName
+ * @returns {boolean}
+ */
+function checkPromptCacheSupport(modelName) {
+  const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic);
+  if (
+    modelMatch.includes('claude-3-5-sonnet-latest') ||
+    modelMatch.includes('claude-3.5-sonnet-latest')
+  ) {
+    return false;
+  }
+
+  if (
+    modelMatch === 'claude-3-7-sonnet' ||
+    modelMatch === 'claude-3-5-sonnet' ||
+    modelMatch === 'claude-3-5-haiku' ||
+    modelMatch === 'claude-3-haiku' ||
+    modelMatch === 'claude-3-opus' ||
+    modelMatch === 'claude-3.7-sonnet' ||
+    modelMatch === 'claude-3.5-sonnet' ||
+    modelMatch === 'claude-3.5-haiku'
+  ) {
+    return true;
+  }
+
+  return false;
+}
+
+/**
+ * Gets the appropriate headers for Claude models with cache control
+ * @param {string} model The model name
+ * @param {boolean} supportsCacheControl Whether the model supports cache control
+ * @returns {AnthropicClientOptions['extendedOptions']['defaultHeaders']|undefined} The headers object or undefined if not applicable
+ */
+function getClaudeHeaders(model, supportsCacheControl) {
+  if (!supportsCacheControl) {
+    return undefined;
+  }
+
+  if (/claude-3[-.]5-sonnet/.test(model)) {
+    return {
+      'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
+    };
+  } else if (/claude-3[-.]7/.test(model)) {
+    return {
+      'anthropic-beta': 'output-128k-2025-02-19,prompt-caching-2024-07-31',
+    };
+  } else {
+    return {
+      'anthropic-beta': 'prompt-caching-2024-07-31',
+    };
+  }
+}
+
+/**
+ * Configures reasoning-related options for Claude models
+ * @param {AnthropicClientOptions & { max_tokens?: number }} anthropicInput The request options object
+ * @param {Object} extendedOptions Additional client configuration options
+ * @param {boolean} extendedOptions.thinking Whether thinking is enabled in client config
+ * @param {number|null} extendedOptions.thinkingBudget The token budget for thinking
+ * @returns {Object} Updated request options
+ */
+function configureReasoning(anthropicInput, extendedOptions = {}) {
+  const updatedOptions = { ...anthropicInput };
+  const currentMaxTokens = updatedOptions.max_tokens ?? updatedOptions.maxTokens;
+  if (
+    extendedOptions.thinking &&
+    updatedOptions?.model &&
+    /claude-3[-.]7/.test(updatedOptions.model)
+  ) {
+    updatedOptions.thinking = {
+      type: 'enabled',
+    };
+  }
+
+  if (updatedOptions.thinking != null && extendedOptions.thinkingBudget != null) {
+    updatedOptions.thinking = {
+      ...updatedOptions.thinking,
+      budget_tokens: extendedOptions.thinkingBudget,
+    };
+  }
+
+  if (
+    updatedOptions.thinking != null &&
+    (currentMaxTokens == null || updatedOptions.thinking.budget_tokens > currentMaxTokens)
+  ) {
+    const maxTokens = anthropicSettings.maxOutputTokens.reset(updatedOptions.model);
+    updatedOptions.max_tokens = currentMaxTokens ?? maxTokens;
+
+    logger.warn(
+      updatedOptions.max_tokens === maxTokens
+        ? '[AnthropicClient] max_tokens is not defined while thinking is enabled. Setting max_tokens to model default.'
+        : `[AnthropicClient] thinking budget_tokens (${updatedOptions.thinking.budget_tokens}) exceeds max_tokens (${updatedOptions.max_tokens}). Adjusting budget_tokens.`,
+    );
+
+    updatedOptions.thinking.budget_tokens = Math.min(
+      updatedOptions.thinking.budget_tokens,
+      Math.floor(updatedOptions.max_tokens * 0.9),
+    );
+  }
+
+  return updatedOptions;
+}
+
+module.exports = { checkPromptCacheSupport, getClaudeHeaders, configureReasoning };
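Editor's note: a minimal sketch of how the three new helpers compose, assuming matchModelName normalizes dated model IDs to the base keys listed above:

const model = 'claude-3-5-haiku';
const supportsCache = checkPromptCacheSupport(model); // true: in the supported set
const headers = getClaudeHeaders(model, supportsCache);
// → { 'anthropic-beta': 'prompt-caching-2024-07-31' } (neither a 3.5-sonnet nor a 3.7 model)
const configured = configureReasoning({ model, max_tokens: 4096 }, { thinking: true });
// → unchanged: thinking only activates for claude-3-7 models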
@@ -1,5 +1,6 @@
 const { HttpsProxyAgent } = require('https-proxy-agent');
 const { anthropicSettings, removeNullishValues } = require('librechat-data-provider');
+const { checkPromptCacheSupport, getClaudeHeaders } = require('./helpers');

 /**
  * Generates configuration options for creating an Anthropic language model (LLM) instance.
@@ -20,6 +21,14 @@ const { anthropicSettings, removeNullishValues } = require('librechat-data-provider');
  * @returns {Object} Configuration options for creating an Anthropic LLM instance, with null and undefined values removed.
  */
 function getLLMConfig(apiKey, options = {}) {
+  const systemOptions = {
+    thinking: options.modelOptions.thinking ?? anthropicSettings.thinking.default,
+    promptCache: options.modelOptions.promptCache ?? anthropicSettings.promptCache.default,
+    thinkingBudget: options.modelOptions.thinkingBudget ?? anthropicSettings.thinkingBudget.default,
+  };
+  for (let key in systemOptions) {
+    delete options.modelOptions[key];
+  }
   const defaultOptions = {
     model: anthropicSettings.model.default,
     maxOutputTokens: anthropicSettings.maxOutputTokens.default,
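Editor's note: the systemOptions block pulls the client-level flags out of modelOptions so they are not forwarded to the API as model parameters. The effect, sketched with illustrative values:

// options.modelOptions = { model: '...', promptCache: true, thinking: false, thinkingBudget: 2000 }
// After the delete loop, modelOptions keeps only real model parameters;
// the three flags live in systemOptions instead:
// systemOptions → { thinking: false, promptCache: true, thinkingBudget: 2000 }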
@@ -29,7 +38,7 @@ function getLLMConfig(apiKey, options = {}) {
   const mergedOptions = Object.assign(defaultOptions, options.modelOptions);

   /** @type {AnthropicClientOptions} */
-  const requestOptions = {
+  let requestOptions = {
     apiKey,
     model: mergedOptions.model,
     stream: mergedOptions.stream,
@@ -42,6 +51,13 @@ function getLLMConfig(apiKey, options = {}) {
     clientOptions: {},
   };

+  const supportsCacheControl =
+    systemOptions.promptCache === true && checkPromptCacheSupport(requestOptions.model);
+  const headers = getClaudeHeaders(requestOptions.model, supportsCacheControl);
+  if (headers) {
+    requestOptions.clientOptions.defaultHeaders = headers;
+  }
+
   if (options.proxy) {
     requestOptions.clientOptions.httpAgent = new HttpsProxyAgent(options.proxy);
   }
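Editor's note: with promptCache enabled and a supported model, getLLMConfig now returns client options carrying the beta header. Illustrative shape (model name assumed supported):

// getLLMConfig(apiKey, { modelOptions: { model: 'claude-3-5-sonnet', promptCache: true } })
// → requestOptions.clientOptions.defaultHeaders:
//   { 'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31' }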