Mirror of https://github.com/danny-avila/LibreChat.git
🚀 feat: Agent Cache Tokens & Anthropic Reasoning Support (#6098)
* fix: handling of top_k and top_p parameters for Claude-3.7 models (allowed without reasoning)
* feat: bump @librechat/agents for Anthropic Reasoning support
* fix: update reasoning handling for OpenRouter integration
* fix: enhance agent token spending logic to include cache creation and read details
* fix: update logic for thinking status in ContentParts component
* refactor: improve agent title handling
* chore: bump @librechat/agents to version 2.1.7 for parallel tool calling for Google models
Parent: 34f967eff8
Commit: 9802629848

11 changed files with 187 additions and 40 deletions
@@ -746,15 +746,6 @@ class AnthropicClient extends BaseClient {
       metadata,
     };

-    if (!/claude-3[-.]7/.test(model)) {
-      if (top_p !== undefined) {
-        requestOptions.top_p = top_p;
-      }
-      if (top_k !== undefined) {
-        requestOptions.top_k = top_k;
-      }
-    }
-
     if (this.useMessages) {
       requestOptions.messages = payload;
       requestOptions.max_tokens =
@@ -769,6 +760,14 @@ class AnthropicClient extends BaseClient {
       thinkingBudget: this.options.thinkingBudget,
     });

+    if (!/claude-3[-.]7/.test(model)) {
+      requestOptions.top_p = top_p;
+      requestOptions.top_k = top_k;
+    } else if (requestOptions.thinking == null) {
+      requestOptions.topP = top_p;
+      requestOptions.topK = top_k;
+    }
+
     if (this.systemMessage && this.supportsCacheControl === true) {
       requestOptions.system = [
         {
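Taken together, the two AnthropicClient hunks above move the sampling-parameter gating below the reasoning setup, so Claude 3.7 can still receive top_k/top_p when extended thinking is not configured. A condensed, standalone sketch of the resulting gate (the function name and inputs are illustrative, not from the codebase):

```js
// Illustrative distillation of the gate above; `applySamplingParams`
// is a hypothetical helper, not a function in LibreChat.
function applySamplingParams(requestOptions, model, top_p, top_k) {
  if (!/claude-3[-.]7/.test(model)) {
    // Non-3.7 Claude models: raw API field names.
    requestOptions.top_p = top_p;
    requestOptions.top_k = top_k;
  } else if (requestOptions.thinking == null) {
    // Claude 3.7 rejects top_p/top_k alongside extended thinking,
    // so they are forwarded only when no `thinking` block was configured.
    requestOptions.topP = top_p;
    requestOptions.topK = top_k;
  }
  return requestOptions;
}

// applySamplingParams({}, 'claude-3-7-sonnet', 0.9, 10)
//   -> { topP: 0.9, topK: 10 }
// applySamplingParams({ thinking: { budget_tokens: 2000 } }, 'claude-3-7-sonnet', 0.9, 10)
//   -> unchanged: sampling params are dropped while thinking is active
```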
@@ -1309,6 +1309,12 @@ ${convo}
       modelOptions.include_reasoning = true;
       reasoningKey = 'reasoning';
     }
+    if (this.useOpenRouter && modelOptions.reasoning_effort != null) {
+      modelOptions.reasoning = {
+        effort: modelOptions.reasoning_effort,
+      };
+      delete modelOptions.reasoning_effort;
+    }

     this.streamHandler = new SplitStreamHandler({
       reasoningKey,
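This hunk, like the matching change in the OpenAI LLM config further down, rewrites the flat OpenAI-style `reasoning_effort` into the nested `reasoning` object that OpenRouter expects before the stream handler is built. A minimal standalone sketch of the transform (field names come from the diff; the helper name is illustrative):

```js
// Convert OpenAI-style `reasoning_effort` into OpenRouter's nested shape.
function toOpenRouterReasoning(modelOptions) {
  if (modelOptions.reasoning_effort != null) {
    modelOptions.reasoning = { effort: modelOptions.reasoning_effort };
    delete modelOptions.reasoning_effort;
  }
  return modelOptions;
}

// toOpenRouterReasoning({ model: 'o3-mini', reasoning_effort: 'high' })
//   -> { model: 'o3-mini', reasoning: { effort: 'high' } }
```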
@@ -680,4 +680,53 @@ describe('AnthropicClient', () => {
       expect(capturedOptions).not.toHaveProperty('top_p');
     });
   });
+
+  it('should include top_k and top_p parameters for Claude-3.7 models when thinking is explicitly disabled', async () => {
+    const client = new AnthropicClient('test-api-key', {
+      modelOptions: {
+        model: 'claude-3-7-sonnet',
+        temperature: 0.7,
+        topK: 10,
+        topP: 0.9,
+      },
+      thinking: false,
+    });
+
+    async function* mockAsyncGenerator() {
+      yield { type: 'message_start', message: { usage: {} } };
+      yield { delta: { text: 'Test response' } };
+      yield { type: 'message_delta', usage: {} };
+    }
+
+    jest.spyOn(client, 'createResponse').mockImplementation(() => {
+      return mockAsyncGenerator();
+    });
+
+    let capturedOptions = null;
+    jest.spyOn(client, 'getClient').mockImplementation((options) => {
+      capturedOptions = options;
+      return {};
+    });
+
+    const payload = [{ role: 'user', content: 'Test message' }];
+    await client.sendCompletion(payload, {});
+
+    expect(capturedOptions).toHaveProperty('topK', 10);
+    expect(capturedOptions).toHaveProperty('topP', 0.9);
+
+    client.setOptions({
+      modelOptions: {
+        model: 'claude-3.7-sonnet',
+        temperature: 0.7,
+        topK: 10,
+        topP: 0.9,
+      },
+      thinking: false,
+    });
+
+    await client.sendCompletion(payload, {});
+
+    expect(capturedOptions).toHaveProperty('topK', 10);
+    expect(capturedOptions).toHaveProperty('topP', 0.9);
+  });
 });
@@ -45,7 +45,7 @@
     "@langchain/google-genai": "^0.1.9",
     "@langchain/google-vertexai": "^0.2.0",
     "@langchain/textsplitters": "^0.1.0",
-    "@librechat/agents": "^2.1.3",
+    "@librechat/agents": "^2.1.7",
     "@waylaidwanderer/fetch-event-source": "^3.0.1",
     "axios": "1.7.8",
     "bcryptjs": "^2.4.3",
@@ -27,10 +27,10 @@ const {
   formatContentStrings,
   createContextHandlers,
 } = require('~/app/clients/prompts');
-const { encodeAndFormat } = require('~/server/services/Files/images/encode');
+const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
 const { getBufferString, HumanMessage } = require('@langchain/core/messages');
+const { encodeAndFormat } = require('~/server/services/Files/images/encode');
 const Tokenizer = require('~/server/services/Tokenizer');
-const { spendTokens } = require('~/models/spendTokens');
 const BaseClient = require('~/app/clients/BaseClient');
 const { createRun } = require('./run');
 const { logger } = require('~/config');
@@ -380,15 +380,34 @@ class AgentClient extends BaseClient {
     if (!collectedUsage || !collectedUsage.length) {
       return;
     }
-    const input_tokens = collectedUsage[0]?.input_tokens || 0;
+    const input_tokens =
+      (collectedUsage[0]?.input_tokens || 0) +
+      (Number(collectedUsage[0]?.input_token_details?.cache_creation) || 0) +
+      (Number(collectedUsage[0]?.input_token_details?.cache_read) || 0);
+
     let output_tokens = 0;
     let previousTokens = input_tokens; // Start with original input
     for (let i = 0; i < collectedUsage.length; i++) {
       const usage = collectedUsage[i];
+      if (!usage) {
+        continue;
+      }
+
+      const cache_creation = Number(usage.input_token_details?.cache_creation) || 0;
+      const cache_read = Number(usage.input_token_details?.cache_read) || 0;
+
+      const txMetadata = {
+        context,
+        conversationId: this.conversationId,
+        user: this.user ?? this.options.req.user?.id,
+        endpointTokenConfig: this.options.endpointTokenConfig,
+        model: usage.model ?? model ?? this.model ?? this.options.agent.model_parameters.model,
+      };
+
       if (i > 0) {
         // Count new tokens generated (input_tokens minus previous accumulated tokens)
-        output_tokens += (Number(usage.input_tokens) || 0) - previousTokens;
+        output_tokens +=
+          (Number(usage.input_tokens) || 0) + cache_creation + cache_read - previousTokens;
       }

       // Add this message's output tokens
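In the agent loop each step re-sends the accumulated context, so `usage.input_tokens` is cumulative; the hunk above now also folds Anthropic's cache-creation and cache-read counts into both the base input total and the per-step delta. A worked sketch with hypothetical numbers (the per-step output addition is summarized from the `// Add this message's output tokens` comment in the diff):

```js
// Two hypothetical agent steps; the second reads the cache written by the first.
const collectedUsage = [
  { input_tokens: 100, output_tokens: 20, input_token_details: { cache_creation: 50, cache_read: 0 } },
  { input_tokens: 120, output_tokens: 30, input_token_details: { cache_read: 50 } },
];

const input_tokens =
  (collectedUsage[0]?.input_tokens || 0) +
  (Number(collectedUsage[0]?.input_token_details?.cache_creation) || 0) +
  (Number(collectedUsage[0]?.input_token_details?.cache_read) || 0); // 100 + 50 + 0 = 150

let output_tokens = 0;
let previousTokens = input_tokens; // 150
for (let i = 0; i < collectedUsage.length; i++) {
  const usage = collectedUsage[i];
  const cache_creation = Number(usage.input_token_details?.cache_creation) || 0;
  const cache_read = Number(usage.input_token_details?.cache_read) || 0;
  if (i > 0) {
    // Step 2: (120 + 0 + 50) - 170 = 0 extra context tokens beyond what
    // the previous step already accounted for.
    output_tokens += (Number(usage.input_tokens) || 0) + cache_creation + cache_read - previousTokens;
  }
  output_tokens += Number(usage.output_tokens) || 0; // add this message's output tokens
  previousTokens += Number(usage.output_tokens) || 0;
}
// input_tokens === 150, output_tokens === 50
```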
@@ -396,16 +415,26 @@ class AgentClient extends BaseClient {

       // Update previousTokens to include this message's output
       previousTokens += Number(usage.output_tokens) || 0;
-      spendTokens(
-        {
-          context,
-          conversationId: this.conversationId,
-          user: this.user ?? this.options.req.user?.id,
-          endpointTokenConfig: this.options.endpointTokenConfig,
-          model: usage.model ?? model ?? this.model ?? this.options.agent.model_parameters.model,
-        },
-        { promptTokens: usage.input_tokens, completionTokens: usage.output_tokens },
-      ).catch((err) => {
+      if (cache_creation > 0 || cache_read > 0) {
+        spendStructuredTokens(txMetadata, {
+          promptTokens: {
+            input: usage.input_tokens,
+            write: cache_creation,
+            read: cache_read,
+          },
+          completionTokens: usage.output_tokens,
+        }).catch((err) => {
+          logger.error(
+            '[api/server/controllers/agents/client.js #recordCollectedUsage] Error spending structured tokens',
+            err,
+          );
+        });
+      }
+      spendTokens(txMetadata, {
+        promptTokens: usage.input_tokens,
+        completionTokens: usage.output_tokens,
+      }).catch((err) => {
         logger.error(
           '[api/server/controllers/agents/client.js #recordCollectedUsage] Error spending tokens',
           err,
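When cache counts are present, the hunk records a structured transaction that splits prompt tokens into input/write/read alongside the flat `spendTokens` call; otherwise only the flat call runs. The two payload shapes, using the field names from the diff and the hypothetical numbers above:

```js
// Second argument to spendStructuredTokens(txMetadata, ...), recorded only
// when cache_creation > 0 || cache_read > 0:
const structuredSpend = {
  promptTokens: {
    input: 120, // usage.input_tokens
    write: 0,   // cache_creation
    read: 50,   // cache_read
  },
  completionTokens: 30,
};

// Second argument to spendTokens(txMetadata, ...), recorded for every entry:
const flatSpend = {
  promptTokens: 120,
  completionTokens: 30,
};
```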
@@ -792,7 +821,10 @@ class AgentClient extends BaseClient {
       throw new Error('Run not initialized');
     }
     const { handleLLMEnd, collected: collectedMetadata } = createMetadataAggregator();
-    const clientOptions = {};
+    /** @type {import('@librechat/agents').ClientOptions} */
+    const clientOptions = {
+      maxTokens: 75,
+    };
     const providerConfig = this.options.req.app.locals[this.options.agent.provider];
     if (
       providerConfig &&
@@ -20,10 +20,19 @@ const addTitle = async (req, { text, response, client }) => {

   const titleCache = getLogStores(CacheKeys.GEN_TITLE);
   const key = `${req.user.id}-${response.conversationId}`;
+  const responseText =
+    response?.content && Array.isArray(response?.content)
+      ? response.content.reduce((acc, block) => {
+          if (block?.type === 'text') {
+            return acc + block.text;
+          }
+          return acc;
+        }, '')
+      : (response?.content ?? response?.text ?? '');
+
   const title = await client.titleConvo({
     text,
-    responseText: response?.text ?? '',
+    responseText,
     conversationId: response.conversationId,
   });
   await titleCache.set(key, title, 120000);
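Agent responses may now carry an array of content blocks rather than a plain string, so the title hunk concatenates only the `text` blocks and falls back to the old string fields. A standalone sketch of the same reduction (the non-text block type below is a hypothetical example):

```js
// Pull the plain text out of a possibly block-based response.
function getResponseText(response) {
  return response?.content && Array.isArray(response?.content)
    ? response.content.reduce((acc, block) => {
        if (block?.type === 'text') {
          return acc + block.text;
        }
        return acc; // skip reasoning/tool-call blocks
      }, '')
    : (response?.content ?? response?.text ?? '');
}

// getResponseText({
//   content: [
//     { type: 'reasoning', reasoning: '...' }, // hypothetical non-text block
//     { type: 'text', text: 'Hello' },
//     { type: 'text', text: ' world' },
//   ],
// }) -> 'Hello world'
```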
@@ -1,6 +1,6 @@
 const { HttpsProxyAgent } = require('https-proxy-agent');
 const { anthropicSettings, removeNullishValues } = require('librechat-data-provider');
-const { checkPromptCacheSupport, getClaudeHeaders } = require('./helpers');
+const { checkPromptCacheSupport, getClaudeHeaders, configureReasoning } = require('./helpers');

 /**
  * Generates configuration options for creating an Anthropic language model (LLM) instance.
@@ -49,13 +49,14 @@ function getLLMConfig(apiKey, options = {}) {
     clientOptions: {},
   };

+  requestOptions = configureReasoning(requestOptions, systemOptions);
+
   if (!/claude-3[-.]7/.test(mergedOptions.model)) {
-    if (mergedOptions.topP !== undefined) {
-      requestOptions.topP = mergedOptions.topP;
-    }
-    if (mergedOptions.topK !== undefined) {
-      requestOptions.topK = mergedOptions.topK;
-    }
+    requestOptions.topP = mergedOptions.topP;
+    requestOptions.topK = mergedOptions.topK;
+  } else if (requestOptions.thinking == null) {
+    requestOptions.topP = mergedOptions.topP;
+    requestOptions.topK = mergedOptions.topK;
   }

   const supportsCacheControl =
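The same gate now runs in `getLLMConfig` after `configureReasoning`, so `requestOptions.thinking == null` distinguishes "no thinking block was configured" from the user-facing `thinking: false` option. The observable shape, assuming `configureReasoning` leaves `thinking` unset when the user disables it (the disabled case is confirmed by the tests below; the enabled case is inferred from the commit message's "allowed without reasoning"):

```js
// Thinking disabled: Claude 3.7 keeps its sampling params.
getLLMConfig('test-api-key', {
  modelOptions: { model: 'claude-3-7-sonnet', topK: 10, topP: 0.9, thinking: false },
});
// -> result.llmConfig includes { topK: 10, topP: 0.9 }

// Thinking enabled: a `thinking` block is configured, so topK/topP
// are expected to be omitted (assumed, not shown in this diff).
getLLMConfig('test-api-key', {
  modelOptions: { model: 'claude-3-7-sonnet', topK: 10, topP: 0.9, thinking: true },
});
```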
@@ -109,4 +109,45 @@ describe('getLLMConfig', () => {
     // Just verifying that the promptCache setting is processed
     expect(result.llmConfig).toBeDefined();
   });
+
+  it('should include topK and topP for Claude-3.7 models when thinking is not enabled', () => {
+    // Test with thinking explicitly set to null/undefined
+    const result = getLLMConfig('test-api-key', {
+      modelOptions: {
+        model: 'claude-3-7-sonnet',
+        topK: 10,
+        topP: 0.9,
+        thinking: false,
+      },
+    });
+
+    expect(result.llmConfig).toHaveProperty('topK', 10);
+    expect(result.llmConfig).toHaveProperty('topP', 0.9);
+
+    // Test with thinking explicitly set to false
+    const result2 = getLLMConfig('test-api-key', {
+      modelOptions: {
+        model: 'claude-3-7-sonnet',
+        topK: 10,
+        topP: 0.9,
+        thinking: false,
+      },
+    });
+
+    expect(result2.llmConfig).toHaveProperty('topK', 10);
+    expect(result2.llmConfig).toHaveProperty('topP', 0.9);
+
+    // Test with decimal notation as well
+    const result3 = getLLMConfig('test-api-key', {
+      modelOptions: {
+        model: 'claude-3.7-sonnet',
+        topK: 10,
+        topP: 0.9,
+        thinking: false,
+      },
+    });
+
+    expect(result3.llmConfig).toHaveProperty('topK', 10);
+    expect(result3.llmConfig).toHaveProperty('topP', 0.9);
+  });
 });
@@ -29,7 +29,6 @@ function getLLMConfig(apiKey, options = {}) {
   const {
     modelOptions = {},
     reverseProxyUrl,
-    useOpenRouter,
     defaultQuery,
     headers,
     proxy,
@@ -56,9 +55,11 @@ function getLLMConfig(apiKey, options = {}) {
     });
   }

+  let useOpenRouter;
   /** @type {OpenAIClientOptions['configuration']} */
   const configOptions = {};
-  if (useOpenRouter || (reverseProxyUrl && reverseProxyUrl.includes(KnownEndpoints.openrouter))) {
+  if (reverseProxyUrl && reverseProxyUrl.includes(KnownEndpoints.openrouter)) {
+    useOpenRouter = true;
     llmConfig.include_reasoning = true;
     configOptions.baseURL = reverseProxyUrl;
     configOptions.defaultHeaders = Object.assign(
@@ -118,6 +119,13 @@ function getLLMConfig(apiKey, options = {}) {
     llmConfig.organization = process.env.OPENAI_ORGANIZATION;
   }

+  if (useOpenRouter && llmConfig.reasoning_effort != null) {
+    llmConfig.reasoning = {
+      effort: llmConfig.reasoning_effort,
+    };
+    delete llmConfig.reasoning_effort;
+  }
+
   return {
     /** @type {OpenAIClientOptions} */
     llmConfig,
@@ -109,7 +109,9 @@ const ContentParts = memo(
                 return val;
               })
             }
-            label={isSubmitting ? localize('com_ui_thinking') : localize('com_ui_thoughts')}
+            label={
+              isSubmitting && isLast ? localize('com_ui_thinking') : localize('com_ui_thoughts')
+            }
           />
         </div>
       )}
package-lock.json (generated; 8 lines changed)
@@ -61,7 +61,7 @@
     "@langchain/google-genai": "^0.1.9",
     "@langchain/google-vertexai": "^0.2.0",
     "@langchain/textsplitters": "^0.1.0",
-    "@librechat/agents": "^2.1.3",
+    "@librechat/agents": "^2.1.7",
     "@waylaidwanderer/fetch-event-source": "^3.0.1",
     "axios": "1.7.8",
     "bcryptjs": "^2.4.3",
@@ -15984,9 +15984,9 @@
       }
     },
     "node_modules/@librechat/agents": {
-      "version": "2.1.3",
-      "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-2.1.3.tgz",
-      "integrity": "sha512-4pPkLpjhA3DDiZQOULcrpbdQaOBC4JuUMdcVTUyYBHcA63SJT3olstmRQkGKNvoXLFLeQyJ0jkOqkEpzLJzk/g==",
+      "version": "2.1.7",
+      "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-2.1.7.tgz",
+      "integrity": "sha512-/+AvxH75K0dSSUeHqT8jPZCcqcQUWdB56g9ls7ho0Nw9vdxfezBhF/hXnOk5oORHeEXlGEKNE6YPyjAhCmNIOg==",
       "dependencies": {
         "@aws-crypto/sha256-js": "^5.2.0",
         "@aws-sdk/credential-provider-node": "^3.613.0",