mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 08:50:15 +01:00
🚀 feat: Agent Cache Tokens & Anthropic Reasoning Support (#6098)
* fix: handling of top_k and top_p parameters for Claude-3.7 models (allowed without reasoning) * feat: bump @librechat/agents for Anthropic Reasoning support * fix: update reasoning handling for OpenRouter integration * fix: enhance agent token spending logic to include cache creation and read details * fix: update logic for thinking status in ContentParts component * refactor: improve agent title handling * chore: bump @librechat/agents to version 2.1.7 for parallel tool calling for Google models
This commit is contained in:
parent
34f967eff8
commit
9802629848
11 changed files with 187 additions and 40 deletions
|
|
@ -27,10 +27,10 @@ const {
|
|||
formatContentStrings,
|
||||
createContextHandlers,
|
||||
} = require('~/app/clients/prompts');
|
||||
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
|
||||
const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
|
||||
const { getBufferString, HumanMessage } = require('@langchain/core/messages');
|
||||
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
|
||||
const Tokenizer = require('~/server/services/Tokenizer');
|
||||
const { spendTokens } = require('~/models/spendTokens');
|
||||
const BaseClient = require('~/app/clients/BaseClient');
|
||||
const { createRun } = require('./run');
|
||||
const { logger } = require('~/config');
|
||||
|
|
@ -380,15 +380,34 @@ class AgentClient extends BaseClient {
|
|||
if (!collectedUsage || !collectedUsage.length) {
|
||||
return;
|
||||
}
|
||||
const input_tokens = collectedUsage[0]?.input_tokens || 0;
|
||||
const input_tokens =
|
||||
(collectedUsage[0]?.input_tokens || 0) +
|
||||
(Number(collectedUsage[0]?.input_token_details?.cache_creation) || 0) +
|
||||
(Number(collectedUsage[0]?.input_token_details?.cache_read) || 0);
|
||||
|
||||
let output_tokens = 0;
|
||||
let previousTokens = input_tokens; // Start with original input
|
||||
for (let i = 0; i < collectedUsage.length; i++) {
|
||||
const usage = collectedUsage[i];
|
||||
if (!usage) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const cache_creation = Number(usage.input_token_details?.cache_creation) || 0;
|
||||
const cache_read = Number(usage.input_token_details?.cache_read) || 0;
|
||||
|
||||
const txMetadata = {
|
||||
context,
|
||||
conversationId: this.conversationId,
|
||||
user: this.user ?? this.options.req.user?.id,
|
||||
endpointTokenConfig: this.options.endpointTokenConfig,
|
||||
model: usage.model ?? model ?? this.model ?? this.options.agent.model_parameters.model,
|
||||
};
|
||||
|
||||
if (i > 0) {
|
||||
// Count new tokens generated (input_tokens minus previous accumulated tokens)
|
||||
output_tokens += (Number(usage.input_tokens) || 0) - previousTokens;
|
||||
output_tokens +=
|
||||
(Number(usage.input_tokens) || 0) + cache_creation + cache_read - previousTokens;
|
||||
}
|
||||
|
||||
// Add this message's output tokens
|
||||
|
|
@ -396,16 +415,26 @@ class AgentClient extends BaseClient {
|
|||
|
||||
// Update previousTokens to include this message's output
|
||||
previousTokens += Number(usage.output_tokens) || 0;
|
||||
spendTokens(
|
||||
{
|
||||
context,
|
||||
conversationId: this.conversationId,
|
||||
user: this.user ?? this.options.req.user?.id,
|
||||
endpointTokenConfig: this.options.endpointTokenConfig,
|
||||
model: usage.model ?? model ?? this.model ?? this.options.agent.model_parameters.model,
|
||||
},
|
||||
{ promptTokens: usage.input_tokens, completionTokens: usage.output_tokens },
|
||||
).catch((err) => {
|
||||
|
||||
if (cache_creation > 0 || cache_read > 0) {
|
||||
spendStructuredTokens(txMetadata, {
|
||||
promptTokens: {
|
||||
input: usage.input_tokens,
|
||||
write: cache_creation,
|
||||
read: cache_read,
|
||||
},
|
||||
completionTokens: usage.output_tokens,
|
||||
}).catch((err) => {
|
||||
logger.error(
|
||||
'[api/server/controllers/agents/client.js #recordCollectedUsage] Error spending structured tokens',
|
||||
err,
|
||||
);
|
||||
});
|
||||
}
|
||||
spendTokens(txMetadata, {
|
||||
promptTokens: usage.input_tokens,
|
||||
completionTokens: usage.output_tokens,
|
||||
}).catch((err) => {
|
||||
logger.error(
|
||||
'[api/server/controllers/agents/client.js #recordCollectedUsage] Error spending tokens',
|
||||
err,
|
||||
|
|
@ -792,7 +821,10 @@ class AgentClient extends BaseClient {
|
|||
throw new Error('Run not initialized');
|
||||
}
|
||||
const { handleLLMEnd, collected: collectedMetadata } = createMetadataAggregator();
|
||||
const clientOptions = {};
|
||||
/** @type {import('@librechat/agents').ClientOptions} */
|
||||
const clientOptions = {
|
||||
maxTokens: 75,
|
||||
};
|
||||
const providerConfig = this.options.req.app.locals[this.options.agent.provider];
|
||||
if (
|
||||
providerConfig &&
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue