Mirror of https://github.com/danny-avila/LibreChat.git (synced 2026-01-22 10:16:13 +01:00)
* fix: Token Spending Logic for Multi-Agents on Abort Scenarios
  * Implemented logic to skip token spending if a conversation is aborted, preventing double-spending.
  * Introduced `spendCollectedUsage` function to handle token spending for multiple models during aborts, ensuring accurate accounting for parallel agents.
  * Updated `GenerationJobManager` to store and retrieve collected usage data for improved abort handling.
  * Added comprehensive tests for the new functionality, covering various scenarios including cache token handling and parallel agent usage.
* fix: Memory Context Handling for Multi-Agents
  * Refactored `buildMessages` method to pass memory context to parallel agents, ensuring they share the same user context.
  * Improved handling of memory context when no existing instructions are present for parallel agents.
  * Added comprehensive tests to verify memory context propagation and behavior under various scenarios, including cases with no memory available and empty agent configurations.
  * Enhanced logging for better traceability of memory context additions to agents.
* chore: Memory Context Documentation for Parallel Agents
  * Updated documentation in the `AgentClient` class to clarify the in-place mutation of agentConfig objects when passing memory context to parallel agents.
  * Added notes on the implications of mutating objects directly to ensure all parallel agents receive the correct memory context before execution.
* chore: UsageMetadata Interface docs for Token Spending
  * Expanded the UsageMetadata interface to support both OpenAI and Anthropic cache token formats.
  * Added detailed documentation for cache token properties, including mutually exclusive fields for different model types.
  * Improved clarity on how to access cache token details for accurate token spending tracking.
* fix: Enhance Token Spending Logic in Abort Middleware
  * Refactored `spendCollectedUsage` function to utilize Promise.all for concurrent token spending, improving performance and ensuring all operations complete before clearing the collectedUsage array.
  * Added documentation to clarify the importance of clearing the collectedUsage array to prevent double-spending in abort scenarios.
  * Updated tests to verify the correct behavior of the spending logic and the clearing of the array after spending operations.
1414 lines · 45 KiB · JavaScript
require('events').EventEmitter.defaultMaxListeners = 100;
const { logger } = require('@librechat/data-schemas');
const { DynamicStructuredTool } = require('@langchain/core/tools');
const { getBufferString, HumanMessage } = require('@langchain/core/messages');
const {
  createRun,
  Tokenizer,
  checkAccess,
  logAxiosError,
  sanitizeTitle,
  resolveHeaders,
  createSafeUser,
  initializeAgent,
  getBalanceConfig,
  getProviderConfig,
  memoryInstructions,
  GenerationJobManager,
  getTransactionsConfig,
  createMemoryProcessor,
  filterMalformedContentParts,
} = require('@librechat/api');
const {
  Callback,
  Providers,
  TitleMethod,
  formatMessage,
  labelContentByAgent,
  formatAgentMessages,
  getTokenCountForMessage,
  createMetadataAggregator,
} = require('@librechat/agents');
const {
  Constants,
  Permissions,
  VisionModes,
  ContentTypes,
  EModelEndpoint,
  PermissionTypes,
  isAgentsEndpoint,
  isEphemeralAgentId,
  bedrockInputSchema,
  removeNullishValues,
} = require('librechat-data-provider');
const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
const { createContextHandlers } = require('~/app/clients/prompts');
const { getConvoFiles } = require('~/models/Conversation');
const BaseClient = require('~/app/clients/BaseClient');
const { getRoleByName } = require('~/models/Role');
const { loadAgent } = require('~/models/Agent');
const { getMCPManager } = require('~/config');
const db = require('~/models');

const omitTitleOptions = new Set([
  'stream',
  'thinking',
  'streaming',
  'clientOptions',
  'thinkingConfig',
  'thinkingBudget',
  'includeThoughts',
  'maxOutputTokens',
  'additionalModelRequestFields',
]);

/**
 * @param {ServerRequest} req
 * @param {Agent} agent
 * @param {string} endpoint
 */
const payloadParser = ({ req, agent, endpoint }) => {
  if (isAgentsEndpoint(endpoint)) {
    return { model: undefined };
  } else if (endpoint === EModelEndpoint.bedrock) {
    const parsedValues = bedrockInputSchema.parse(agent.model_parameters);
    if (parsedValues.thinking == null) {
      parsedValues.thinking = false;
    }
    return parsedValues;
  }
  return req.body.endpointOption.model_parameters;
};

function createTokenCounter(encoding) {
  return function (message) {
    const countTokens = (text) => Tokenizer.getTokenCount(text, encoding);
    return getTokenCountForMessage(message, countTokens);
  };
}
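
/**
 * Usage sketch for the counter factory above (the message shape is illustrative;
 * 'o200k_base' matches the encoding returned by AgentClient#getEncoding below):
 *
 *   const countTokens = createTokenCounter('o200k_base');
 *   const tokenCount = countTokens({ role: 'user', content: 'Hello there' });
 */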

function logToolError(graph, error, toolId) {
  logAxiosError({
    error,
    message: `[api/server/controllers/agents/client.js #chatCompletion] Tool Error "${toolId}"`,
  });
}

/** Regex pattern to match agent ID suffix (____N) */
const AGENT_SUFFIX_PATTERN = /____(\d+)$/;

/**
 * Finds the primary agent ID within a set of agent IDs.
 * Primary = no suffix (____N) or lowest suffix number.
 * @param {Set<string>} agentIds
 * @returns {string | null}
 */
function findPrimaryAgentId(agentIds) {
  let primaryAgentId = null;
  let lowestSuffixIndex = Infinity;

  for (const agentId of agentIds) {
    const suffixMatch = agentId.match(AGENT_SUFFIX_PATTERN);
    if (!suffixMatch) {
      return agentId;
    }
    const suffixIndex = parseInt(suffixMatch[1], 10);
    if (suffixIndex < lowestSuffixIndex) {
      lowestSuffixIndex = suffixIndex;
      primaryAgentId = agentId;
    }
  }

  return primaryAgentId;
}
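
/**
 * Illustrative examples (hypothetical agent IDs):
 *
 *   findPrimaryAgentId(new Set(['agent_abc____2', 'agent_abc____1'])); // => 'agent_abc____1' (lowest suffix)
 *   findPrimaryAgentId(new Set(['agent_abc', 'agent_abc____1']));      // => 'agent_abc' (no suffix wins)
 */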

/**
 * Creates a mapMethod for getMessagesForConversation that processes agent content.
 * - Strips agentId/groupId metadata from all content
 * - For parallel agents (addedConvo with groupId): filters each group to its primary agent
 * - For handoffs (agentId without groupId): keeps all content from all agents
 * - For multi-agent: applies agent labels to content
 *
 * The key distinction:
 * - Parallel execution (addedConvo): Parts have both agentId AND groupId
 * - Handoffs: Parts only have agentId, no groupId
 *
 * @param {Agent} primaryAgent - Primary agent configuration
 * @param {Map<string, Agent>} [agentConfigs] - Additional agent configurations
 * @returns {(message: TMessage) => TMessage} Map method for processing messages
 */
function createMultiAgentMapper(primaryAgent, agentConfigs) {
  const hasMultipleAgents = (primaryAgent.edges?.length ?? 0) > 0 || (agentConfigs?.size ?? 0) > 0;

  /** @type {Record<string, string> | null} */
  let agentNames = null;
  if (hasMultipleAgents) {
    agentNames = { [primaryAgent.id]: primaryAgent.name || 'Assistant' };
    if (agentConfigs) {
      for (const [agentId, agentConfig] of agentConfigs.entries()) {
        agentNames[agentId] = agentConfig.name || agentConfig.id;
      }
    }
  }

  return (message) => {
    if (message.isCreatedByUser || !Array.isArray(message.content)) {
      return message;
    }

    // Check for metadata
    const hasAgentMetadata = message.content.some((part) => part?.agentId || part?.groupId != null);
    if (!hasAgentMetadata) {
      return message;
    }

    try {
      // Build a map of groupId -> Set of agentIds, to find primary per group
      /** @type {Map<number, Set<string>>} */
      const groupAgentMap = new Map();

      for (const part of message.content) {
        const groupId = part?.groupId;
        const agentId = part?.agentId;
        if (groupId != null && agentId) {
          if (!groupAgentMap.has(groupId)) {
            groupAgentMap.set(groupId, new Set());
          }
          groupAgentMap.get(groupId).add(agentId);
        }
      }

      // For each group, find the primary agent
      /** @type {Map<number, string>} */
      const groupPrimaryMap = new Map();
      for (const [groupId, agentIds] of groupAgentMap) {
        const primary = findPrimaryAgentId(agentIds);
        if (primary) {
          groupPrimaryMap.set(groupId, primary);
        }
      }

      /** @type {Array<TMessageContentParts>} */
      const filteredContent = [];
      /** @type {Record<number, string>} */
      const agentIdMap = {};

      for (const part of message.content) {
        const agentId = part?.agentId;
        const groupId = part?.groupId;

        // Filtering logic:
        // - No groupId (handoffs): always include
        // - Has groupId (parallel): only include if it's the primary for that group
        const isParallelPart = groupId != null;
        const groupPrimary = isParallelPart ? groupPrimaryMap.get(groupId) : null;
        const shouldInclude = !isParallelPart || !agentId || agentId === groupPrimary;

        if (shouldInclude) {
          const newIndex = filteredContent.length;
          const { agentId: _a, groupId: _g, ...cleanPart } = part;
          filteredContent.push(cleanPart);
          if (agentId && hasMultipleAgents) {
            agentIdMap[newIndex] = agentId;
          }
        }
      }

      const finalContent =
        Object.keys(agentIdMap).length > 0 && agentNames
          ? labelContentByAgent(filteredContent, agentIdMap, agentNames)
          : filteredContent;

      return { ...message, content: finalContent };
    } catch (error) {
      logger.error('[AgentClient] Error processing multi-agent message:', error);
      return message;
    }
  };
}
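
/**
 * Illustrative sketch of the mapper's filtering (agent IDs and content are hypothetical):
 *
 *   const mapMethod = createMultiAgentMapper({ id: 'agent_abc', name: 'Main' }, new Map());
 *   mapMethod({
 *     isCreatedByUser: false,
 *     content: [
 *       { type: 'text', text: 'primary output', agentId: 'agent_abc', groupId: 0 },
 *       { type: 'text', text: 'parallel output', agentId: 'agent_abc____1', groupId: 0 },
 *     ],
 *   });
 *   // => keeps only the part from 'agent_abc' (the primary for group 0) and strips the
 *   //    agentId/groupId metadata from it; with a single agent, no labels are applied.
 */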

class AgentClient extends BaseClient {
  constructor(options = {}) {
    super(null, options);
    /** The current client class
     * @type {string} */
    this.clientName = EModelEndpoint.agents;

    /** @type {'discard' | 'summarize'} */
    this.contextStrategy = 'discard';

    /** @deprecated @type {true} - Is a Chat Completion Request */
    this.isChatCompletion = true;

    /** @type {AgentRun} */
    this.run;

    const {
      agentConfigs,
      contentParts,
      collectedUsage,
      artifactPromises,
      maxContextTokens,
      ...clientOptions
    } = options;

    this.agentConfigs = agentConfigs;
    this.maxContextTokens = maxContextTokens;
    /** @type {MessageContentComplex[]} */
    this.contentParts = contentParts;
    /** @type {Array<UsageMetadata>} */
    this.collectedUsage = collectedUsage;
    /** @type {ArtifactPromises} */
    this.artifactPromises = artifactPromises;
    /** @type {AgentClientOptions} */
    this.options = Object.assign({ endpoint: options.endpoint }, clientOptions);
    /** @type {string} */
    this.model = this.options.agent.model_parameters.model;
    /** The key for the usage object's input tokens
     * @type {string} */
    this.inputTokensKey = 'input_tokens';
    /** The key for the usage object's output tokens
     * @type {string} */
    this.outputTokensKey = 'output_tokens';
    /** @type {UsageMetadata} */
    this.usage;
    /** @type {Record<string, number>} */
    this.indexTokenCountMap = {};
    /** @type {(messages: BaseMessage[]) => Promise<void>} */
    this.processMemory;
  }

  /**
   * Returns the aggregated content parts for the current run.
   * @returns {MessageContentComplex[]} */
  getContentParts() {
    return this.contentParts;
  }

  setOptions(_options) {}

  /**
   * `AgentClient` is not opinionated about vision requests, so we don't do anything here
   * @param {MongoFile[]} attachments
   */
  checkVisionRequest() {}

  getSaveOptions() {
    // TODO:
    // would need to be override settings; otherwise, model needs to be undefined
    // model: this.override.model,
    // instructions: this.override.instructions,
    // additional_instructions: this.override.additional_instructions,
    let runOptions = {};
    try {
      runOptions = payloadParser(this.options);
    } catch (error) {
      logger.error(
        '[api/server/controllers/agents/client.js #getSaveOptions] Error parsing options',
        error,
      );
    }

    return removeNullishValues(
      Object.assign(
        {
          endpoint: this.options.endpoint,
          agent_id: this.options.agent.id,
          modelLabel: this.options.modelLabel,
          maxContextTokens: this.options.maxContextTokens,
          resendFiles: this.options.resendFiles,
          imageDetail: this.options.imageDetail,
          spec: this.options.spec,
          iconURL: this.options.iconURL,
        },
        // TODO: PARSE OPTIONS BY PROVIDER, MAY CONTAIN SENSITIVE DATA
        runOptions,
      ),
    );
  }

  getBuildMessagesOptions() {
    return {
      instructions: this.options.agent.instructions,
      additional_instructions: this.options.agent.additional_instructions,
    };
  }

  /**
   *
   * @param {TMessage} message
   * @param {Array<MongoFile>} attachments
   * @returns {Promise<Array<Partial<MongoFile>>>}
   */
  async addImageURLs(message, attachments) {
    const { files, image_urls } = await encodeAndFormat(
      this.options.req,
      attachments,
      {
        provider: this.options.agent.provider,
        endpoint: this.options.endpoint,
      },
      VisionModes.agents,
    );
    message.image_urls = image_urls.length ? image_urls : undefined;
    return files;
  }

  async buildMessages(
    messages,
    parentMessageId,
    { instructions = null, additional_instructions = null },
    opts,
  ) {
    /** Always pass mapMethod; getMessagesForConversation applies it only to messages with addedConvo flag */
    const orderedMessages = this.constructor.getMessagesForConversation({
      messages,
      parentMessageId,
      summary: this.shouldSummarize,
      mapMethod: createMultiAgentMapper(this.options.agent, this.agentConfigs),
      mapCondition: (message) => message.addedConvo === true,
    });

    let payload;
    /** @type {number | undefined} */
    let promptTokens;

    /** @type {string} */
    let systemContent = [instructions ?? '', additional_instructions ?? '']
      .filter(Boolean)
      .join('\n')
      .trim();

    if (this.options.attachments) {
      const attachments = await this.options.attachments;
      const latestMessage = orderedMessages[orderedMessages.length - 1];

      if (this.message_file_map) {
        this.message_file_map[latestMessage.messageId] = attachments;
      } else {
        this.message_file_map = {
          [latestMessage.messageId]: attachments,
        };
      }

      await this.addFileContextToMessage(latestMessage, attachments);
      const files = await this.processAttachments(latestMessage, attachments);

      this.options.attachments = files;
    }

    /** Note: Bedrock uses legacy RAG API handling */
    if (this.message_file_map && !isAgentsEndpoint(this.options.endpoint)) {
      this.contextHandlers = createContextHandlers(
        this.options.req,
        orderedMessages[orderedMessages.length - 1].text,
      );
    }

    const formattedMessages = orderedMessages.map((message, i) => {
      const formattedMessage = formatMessage({
        message,
        userName: this.options?.name,
        assistantName: this.options?.modelLabel,
      });

      if (message.fileContext && i !== orderedMessages.length - 1) {
        if (typeof formattedMessage.content === 'string') {
          formattedMessage.content = message.fileContext + '\n' + formattedMessage.content;
        } else {
          const textPart = formattedMessage.content.find((part) => part.type === 'text');
          textPart
            ? (textPart.text = message.fileContext + '\n' + textPart.text)
            : formattedMessage.content.unshift({ type: 'text', text: message.fileContext });
        }
      } else if (message.fileContext && i === orderedMessages.length - 1) {
        systemContent = [systemContent, message.fileContext].join('\n');
      }

      const needsTokenCount =
        (this.contextStrategy && !orderedMessages[i].tokenCount) || message.fileContext;

      /* If tokens were never counted, or this is a Vision request and the message has files, count again */
      if (needsTokenCount || (this.isVisionModel && (message.image_urls || message.files))) {
        orderedMessages[i].tokenCount = this.getTokenCountForMessage(formattedMessage);
      }

      /* If message has files, calculate image token cost */
      if (this.message_file_map && this.message_file_map[message.messageId]) {
        const attachments = this.message_file_map[message.messageId];
        for (const file of attachments) {
          if (file.embedded) {
            this.contextHandlers?.processFile(file);
            continue;
          }
          if (file.metadata?.fileIdentifier) {
            continue;
          }
          // orderedMessages[i].tokenCount += this.calculateImageTokenCost({
          //   width: file.width,
          //   height: file.height,
          //   detail: this.options.imageDetail ?? ImageDetail.auto,
          // });
        }
      }

      return formattedMessage;
    });

    if (this.contextHandlers) {
      this.augmentedPrompt = await this.contextHandlers.createContext();
      systemContent = this.augmentedPrompt + systemContent;
    }

    // Inject MCP server instructions if available
    const ephemeralAgent = this.options.req.body.ephemeralAgent;
    let mcpServers = [];

    // Check for ephemeral agent MCP servers
    if (ephemeralAgent && ephemeralAgent.mcp && ephemeralAgent.mcp.length > 0) {
      mcpServers = ephemeralAgent.mcp;
    }
    // Check for regular agent MCP tools
    else if (this.options.agent && this.options.agent.tools) {
      mcpServers = this.options.agent.tools
        .filter(
          (tool) =>
            tool instanceof DynamicStructuredTool && tool.name.includes(Constants.mcp_delimiter),
        )
        .map((tool) => tool.name.split(Constants.mcp_delimiter).pop())
        .filter(Boolean);
    }

    if (mcpServers.length > 0) {
      try {
        const mcpInstructions = await getMCPManager().formatInstructionsForContext(mcpServers);
        if (mcpInstructions) {
          systemContent = [systemContent, mcpInstructions].filter(Boolean).join('\n\n');
          logger.debug('[AgentClient] Injected MCP instructions for servers:', mcpServers);
        }
      } catch (error) {
        logger.error('[AgentClient] Failed to inject MCP instructions:', error);
      }
    }

    if (systemContent) {
      this.options.agent.instructions = systemContent;
    }

    /** @type {Record<string, number> | undefined} */
    let tokenCountMap;

    if (this.contextStrategy) {
      ({ payload, promptTokens, tokenCountMap, messages } = await this.handleContextStrategy({
        orderedMessages,
        formattedMessages,
      }));
    }

    for (let i = 0; i < messages.length; i++) {
      this.indexTokenCountMap[i] = messages[i].tokenCount;
    }

    const result = {
      tokenCountMap,
      prompt: payload,
      promptTokens,
      messages,
    };

    if (promptTokens >= 0 && typeof opts?.getReqData === 'function') {
      opts.getReqData({ promptTokens });
    }

    const withoutKeys = await this.useMemory();
    const memoryContext = withoutKeys
      ? `${memoryInstructions}\n\n# Existing memory about the user:\n${withoutKeys}`
      : '';
    if (memoryContext) {
      systemContent += memoryContext;
    }

    if (systemContent) {
      this.options.agent.instructions = systemContent;
    }

    /**
     * Pass memory context to parallel agents (addedConvo) so they have the same user context.
     *
     * NOTE: This intentionally mutates the agentConfig objects in place. The agentConfigs Map
     * holds references to config objects that will be passed to the graph runtime. Mutating
     * them here ensures all parallel agents receive the memory context before execution starts.
     * Creating new objects would not work because the Map references would still point to the old objects.
     */
    if (memoryContext && this.agentConfigs?.size > 0) {
      for (const [agentId, agentConfig] of this.agentConfigs.entries()) {
        if (agentConfig.instructions) {
          agentConfig.instructions = agentConfig.instructions + '\n\n' + memoryContext;
        } else {
          agentConfig.instructions = memoryContext;
        }
        logger.debug(`[AgentClient] Added memory context to parallel agent: ${agentId}`);
      }
    }

    return result;
  }

  /**
   * Creates a promise that resolves with the memory promise result or undefined after a timeout
   * @param {Promise<(TAttachment | null)[] | undefined>} memoryPromise - The memory promise to await
   * @param {number} timeoutMs - Timeout in milliseconds (default: 3000)
   * @returns {Promise<(TAttachment | null)[] | undefined>}
   */
  async awaitMemoryWithTimeout(memoryPromise, timeoutMs = 3000) {
    if (!memoryPromise) {
      return;
    }

    try {
      const timeoutPromise = new Promise((_, reject) =>
        setTimeout(() => reject(new Error('Memory processing timeout')), timeoutMs),
      );

      const attachments = await Promise.race([memoryPromise, timeoutPromise]);
      return attachments;
    } catch (error) {
      if (error.message === 'Memory processing timeout') {
        logger.warn(`[AgentClient] Memory processing timed out after ${timeoutMs}ms`);
      } else {
        logger.error('[AgentClient] Error processing memory:', error);
      }
      return;
    }
  }

  /**
   * @returns {Promise<string | undefined>}
   */
  async useMemory() {
    const user = this.options.req.user;
    if (user.personalization?.memories === false) {
      return;
    }
    const hasAccess = await checkAccess({
      user,
      permissionType: PermissionTypes.MEMORIES,
      permissions: [Permissions.USE],
      getRoleByName,
    });

    if (!hasAccess) {
      logger.debug(
        `[api/server/controllers/agents/client.js #useMemory] User ${user.id} does not have USE permission for memories`,
      );
      return;
    }
    const appConfig = this.options.req.config;
    const memoryConfig = appConfig.memory;
    if (!memoryConfig || memoryConfig.disabled === true) {
      return;
    }

    /** @type {Agent} */
    let prelimAgent;
    const allowedProviders = new Set(
      appConfig?.endpoints?.[EModelEndpoint.agents]?.allowedProviders,
    );
    try {
      if (memoryConfig.agent?.id != null && memoryConfig.agent.id !== this.options.agent.id) {
        prelimAgent = await loadAgent({
          req: this.options.req,
          agent_id: memoryConfig.agent.id,
          endpoint: EModelEndpoint.agents,
        });
      } else if (
        memoryConfig.agent?.id == null &&
        memoryConfig.agent?.model != null &&
        memoryConfig.agent?.provider != null
      ) {
        prelimAgent = { id: Constants.EPHEMERAL_AGENT_ID, ...memoryConfig.agent };
      }
    } catch (error) {
      logger.error(
        '[api/server/controllers/agents/client.js #useMemory] Error loading agent for memory',
        error,
      );
    }

    const agent = await initializeAgent(
      {
        req: this.options.req,
        res: this.options.res,
        agent: prelimAgent,
        allowedProviders,
        endpointOption: {
          endpoint: !isEphemeralAgentId(prelimAgent.id)
            ? EModelEndpoint.agents
            : memoryConfig.agent?.provider,
        },
      },
      {
        getConvoFiles,
        getFiles: db.getFiles,
        getUserKey: db.getUserKey,
        updateFilesUsage: db.updateFilesUsage,
        getUserKeyValues: db.getUserKeyValues,
        getToolFilesByIds: db.getToolFilesByIds,
      },
    );

    if (!agent) {
      logger.warn(
        '[api/server/controllers/agents/client.js #useMemory] No agent found for memory',
        memoryConfig,
      );
      return;
    }

    const llmConfig = Object.assign(
      {
        provider: agent.provider,
        model: agent.model,
      },
      agent.model_parameters,
    );

    /** @type {import('@librechat/api').MemoryConfig} */
    const config = {
      validKeys: memoryConfig.validKeys,
      instructions: agent.instructions,
      llmConfig,
      tokenLimit: memoryConfig.tokenLimit,
    };

    const userId = this.options.req.user.id + '';
    const messageId = this.responseMessageId + '';
    const conversationId = this.conversationId + '';
    const streamId = this.options.req?._resumableStreamId || null;
    const [withoutKeys, processMemory] = await createMemoryProcessor({
      userId,
      config,
      messageId,
      streamId,
      conversationId,
      memoryMethods: {
        setMemory: db.setMemory,
        deleteMemory: db.deleteMemory,
        getFormattedMemories: db.getFormattedMemories,
      },
      res: this.options.res,
      user: createSafeUser(this.options.req.user),
    });

    this.processMemory = processMemory;
    return withoutKeys;
  }

  /**
   * Filters out image URLs from message content
   * @param {BaseMessage} message - The message to filter
   * @returns {BaseMessage} - A new message with image URLs removed
   */
  filterImageUrls(message) {
    if (!message.content || typeof message.content === 'string') {
      return message;
    }

    if (Array.isArray(message.content)) {
      const filteredContent = message.content.filter(
        (part) => part.type !== ContentTypes.IMAGE_URL,
      );

      if (filteredContent.length === 1 && filteredContent[0].type === ContentTypes.TEXT) {
        const MessageClass = message.constructor;
        return new MessageClass({
          content: filteredContent[0].text,
          additional_kwargs: message.additional_kwargs,
        });
      }

      const MessageClass = message.constructor;
      return new MessageClass({
        content: filteredContent,
        additional_kwargs: message.additional_kwargs,
      });
    }

    return message;
  }

  /**
   * @param {BaseMessage[]} messages
   * @returns {Promise<void | (TAttachment | null)[]>}
   */
  async runMemory(messages) {
    try {
      if (this.processMemory == null) {
        return;
      }
      const appConfig = this.options.req.config;
      const memoryConfig = appConfig.memory;
      const messageWindowSize = memoryConfig?.messageWindowSize ?? 5;

      let messagesToProcess = [...messages];
      if (messages.length > messageWindowSize) {
        for (let i = messages.length - messageWindowSize; i >= 0; i--) {
          const potentialWindow = messages.slice(i, i + messageWindowSize);
          if (potentialWindow[0]?.role === 'user') {
            messagesToProcess = [...potentialWindow];
            break;
          }
        }

        if (messagesToProcess.length === messages.length) {
          messagesToProcess = [...messages.slice(-messageWindowSize)];
        }
      }

      const filteredMessages = messagesToProcess.map((msg) => this.filterImageUrls(msg));
      const bufferString = getBufferString(filteredMessages);
      const bufferMessage = new HumanMessage(`# Current Chat:\n\n${bufferString}`);
      return await this.processMemory([bufferMessage]);
    } catch (error) {
      logger.error('Memory Agent failed to process memory', error);
    }
  }
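
  /**
   * Illustrative sketch of the window selection in runMemory above (roles are hypothetical):
   *
   *   messages roles: ['user', 'ai', 'user', 'ai', 'user', 'ai', 'ai']  // 7 messages, messageWindowSize = 5
   *   The scan starts at i = 2 (7 - 5); messages[2] is a 'user' message, so the window
   *   becomes messages[2..6]. If no candidate window starts with a 'user' message,
   *   the last 5 messages are used instead.
   */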

  /** @type {sendCompletion} */
  async sendCompletion(payload, opts = {}) {
    await this.chatCompletion({
      payload,
      onProgress: opts.onProgress,
      userMCPAuthMap: opts.userMCPAuthMap,
      abortController: opts.abortController,
    });

    const completion = filterMalformedContentParts(this.contentParts);
    return { completion };
  }

  /**
   * @param {Object} params
   * @param {string} [params.model]
   * @param {string} [params.context='message']
   * @param {AppConfig['balance']} [params.balance]
   * @param {AppConfig['transactions']} [params.transactions]
   * @param {UsageMetadata[]} [params.collectedUsage=this.collectedUsage]
   */
  async recordCollectedUsage({
    model,
    balance,
    transactions,
    context = 'message',
    collectedUsage = this.collectedUsage,
  }) {
    if (!collectedUsage || !collectedUsage.length) {
      return;
    }
    // Use first entry's input_tokens as the base input (represents initial user message context)
    // Support both OpenAI format (input_token_details) and Anthropic format (cache_*_input_tokens)
    const firstUsage = collectedUsage[0];
    const input_tokens =
      (firstUsage?.input_tokens || 0) +
      (Number(firstUsage?.input_token_details?.cache_creation) ||
        Number(firstUsage?.cache_creation_input_tokens) ||
        0) +
      (Number(firstUsage?.input_token_details?.cache_read) ||
        Number(firstUsage?.cache_read_input_tokens) ||
        0);

    // Sum output_tokens directly from all entries - works for both sequential and parallel execution
    // This avoids the incremental calculation that produced negative values for parallel agents
    let total_output_tokens = 0;

    for (const usage of collectedUsage) {
      if (!usage) {
        continue;
      }

      // Support both OpenAI format (input_token_details) and Anthropic format (cache_*_input_tokens)
      const cache_creation =
        Number(usage.input_token_details?.cache_creation) ||
        Number(usage.cache_creation_input_tokens) ||
        0;
      const cache_read =
        Number(usage.input_token_details?.cache_read) || Number(usage.cache_read_input_tokens) || 0;

      // Accumulate output tokens for the usage summary
      total_output_tokens += Number(usage.output_tokens) || 0;

      const txMetadata = {
        context,
        balance,
        transactions,
        conversationId: this.conversationId,
        user: this.user ?? this.options.req.user?.id,
        endpointTokenConfig: this.options.endpointTokenConfig,
        model: usage.model ?? model ?? this.model ?? this.options.agent.model_parameters.model,
      };

      if (cache_creation > 0 || cache_read > 0) {
        spendStructuredTokens(txMetadata, {
          promptTokens: {
            input: usage.input_tokens,
            write: cache_creation,
            read: cache_read,
          },
          completionTokens: usage.output_tokens,
        }).catch((err) => {
          logger.error(
            '[api/server/controllers/agents/client.js #recordCollectedUsage] Error spending structured tokens',
            err,
          );
        });
        continue;
      }
      spendTokens(txMetadata, {
        promptTokens: usage.input_tokens,
        completionTokens: usage.output_tokens,
      }).catch((err) => {
        logger.error(
          '[api/server/controllers/agents/client.js #recordCollectedUsage] Error spending tokens',
          err,
        );
      });
    }

    this.usage = {
      input_tokens,
      output_tokens: total_output_tokens,
    };
  }
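
  /**
   * Illustrative sketch of recordCollectedUsage above (token values are hypothetical):
   *
   *   collectedUsage = [
   *     { input_tokens: 100, output_tokens: 20, cache_creation_input_tokens: 50 }, // Anthropic-style cache fields
   *     { input_tokens: 120, output_tokens: 30 },                                  // plain usage entry
   *   ]
   *   // Entry 1 is spent via spendStructuredTokens (write: 50, read: 0); entry 2 via spendTokens.
   *   // this.usage => { input_tokens: 150 (100 + 50 cache), output_tokens: 50 (20 + 30) }
   */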

  /**
   * Get stream usage as returned by this client's API response.
   * @returns {UsageMetadata} The stream usage object.
   */
  getStreamUsage() {
    return this.usage;
  }

  /**
   * @param {TMessage} responseMessage
   * @returns {number}
   */
  getTokenCountForResponse({ content }) {
    return this.getTokenCountForMessage({
      role: 'assistant',
      content,
    });
  }

  /**
   * Calculates the correct token count for the current user message based on the token count map and API usage.
   * Edge case: If the calculation results in a negative value, it returns the original estimate.
   * If revisiting a conversation with a chat history entirely composed of token estimates,
   * the cumulative token count going forward should become more accurate as the conversation progresses.
   * @param {Object} params - The parameters for the calculation.
   * @param {Record<string, number>} params.tokenCountMap - A map of message IDs to their token counts.
   * @param {string} params.currentMessageId - The ID of the current message to calculate.
   * @param {OpenAIUsageMetadata} params.usage - The usage object returned by the API.
   * @returns {number} The correct token count for the current user message.
   */
  calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage }) {
    const originalEstimate = tokenCountMap[currentMessageId] || 0;

    if (!usage || typeof usage[this.inputTokensKey] !== 'number') {
      return originalEstimate;
    }

    tokenCountMap[currentMessageId] = 0;
    const totalTokensFromMap = Object.values(tokenCountMap).reduce((sum, count) => {
      const numCount = Number(count);
      return sum + (isNaN(numCount) ? 0 : numCount);
    }, 0);
    const totalInputTokens = usage[this.inputTokensKey] ?? 0;

    const currentMessageTokens = totalInputTokens - totalTokensFromMap;
    return currentMessageTokens > 0 ? currentMessageTokens : originalEstimate;
  }
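
  /**
   * Worked example for calculateCurrentTokenCount above (values are hypothetical):
   *
   *   tokenCountMap = { msgA: 60, msgB: 40 }, currentMessageId = 'msgB', usage.input_tokens = 130
   *   => msgB is zeroed, the remaining map total is 60, so the current message counts 130 - 60 = 70 tokens.
   *   If the subtraction were zero or negative, the original estimate (40) would be returned instead.
   */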

  /**
   * @param {object} params
   * @param {string | ChatCompletionMessageParam[]} params.payload
   * @param {Record<string, Record<string, string>>} [params.userMCPAuthMap]
   * @param {AbortController} [params.abortController]
   */
  async chatCompletion({ payload, userMCPAuthMap, abortController = null }) {
    /** @type {Partial<GraphRunnableConfig>} */
    let config;
    /** @type {ReturnType<createRun>} */
    let run;
    /** @type {Promise<(TAttachment | null)[] | undefined>} */
    let memoryPromise;
    const appConfig = this.options.req.config;
    const balanceConfig = getBalanceConfig(appConfig);
    const transactionsConfig = getTransactionsConfig(appConfig);
    try {
      if (!abortController) {
        abortController = new AbortController();
      }

      /** @type {AppConfig['endpoints']['agents']} */
      const agentsEConfig = appConfig.endpoints?.[EModelEndpoint.agents];

      config = {
        runName: 'AgentRun',
        configurable: {
          thread_id: this.conversationId,
          last_agent_index: this.agentConfigs?.size ?? 0,
          user_id: this.user ?? this.options.req.user?.id,
          hide_sequential_outputs: this.options.agent.hide_sequential_outputs,
          requestBody: {
            messageId: this.responseMessageId,
            conversationId: this.conversationId,
            parentMessageId: this.parentMessageId,
          },
          user: createSafeUser(this.options.req.user),
        },
        recursionLimit: agentsEConfig?.recursionLimit ?? 25,
        signal: abortController.signal,
        streamMode: 'values',
        version: 'v2',
      };

      const toolSet = new Set((this.options.agent.tools ?? []).map((tool) => tool && tool.name));
      let { messages: initialMessages, indexTokenCountMap } = formatAgentMessages(
        payload,
        this.indexTokenCountMap,
        toolSet,
      );

      /**
       * @param {BaseMessage[]} messages
       */
      const runAgents = async (messages) => {
        const agents = [this.options.agent];
        // Include additional agents when:
        // - agentConfigs has agents (from addedConvo parallel execution or agent handoffs)
        // - Agents without incoming edges become start nodes and run in parallel automatically
        if (this.agentConfigs && this.agentConfigs.size > 0) {
          agents.push(...this.agentConfigs.values());
        }

        if (agents[0].recursion_limit && typeof agents[0].recursion_limit === 'number') {
          config.recursionLimit = agents[0].recursion_limit;
        }

        if (
          agentsEConfig?.maxRecursionLimit &&
          config.recursionLimit > agentsEConfig?.maxRecursionLimit
        ) {
          config.recursionLimit = agentsEConfig?.maxRecursionLimit;
        }

        // TODO: needs to be added as part of AgentContext initialization
        // const noSystemModelRegex = [/\b(o1-preview|o1-mini|amazon\.titan-text)\b/gi];
        // const noSystemMessages = noSystemModelRegex.some((regex) =>
        //   agent.model_parameters.model.match(regex),
        // );
        // if (noSystemMessages === true && systemContent?.length) {
        //   const latestMessageContent = _messages.pop().content;
        //   if (typeof latestMessageContent !== 'string') {
        //     latestMessageContent[0].text = [systemContent, latestMessageContent[0].text].join('\n');
        //     _messages.push(new HumanMessage({ content: latestMessageContent }));
        //   } else {
        //     const text = [systemContent, latestMessageContent].join('\n');
        //     _messages.push(new HumanMessage(text));
        //   }
        // }
        // let messages = _messages;
        // if (agent.useLegacyContent === true) {
        //   messages = formatContentStrings(messages);
        // }
        // if (
        //   agent.model_parameters?.clientOptions?.defaultHeaders?.['anthropic-beta']?.includes(
        //     'prompt-caching',
        //   )
        // ) {
        //   messages = addCacheControl(messages);
        // }

        memoryPromise = this.runMemory(messages);

        run = await createRun({
          agents,
          indexTokenCountMap,
          runId: this.responseMessageId,
          signal: abortController.signal,
          customHandlers: this.options.eventHandlers,
          requestBody: config.configurable.requestBody,
          user: createSafeUser(this.options.req?.user),
          tokenCounter: createTokenCounter(this.getEncoding()),
        });

        if (!run) {
          throw new Error('Failed to create run');
        }

        this.run = run;

        const streamId = this.options.req?._resumableStreamId;
        if (streamId && run.Graph) {
          GenerationJobManager.setGraph(streamId, run.Graph);
        }

        if (userMCPAuthMap != null) {
          config.configurable.userMCPAuthMap = userMCPAuthMap;
        }

        /** @deprecated Agent Chain */
        config.configurable.last_agent_id = agents[agents.length - 1].id;
        await run.processStream({ messages }, config, {
          callbacks: {
            [Callback.TOOL_ERROR]: logToolError,
          },
        });

        config.signal = null;
      };

      await runAgents(initialMessages);
      /** @deprecated Agent Chain */
      if (config.configurable.hide_sequential_outputs) {
        this.contentParts = this.contentParts.filter((part, index) => {
          // Include parts that are either:
          // 1. At or after the finalContentStart index
          // 2. Of type tool_call
          // 3. Have tool_call_ids property
          return (
            index >= this.contentParts.length - 1 ||
            part.type === ContentTypes.TOOL_CALL ||
            part.tool_call_ids
          );
        });
      }
    } catch (err) {
      logger.error(
        '[api/server/controllers/agents/client.js #sendCompletion] Operation aborted',
        err,
      );
      if (!abortController.signal.aborted) {
        logger.error(
          '[api/server/controllers/agents/client.js #sendCompletion] Unhandled error type',
          err,
        );
        this.contentParts.push({
          type: ContentTypes.ERROR,
          [ContentTypes.ERROR]: `An error occurred while processing the request${err?.message ? `: ${err.message}` : ''}`,
        });
      }
    } finally {
      try {
        const attachments = await this.awaitMemoryWithTimeout(memoryPromise);
        if (attachments && attachments.length > 0) {
          this.artifactPromises.push(...attachments);
        }

        /** Skip token spending if aborted - the abort handler (abortMiddleware.js) handles it.
         * This prevents double-spending when user aborts via `/api/agents/chat/abort` */
        const wasAborted = abortController?.signal?.aborted;
        if (!wasAborted) {
          await this.recordCollectedUsage({
            context: 'message',
            balance: balanceConfig,
            transactions: transactionsConfig,
          });
        } else {
          logger.debug(
            '[api/server/controllers/agents/client.js #chatCompletion] Skipping token spending - handled by abort middleware',
          );
        }
      } catch (err) {
        logger.error(
          '[api/server/controllers/agents/client.js #chatCompletion] Error in cleanup phase',
          err,
        );
      }
      run = null;
      config = null;
      memoryPromise = null;
    }
  }

  /**
   *
   * @param {Object} params
   * @param {string} params.text
   * @param {AbortController} params.abortController
   */
  async titleConvo({ text, abortController }) {
    if (!this.run) {
      throw new Error('Run not initialized');
    }
    const { handleLLMEnd, collected: collectedMetadata } = createMetadataAggregator();
    const { req, agent } = this.options;

    if (req?.body?.isTemporary) {
      logger.debug(
        `[api/server/controllers/agents/client.js #titleConvo] Skipping title generation for temporary conversation`,
      );
      return;
    }

    const appConfig = req.config;
    let endpoint = agent.endpoint;

    /** @type {import('@librechat/agents').ClientOptions} */
    let clientOptions = {
      model: agent.model || agent.model_parameters.model,
    };

    let titleProviderConfig = getProviderConfig({ provider: endpoint, appConfig });

    /** @type {TEndpoint | undefined} */
    const endpointConfig =
      appConfig.endpoints?.all ??
      appConfig.endpoints?.[endpoint] ??
      titleProviderConfig.customEndpointConfig;
    if (!endpointConfig) {
      logger.debug(
        `[api/server/controllers/agents/client.js #titleConvo] No endpoint config for "${endpoint}"`,
      );
    }

    if (endpointConfig?.titleConvo === false) {
      logger.debug(
        `[api/server/controllers/agents/client.js #titleConvo] Title generation disabled for endpoint "${endpoint}"`,
      );
      return;
    }

    if (endpointConfig?.titleEndpoint && endpointConfig.titleEndpoint !== endpoint) {
      try {
        titleProviderConfig = getProviderConfig({
          provider: endpointConfig.titleEndpoint,
          appConfig,
        });
        endpoint = endpointConfig.titleEndpoint;
      } catch (error) {
        logger.warn(
          `[api/server/controllers/agents/client.js #titleConvo] Error getting title endpoint config for "${endpointConfig.titleEndpoint}", falling back to default`,
          error,
        );
        // Fall back to original provider config
        endpoint = agent.endpoint;
        titleProviderConfig = getProviderConfig({ provider: endpoint, appConfig });
      }
    }

    if (
      endpointConfig &&
      endpointConfig.titleModel &&
      endpointConfig.titleModel !== Constants.CURRENT_MODEL
    ) {
      clientOptions.model = endpointConfig.titleModel;
    }

    const options = await titleProviderConfig.getOptions({
      req,
      endpoint,
      model_parameters: clientOptions,
      db: {
        getUserKey: db.getUserKey,
        getUserKeyValues: db.getUserKeyValues,
      },
    });

    let provider = options.provider ?? titleProviderConfig.overrideProvider ?? agent.provider;
    if (
      endpoint === EModelEndpoint.azureOpenAI &&
      options.llmConfig?.azureOpenAIApiInstanceName == null
    ) {
      provider = Providers.OPENAI;
    } else if (
      endpoint === EModelEndpoint.azureOpenAI &&
      options.llmConfig?.azureOpenAIApiInstanceName != null &&
      provider !== Providers.AZURE
    ) {
      provider = Providers.AZURE;
    }

    /** @type {import('@librechat/agents').ClientOptions} */
    clientOptions = { ...options.llmConfig };
    if (options.configOptions) {
      clientOptions.configuration = options.configOptions;
    }

    if (clientOptions.maxTokens != null) {
      delete clientOptions.maxTokens;
    }
    if (clientOptions?.modelKwargs?.max_completion_tokens != null) {
      delete clientOptions.modelKwargs.max_completion_tokens;
    }
    if (clientOptions?.modelKwargs?.max_output_tokens != null) {
      delete clientOptions.modelKwargs.max_output_tokens;
    }

    clientOptions = Object.assign(
      Object.fromEntries(
        Object.entries(clientOptions).filter(([key]) => !omitTitleOptions.has(key)),
      ),
    );

    if (
      provider === Providers.GOOGLE &&
      (endpointConfig?.titleMethod === TitleMethod.FUNCTIONS ||
        endpointConfig?.titleMethod === TitleMethod.STRUCTURED)
    ) {
      clientOptions.json = true;
    }

    /** Resolve request-based headers for Custom Endpoints. Note: if this is added to
     * non-custom endpoints, needs consideration of varying provider header configs.
     */
    if (clientOptions?.configuration?.defaultHeaders != null) {
      clientOptions.configuration.defaultHeaders = resolveHeaders({
        headers: clientOptions.configuration.defaultHeaders,
        user: createSafeUser(this.options.req?.user),
        body: {
          messageId: this.responseMessageId,
          conversationId: this.conversationId,
          parentMessageId: this.parentMessageId,
        },
      });
    }

    try {
      const titleResult = await this.run.generateTitle({
        provider,
        clientOptions,
        inputText: text,
        contentParts: this.contentParts,
        titleMethod: endpointConfig?.titleMethod,
        titlePrompt: endpointConfig?.titlePrompt,
        titlePromptTemplate: endpointConfig?.titlePromptTemplate,
        chainOptions: {
          signal: abortController.signal,
          callbacks: [
            {
              handleLLMEnd,
            },
          ],
          configurable: {
            thread_id: this.conversationId,
            user_id: this.user ?? this.options.req.user?.id,
          },
        },
      });

      const collectedUsage = collectedMetadata.map((item) => {
        let input_tokens, output_tokens;

        if (item.usage) {
          input_tokens =
            item.usage.prompt_tokens || item.usage.input_tokens || item.usage.inputTokens;
          output_tokens =
            item.usage.completion_tokens || item.usage.output_tokens || item.usage.outputTokens;
        } else if (item.tokenUsage) {
          input_tokens = item.tokenUsage.promptTokens;
          output_tokens = item.tokenUsage.completionTokens;
        }

        return {
          input_tokens: input_tokens,
          output_tokens: output_tokens,
        };
      });

      const balanceConfig = getBalanceConfig(appConfig);
      const transactionsConfig = getTransactionsConfig(appConfig);
      await this.recordCollectedUsage({
        collectedUsage,
        context: 'title',
        model: clientOptions.model,
        balance: balanceConfig,
        transactions: transactionsConfig,
      }).catch((err) => {
        logger.error(
          '[api/server/controllers/agents/client.js #titleConvo] Error recording collected usage',
          err,
        );
      });

      return sanitizeTitle(titleResult.title);
    } catch (err) {
      logger.error('[api/server/controllers/agents/client.js #titleConvo] Error', err);
      return;
    }
  }

  /**
   * @param {object} params
   * @param {number} params.promptTokens
   * @param {number} params.completionTokens
   * @param {string} [params.model]
   * @param {OpenAIUsageMetadata} [params.usage]
   * @param {AppConfig['balance']} [params.balance]
   * @param {string} [params.context='message']
   * @returns {Promise<void>}
   */
  async recordTokenUsage({
    model,
    usage,
    balance,
    promptTokens,
    completionTokens,
    context = 'message',
  }) {
    try {
      await spendTokens(
        {
          model,
          context,
          balance,
          conversationId: this.conversationId,
          user: this.user ?? this.options.req.user?.id,
          endpointTokenConfig: this.options.endpointTokenConfig,
        },
        { promptTokens, completionTokens },
      );

      if (
        usage &&
        typeof usage === 'object' &&
        'reasoning_tokens' in usage &&
        typeof usage.reasoning_tokens === 'number'
      ) {
        await spendTokens(
          {
            model,
            balance,
            context: 'reasoning',
            conversationId: this.conversationId,
            user: this.user ?? this.options.req.user?.id,
            endpointTokenConfig: this.options.endpointTokenConfig,
          },
          { completionTokens: usage.reasoning_tokens },
        );
      }
    } catch (error) {
      logger.error(
        '[api/server/controllers/agents/client.js #recordTokenUsage] Error recording token usage',
        error,
      );
    }
  }

  getEncoding() {
    return 'o200k_base';
  }

  /**
   * Returns the token count of a given text. It also checks and resets the tokenizers if necessary.
   * @param {string} text - The text to get the token count for.
   * @returns {number} The token count of the given text.
   */
  getTokenCount(text) {
    const encoding = this.getEncoding();
    return Tokenizer.getTokenCount(text, encoding);
  }
}

module.exports = AgentClient;