mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-01-22 18:26:12 +01:00
💰 fix: Multi-Agent Token Spending & Prevent Double-Spend (#11433)
* fix: Token Spending Logic for Multi-Agents on Abort Scenarios
  * Implemented logic to skip token spending if a conversation is aborted, preventing double-spending.
  * Introduced `spendCollectedUsage` function to handle token spending for multiple models during aborts, ensuring accurate accounting for parallel agents.
  * Updated `GenerationJobManager` to store and retrieve collected usage data for improved abort handling.
  * Added comprehensive tests for the new functionality, covering various scenarios including cache token handling and parallel agent usage.
* fix: Memory Context Handling for Multi-Agents
  * Refactored `buildMessages` method to pass memory context to parallel agents, ensuring they share the same user context.
  * Improved handling of memory context when no existing instructions are present for parallel agents.
  * Added comprehensive tests to verify memory context propagation and behavior under various scenarios, including cases with no memory available and empty agent configurations.
  * Enhanced logging for better traceability of memory context additions to agents.
* chore: Memory Context Documentation for Parallel Agents
  * Updated documentation in the `AgentClient` class to clarify the in-place mutation of agentConfig objects when passing memory context to parallel agents.
  * Added notes on the implications of mutating objects directly to ensure all parallel agents receive the correct memory context before execution.
* chore: UsageMetadata Interface docs for Token Spending
  * Expanded the UsageMetadata interface to support both OpenAI and Anthropic cache token formats.
  * Added detailed documentation for cache token properties, including mutually exclusive fields for different model types.
  * Improved clarity on how to access cache token details for accurate token spending tracking.
* fix: Enhance Token Spending Logic in Abort Middleware
  * Refactored `spendCollectedUsage` function to utilize Promise.all for concurrent token spending, improving performance and ensuring all operations complete before clearing the collectedUsage array.
  * Added documentation to clarify the importance of clearing the collectedUsage array to prevent double-spending in abort scenarios.
  * Updated tests to verify the correct behavior of the spending logic and the clearing of the array after spending operations.
This commit is contained in:
parent
32e6f3b8e5
commit
36c5a88c4e
11 changed files with 1440 additions and 28 deletions
|
|
@ -522,14 +522,36 @@ class AgentClient extends BaseClient {
|
|||
}
|
||||
|
||||
const withoutKeys = await this.useMemory();
|
||||
if (withoutKeys) {
|
||||
systemContent += `${memoryInstructions}\n\n# Existing memory about the user:\n${withoutKeys}`;
|
||||
const memoryContext = withoutKeys
|
||||
? `${memoryInstructions}\n\n# Existing memory about the user:\n${withoutKeys}`
|
||||
: '';
|
||||
if (memoryContext) {
|
||||
systemContent += memoryContext;
|
||||
}
|
||||
|
||||
if (systemContent) {
|
||||
this.options.agent.instructions = systemContent;
|
||||
}
|
||||
|
||||
/**
|
||||
* Pass memory context to parallel agents (addedConvo) so they have the same user context.
|
||||
*
|
||||
* NOTE: This intentionally mutates the agentConfig objects in place. The agentConfigs Map
|
||||
* holds references to config objects that will be passed to the graph runtime. Mutating
|
||||
* them here ensures all parallel agents receive the memory context before execution starts.
|
||||
* Creating new objects would not work because the Map references would still point to the old objects.
|
||||
*/
|
||||
if (memoryContext && this.agentConfigs?.size > 0) {
|
||||
for (const [agentId, agentConfig] of this.agentConfigs.entries()) {
|
||||
if (agentConfig.instructions) {
|
||||
agentConfig.instructions = agentConfig.instructions + '\n\n' + memoryContext;
|
||||
} else {
|
||||
agentConfig.instructions = memoryContext;
|
||||
}
|
||||
logger.debug(`[AgentClient] Added memory context to parallel agent: ${agentId}`);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -1084,11 +1106,20 @@ class AgentClient extends BaseClient {
|
|||
this.artifactPromises.push(...attachments);
|
||||
}
|
||||
|
||||
await this.recordCollectedUsage({
|
||||
context: 'message',
|
||||
balance: balanceConfig,
|
||||
transactions: transactionsConfig,
|
||||
});
|
||||
/** Skip token spending if aborted - the abort handler (abortMiddleware.js) handles it
|
||||
This prevents double-spending when user aborts via `/api/agents/chat/abort` */
|
||||
const wasAborted = abortController?.signal?.aborted;
|
||||
if (!wasAborted) {
|
||||
await this.recordCollectedUsage({
|
||||
context: 'message',
|
||||
balance: balanceConfig,
|
||||
transactions: transactionsConfig,
|
||||
});
|
||||
} else {
|
||||
logger.debug(
|
||||
'[api/server/controllers/agents/client.js #chatCompletion] Skipping token spending - handled by abort middleware',
|
||||
);
|
||||
}
|
||||
} catch (err) {
|
||||
logger.error(
|
||||
'[api/server/controllers/agents/client.js #chatCompletion] Error in cleanup phase',
|
||||
|
|
|
|||
|
|
@ -1849,4 +1849,224 @@ describe('AgentClient - titleConvo', () => {
|
|||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('buildMessages - memory context for parallel agents', () => {
  /** Client under test; rebuilt in `beforeEach` so each case starts from fresh fixtures. */
  let client;

  /** Returns a fresh one-message conversation: a single user turn with no parent. */
  const createUserTurn = () => [
    {
      messageId: 'msg-1',
      parentMessageId: null,
      sender: 'User',
      text: 'Hello',
      isCreatedByUser: true,
    },
  ];

  /** Replaces `client.useMemory` with a Jest mock that resolves to `memory`. */
  const stubMemory = (memory) => {
    client.useMemory = jest.fn().mockResolvedValue(memory);
  };

  /** Registers the given agent configs on the client, keyed by each config's `id`. */
  const registerParallelAgents = (...agents) => {
    client.agentConfigs = new Map(agents.map((agent) => [agent.id, agent]));
  };

  /**
   * Invokes `buildMessages` with the single-turn conversation, a null parent id,
   * the supplied base `instructions`, and no additional instructions.
   */
  const runBuild = (instructions) =>
    client.buildMessages(createUserTurn(), null, {
      instructions,
      additional_instructions: null,
    });

  beforeEach(() => {
    jest.clearAllMocks();

    const mockAgent = {
      id: 'primary-agent',
      name: 'Primary Agent',
      endpoint: EModelEndpoint.openAI,
      provider: EModelEndpoint.openAI,
      instructions: 'Primary agent instructions',
      model_parameters: {
        model: 'gpt-4',
      },
      tools: [],
    };

    // Memories are switched on both in the user's personalization and in the app config.
    const mockReq = {
      user: {
        id: 'user-123',
        personalization: {
          memories: true,
        },
      },
      body: {
        endpoint: EModelEndpoint.openAI,
      },
      config: {
        memory: {
          disabled: false,
        },
      },
    };

    const mockRes = {};

    const mockOptions = {
      req: mockReq,
      res: mockRes,
      agent: mockAgent,
      endpoint: EModelEndpoint.agents,
    };

    client = new AgentClient(mockOptions);
    Object.assign(client, {
      conversationId: 'convo-123',
      responseMessageId: 'response-123',
      shouldSummarize: false,
      maxContextTokens: 4096,
    });
  });

  it('should pass memory context to parallel agents (addedConvo)', async () => {
    const memoryContent = 'User prefers dark mode. User is a software developer.';
    stubMemory(memoryContent);

    const parallelAgent1 = {
      id: 'parallel-agent-1',
      name: 'Parallel Agent 1',
      instructions: 'Parallel agent 1 instructions',
      provider: EModelEndpoint.openAI,
    };
    const parallelAgent2 = {
      id: 'parallel-agent-2',
      name: 'Parallel Agent 2',
      instructions: 'Parallel agent 2 instructions',
      provider: EModelEndpoint.anthropic,
    };
    registerParallelAgents(parallelAgent1, parallelAgent2);

    await runBuild('Base instructions');

    expect(client.useMemory).toHaveBeenCalled();

    // The primary agent keeps its base instructions and gains the memory text.
    const primaryInstructions = client.options.agent.instructions;
    expect(primaryInstructions).toContain('Base instructions');
    expect(primaryInstructions).toContain(memoryContent);

    // Each parallel agent keeps its own instructions and gains the same memory text.
    const expectations = [
      [parallelAgent1, 'Parallel agent 1 instructions'],
      [parallelAgent2, 'Parallel agent 2 instructions'],
    ];
    for (const [agent, ownInstructions] of expectations) {
      expect(agent.instructions).toContain(ownInstructions);
      expect(agent.instructions).toContain(memoryContent);
    }
  });

  it('should not modify parallel agents when no memory context is available', async () => {
    stubMemory(undefined);

    const parallelAgent = {
      id: 'parallel-agent-1',
      name: 'Parallel Agent 1',
      instructions: 'Original parallel instructions',
      provider: EModelEndpoint.openAI,
    };
    registerParallelAgents(parallelAgent);

    await runBuild('Base instructions');

    expect(parallelAgent.instructions).toBe('Original parallel instructions');
  });

  it('should handle parallel agents without existing instructions', async () => {
    const memoryContent = 'User is a data scientist.';
    stubMemory(memoryContent);

    // No `instructions` key at all on this config.
    const parallelAgentNoInstructions = {
      id: 'parallel-agent-no-instructions',
      name: 'Parallel Agent No Instructions',
      provider: EModelEndpoint.openAI,
    };
    registerParallelAgents(parallelAgentNoInstructions);

    await runBuild(null);

    expect(parallelAgentNoInstructions.instructions).toContain(memoryContent);
  });

  it('should not modify agentConfigs when none exist', async () => {
    const memoryContent = 'User prefers concise responses.';
    stubMemory(memoryContent);

    client.agentConfigs = null;

    await expect(runBuild('Base instructions')).resolves.not.toThrow();

    expect(client.options.agent.instructions).toContain(memoryContent);
  });

  it('should handle empty agentConfigs map', async () => {
    const memoryContent = 'User likes detailed explanations.';
    stubMemory(memoryContent);

    client.agentConfigs = new Map();

    await expect(runBuild('Base instructions')).resolves.not.toThrow();

    expect(client.options.agent.instructions).toContain(memoryContent);
  });
});
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue