Mirror of https://github.com/danny-avila/LibreChat.git (synced 2026-01-22)
💰 fix: Multi-Agent Token Spending & Prevent Double-Spend (#11433)
* fix: Token Spending Logic for Multi-Agents on Abort Scenarios
  * Implemented logic to skip token spending when a conversation is aborted, preventing double-spending.
  * Introduced the `spendCollectedUsage` function to handle token spending for multiple models during aborts, ensuring accurate accounting for parallel agents (see the sketch below).
  * Updated `GenerationJobManager` to store and retrieve collected usage data for improved abort handling.
  * Added comprehensive tests covering cache token handling and parallel agent usage.
* fix: Memory Context Handling for Multi-Agents
  * Refactored the `buildMessages` method to pass memory context to parallel agents, ensuring they share the same user context.
  * Improved handling of memory context when parallel agents have no existing instructions.
  * Added tests verifying memory context propagation under various scenarios, including no memory available and empty agent configurations.
  * Enhanced logging for better traceability of memory context additions to agents.
* chore: Memory Context Documentation for Parallel Agents
  * Documented in the `AgentClient` class that agentConfig objects are mutated in place when memory context is passed to parallel agents.
  * Noted the implications of mutating objects directly, so all parallel agents receive the correct memory context before execution.
* chore: UsageMetadata Interface Docs for Token Spending
  * Expanded the `UsageMetadata` interface to support both OpenAI and Anthropic cache token formats.
  * Documented the cache token properties, including the mutually exclusive fields for the two provider formats.
  * Clarified how to access cache token details for accurate token spending tracking.
* fix: Enhance Token Spending Logic in Abort Middleware
  * Refactored `spendCollectedUsage` to use `Promise.all` for concurrent token spending, ensuring all operations complete before the collectedUsage array is cleared.
  * Documented why clearing the collectedUsage array is essential to prevent double-spending in abort scenarios.
  * Updated tests to verify the spending logic and that the array is cleared after spending.
Parent: 32e6f3b8e5 · Commit: 36c5a88c4e
11 changed files with 1440 additions and 28 deletions
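A minimal sketch of the abort-time spending flow described above. `spendCollectedUsage`, the `Promise.all` refactor, and the in-place clearing come from the commit message; the trimmed `UsageMetadata` shape and the `spendTokens` helper are hypothetical stand-ins for LibreChat's real types and balance logic.

```typescript
/** Trimmed stand-in for the real UsageMetadata interface (see the diff below). */
interface UsageMetadata {
  model?: string;
  input_tokens: number;
  output_tokens: number;
}

/** Hypothetical stand-in for LibreChat's per-model spending helper. */
type SpendFn = (usage: UsageMetadata) => Promise<void>;

/**
 * Spends usage collected from every model that ran (parallel agents each
 * contribute their own entry), then clears the shared array so a later
 * abort handler cannot spend the same entries twice.
 */
async function spendCollectedUsage(
  collectedUsage: UsageMetadata[],
  spendTokens: SpendFn,
): Promise<void> {
  // Promise.all: spend for all models concurrently, and wait for every
  // operation to complete before touching the array.
  await Promise.all(collectedUsage.map((usage) => spendTokens(usage)));

  // Clear in place (not by reassigning): callers hold a reference to this
  // same array, so emptying it is what prevents double-spending.
  collectedUsage.length = 0;
}
```

Clearing via `collectedUsage.length = 0` rather than `collectedUsage = []` matters because `GenerationJobManager` stores a reference to the same array, as the `setCollectedUsage` hunk below shows.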
Diff excerpt below, from the `GenerationJobManager` module (one of the 11 changed files):

```diff
@@ -1,9 +1,11 @@
 import { logger } from '@librechat/data-schemas';
 import type { StandardGraph } from '@librechat/agents';
-import type { Agents } from 'librechat-data-provider';
+import { parseTextParts } from 'librechat-data-provider';
+import type { Agents, TMessageContentParts } from 'librechat-data-provider';
 import type {
   SerializableJobData,
   IEventTransport,
+  UsageMetadata,
   AbortResult,
   IJobStore,
 } from './interfaces/IJobStore';
```
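The newly imported `UsageMetadata` is the interface the commit message says was expanded to carry both providers' cache token formats. A hedged sketch of what the mutually exclusive cache fields can look like: the Anthropic names follow Anthropic's API, the OpenAI nesting is illustrative, and LibreChat's actual property names may differ.

```typescript
interface UsageMetadata {
  model?: string;
  input_tokens: number;
  output_tokens: number;

  /** Anthropic-format cache tokens (unset for OpenAI models). */
  cache_creation_input_tokens?: number;
  cache_read_input_tokens?: number;

  /** OpenAI-format cache tokens (unset for Anthropic models). */
  prompt_tokens_details?: {
    cached_tokens?: number;
  };
}
```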
```diff
@@ -585,7 +587,14 @@ class GenerationJobManagerClass {
 
     if (!jobData) {
       logger.warn(`[GenerationJobManager] Cannot abort - job not found: ${streamId}`);
-      return { success: false, jobData: null, content: [], finalEvent: null };
+      return {
+        text: '',
+        content: [],
+        jobData: null,
+        success: false,
+        finalEvent: null,
+        collectedUsage: [],
+      };
     }
 
     // Emit abort signal for cross-replica support (Redis mode)
```
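The widened early return matches the `AbortResult` type imported in the first hunk. A sketch of the shape this return implies — field types are inferred from the surrounding diff, not confirmed against the interface file:

```typescript
import type { TMessageContentParts } from 'librechat-data-provider';
import type { SerializableJobData, UsageMetadata } from './interfaces/IJobStore';

/** Inferred shape; the real declaration lives in ./interfaces/IJobStore. */
interface AbortResult {
  success: boolean;
  jobData: SerializableJobData | null;
  content: TMessageContentParts[];
  finalEvent: unknown; // t.ServerSentEvent | null in the real code
  text: string; // new in this commit: fallback text for token counting
  collectedUsage: UsageMetadata[]; // new in this commit: per-model usage to spend
}
```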
```diff
@@ -599,15 +608,21 @@ class GenerationJobManagerClass {
       runtime.abortController.abort();
     }
 
-    // Get content before clearing state
+    /** Content before clearing state */
     const result = await this.jobStore.getContentParts(streamId);
     const content = result?.content ?? [];
 
-    // Detect "early abort" - aborted before any generation happened (e.g., during tool loading)
-    // In this case, no messages were saved to DB, so frontend shouldn't navigate to conversation
+    /** Collected usage for all models */
+    const collectedUsage = this.jobStore.getCollectedUsage(streamId);
+
+    /** Text from content parts for fallback token counting */
+    const text = parseTextParts(content as TMessageContentParts[]);
+
+    /** Detect "early abort" - aborted before any generation happened (e.g., during tool loading)
+        In this case, no messages were saved to DB, so frontend shouldn't navigate to conversation */
     const isEarlyAbort = content.length === 0 && !jobData.responseMessageId;
 
-    // Create final event for abort
+    /** Final event for abort */
     const userMessageId = jobData.userMessage?.messageId;
 
     const abortFinalEvent: t.ServerSentEvent = {
```
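`parseTextParts` gives the abort path a plain-text fallback for token counting when a model never reported structured usage. Roughly, it flattens the text content parts into one string; this is a simplification of the real helper exported by librechat-data-provider, and the join behavior here is assumed.

```typescript
// Simplified stand-in: concatenate the text of all `type: 'text'` parts.
function parseTextPartsSketch(
  content: Array<{ type: string; text?: string }>,
): string {
  return content
    .filter((part) => part.type === 'text' && typeof part.text === 'string')
    .map((part) => part.text)
    .join('');
}
```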
```diff
@@ -669,6 +684,8 @@ class GenerationJobManagerClass {
       jobData,
       content,
       finalEvent: abortFinalEvent,
+      text,
+      collectedUsage,
     };
   }
 
```
```diff
@@ -933,6 +950,18 @@ class GenerationJobManagerClass {
     this.jobStore.setContentParts(streamId, contentParts);
   }
 
+  /**
+   * Set reference to the collectedUsage array.
+   * This array accumulates token usage from all models during generation.
+   */
+  setCollectedUsage(streamId: string, collectedUsage: UsageMetadata[]): void {
+    // Use runtime state check for performance (sync check)
+    if (!this.runtimeState.has(streamId)) {
+      return;
+    }
+    this.jobStore.setCollectedUsage(streamId, collectedUsage);
+  }
+
   /**
    * Set reference to the graph instance.
    */
```
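Because `setCollectedUsage` stores a reference rather than a copy, the generation path and the abort path observe the same array. A hedged sketch of that lifecycle: `setCollectedUsage` comes from this diff, while the manager's public `getCollectedUsage` surface and the call sites are assumptions for illustration.

```typescript
/** Trimmed usage shape; LibreChat's real interface carries more fields. */
interface UsageMetadata {
  model?: string;
  input_tokens: number;
  output_tokens: number;
}

/** Assumed manager surface; only setCollectedUsage is shown in the diff above. */
declare const GenerationJobManager: {
  setCollectedUsage(streamId: string, usage: UsageMetadata[]): void;
  getCollectedUsage(streamId: string): UsageMetadata[];
};

declare function spendCollectedUsage(
  usage: UsageMetadata[],
  spend: (u: UsageMetadata) => Promise<void>,
): Promise<void>;
declare function spendTokens(usage: UsageMetadata): Promise<void>;

async function abortLifecycleExample(streamId: string): Promise<void> {
  // At generation start: hand the manager a reference to the shared array.
  const collectedUsage: UsageMetadata[] = [];
  GenerationJobManager.setCollectedUsage(streamId, collectedUsage);

  // During generation, each model (including parallel agents) appends usage.
  collectedUsage.push({ model: 'gpt-4o', input_tokens: 1200, output_tokens: 340 });

  // On abort: retrieve the same reference, spend concurrently, then clear it.
  const usage = GenerationJobManager.getCollectedUsage(streamId);
  await spendCollectedUsage(usage, spendTokens);
}
```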