Mirror of https://github.com/danny-avila/LibreChat.git (synced 2026-01-22 18:26:12 +01:00)
💰 fix: Multi-Agent Token Spending & Prevent Double-Spend (#11433)
* fix: Token Spending Logic for Multi-Agents on Abort Scenarios
  * Implemented logic to skip token spending if a conversation is aborted, preventing double-spending.
  * Introduced `spendCollectedUsage` function to handle token spending for multiple models during aborts, ensuring accurate accounting for parallel agents.
  * Updated `GenerationJobManager` to store and retrieve collected usage data for improved abort handling.
  * Added comprehensive tests for the new functionality, covering various scenarios including cache token handling and parallel agent usage.
* fix: Memory Context Handling for Multi-Agents
  * Refactored the `buildMessages` method to pass memory context to parallel agents, ensuring they share the same user context.
  * Improved handling of memory context when no existing instructions are present for parallel agents.
  * Added comprehensive tests to verify memory context propagation and behavior under various scenarios, including cases with no memory available and empty agent configurations.
  * Enhanced logging for better traceability of memory context additions to agents.
* chore: Memory Context Documentation for Parallel Agents
  * Updated documentation in the `AgentClient` class to clarify the in-place mutation of `agentConfig` objects when passing memory context to parallel agents.
  * Added notes on the implications of mutating objects directly, to ensure all parallel agents receive the correct memory context before execution.
* chore: UsageMetadata Interface Docs for Token Spending
  * Expanded the `UsageMetadata` interface to support both OpenAI and Anthropic cache token formats.
  * Added detailed documentation for cache token properties, including mutually exclusive fields for different model types.
  * Improved clarity on how to access cache token details for accurate token spending tracking.
* fix: Enhance Token Spending Logic in Abort Middleware
  * Refactored the `spendCollectedUsage` function to use `Promise.all` for concurrent token spending, improving performance and ensuring all operations complete before clearing the `collectedUsage` array.
  * Added documentation to clarify the importance of clearing the `collectedUsage` array to prevent double-spending in abort scenarios.
  * Updated tests to verify the correct behavior of the spending logic and the clearing of the array after spending operations.
parent 32e6f3b8e5
commit 36c5a88c4e
11 changed files with 1440 additions and 28 deletions
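The abort-middleware bullets above describe `spendCollectedUsage` spending usage for all models concurrently via `Promise.all` and then clearing the array to prevent double-spending. The function body is not shown in the hunks below, so the following is only a minimal sketch of that shape; `spendTokensForModel` and the fallback-model parameter are hypothetical stand-ins for LibreChat's real transaction helpers.

```ts
// Shape of the usage entries collected per model (mirrors the UsageMetadata interface added below).
interface UsageMetadata {
  input_tokens?: number;
  output_tokens?: number;
  model?: string;
  input_token_details?: { cache_creation?: number; cache_read?: number };
  cache_creation_input_tokens?: number;
  cache_read_input_tokens?: number;
}

// Hypothetical per-model spend call; the real project records a balance transaction here.
async function spendTokensForModel(model: string, usage: UsageMetadata): Promise<void> {
  console.log(`spending ${usage.input_tokens ?? 0} in / ${usage.output_tokens ?? 0} out for ${model}`);
}

// Spend every collected entry concurrently, then empty the array in place so a later
// abort/finalize path cannot spend the same entries a second time.
async function spendCollectedUsage(collectedUsage: UsageMetadata[], fallbackModel: string): Promise<void> {
  await Promise.all(
    collectedUsage.map((usage) => spendTokensForModel(usage.model ?? fallbackModel, usage)),
  );
  collectedUsage.length = 0; // mutate in place: every holder of the reference sees it cleared
}
```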
@@ -45,6 +45,54 @@ export interface SerializableJobData {

```ts
  promptTokens?: number;
}

/**
 * Usage metadata for token spending across different LLM providers.
 *
 * This interface supports two mutually exclusive cache token formats:
 *
 * **OpenAI format** (GPT-4, o1, etc.):
 * - Uses `input_token_details.cache_creation` and `input_token_details.cache_read`
 * - Cache tokens are nested under the `input_token_details` object
 *
 * **Anthropic format** (Claude models):
 * - Uses `cache_creation_input_tokens` and `cache_read_input_tokens`
 * - Cache tokens are top-level properties
 *
 * When processing usage data, check both formats:
 * ```typescript
 * const cacheCreation = usage.input_token_details?.cache_creation
 *   || usage.cache_creation_input_tokens || 0;
 * ```
 */
export interface UsageMetadata {
  /** Total input tokens (prompt tokens) */
  input_tokens?: number;
  /** Total output tokens (completion tokens) */
  output_tokens?: number;
  /** Model identifier that generated this usage */
  model?: string;
  /**
   * OpenAI-style cache token details.
   * Present for OpenAI models (GPT-4, o1, etc.)
   */
  input_token_details?: {
    /** Tokens written to cache */
    cache_creation?: number;
    /** Tokens read from cache */
    cache_read?: number;
  };
  /**
   * Anthropic-style cache creation tokens.
   * Present for Claude models. Mutually exclusive with input_token_details.
   */
  cache_creation_input_tokens?: number;
  /**
   * Anthropic-style cache read tokens.
   * Present for Claude models. Mutually exclusive with input_token_details.
   */
  cache_read_input_tokens?: number;
}

/**
 * Result returned from aborting a job - contains all data needed
 * for token spending and message saving without storing callbacks
```
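The doc comment in the hunk above tells callers to check both cache-token formats. A small normalizer in that spirit might look like the following; the helper name and return shape are illustrative, not part of this commit.

```ts
// Collapse OpenAI-style (nested under input_token_details) and Anthropic-style
// (top-level) cache counters into one shape, defaulting to 0 when absent.
function getCacheTokens(usage: UsageMetadata): { cacheCreation: number; cacheRead: number } {
  return {
    cacheCreation:
      usage.input_token_details?.cache_creation ?? usage.cache_creation_input_tokens ?? 0,
    cacheRead: usage.input_token_details?.cache_read ?? usage.cache_read_input_tokens ?? 0,
  };
}
```

Spending logic can then price cached reads and writes uniformly regardless of provider.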
@@ -58,6 +106,10 @@ export interface AbortResult {

```ts
  content: Agents.MessageContentComplex[];
  /** Final event to send to client */
  finalEvent: unknown;
  /** Concatenated text from all content parts for token counting fallback */
  text: string;
  /** Collected usage metadata from all models for token spending */
  collectedUsage: UsageMetadata[];
}

/**
```
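`AbortResult` bundles everything the abort path needs without storing callbacks. As an illustration of how a caller could consume it, here is a sketch that reuses the `spendCollectedUsage` stub from above; the handler itself and the reduced `AbortLike` type are assumptions, only the field names come from this diff.

```ts
// Minimal local view of AbortResult for this sketch (the full interface is in the hunk above).
type AbortLike = {
  text: string;
  finalEvent: unknown;
  collectedUsage: UsageMetadata[];
};

async function handleAbort(
  result: AbortLike,
  fallbackModel: string,
  send: (event: unknown) => void,
): Promise<void> {
  if (result.collectedUsage.length > 0) {
    // Structured usage was collected per model: spend it once; the array is cleared afterwards.
    await spendCollectedUsage(result.collectedUsage, fallbackModel);
  } else {
    // Otherwise a caller could fall back to counting result.text for an approximate charge.
  }
  send(result.finalEvent);
}
```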
@@ -210,6 +262,23 @@ export interface IJobStore {

```ts
   * @param runSteps - Run steps to save
   */
  saveRunSteps?(streamId: string, runSteps: Agents.RunStep[]): Promise<void>;

  /**
   * Set collected usage reference for a job.
   * This array accumulates token usage from all models during generation.
   *
   * @param streamId - The stream identifier
   * @param collectedUsage - Array of usage metadata from all models
   */
  setCollectedUsage(streamId: string, collectedUsage: UsageMetadata[]): void;

  /**
   * Get collected usage for a job.
   *
   * @param streamId - The stream identifier
   * @returns Array of usage metadata or empty array
   */
  getCollectedUsage(streamId: string): UsageMetadata[];
}

/**
```
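The two `IJobStore` methods above only keep and return a reference to the shared usage array. A bare-bones in-memory sketch of that contract follows; it is illustrative only, and the `GenerationJobManager` touched by this commit does considerably more.

```ts
class InMemoryCollectedUsageStore {
  private byStream = new Map<string, UsageMetadata[]>();

  // Store the live array reference so entries pushed later by the run stay visible here.
  setCollectedUsage(streamId: string, collectedUsage: UsageMetadata[]): void {
    this.byStream.set(streamId, collectedUsage);
  }

  // Return the stored array, or an empty array for unknown jobs.
  getCollectedUsage(streamId: string): UsageMetadata[] {
    return this.byStream.get(streamId) ?? [];
  }
}
```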