💰 fix: Multi-Agent Token Spending & Prevent Double-Spend (#11433)

* fix: Token Spending Logic for Multi-Agents in Abort Scenarios

* Implemented logic to skip the normal token spending path when a conversation is aborted, preventing double-spending (a brief sketch follows this list).
* Introduced the `spendCollectedUsage` function to handle token spending for multiple models during aborts, ensuring accurate accounting for parallel agents.
* Updated `GenerationJobManager` to store and retrieve collected usage data for improved abort handling.
* Added comprehensive tests for the new functionality, covering various scenarios including cache token handling and parallel agent usage.
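A minimal sketch of the abort-side accounting these bullets describe, assuming illustrative import paths and a hypothetical `spendUsage` callback in place of the project's actual transaction layer:

```typescript
import { GenerationJobManager } from '~/stream'; // illustrative path
import type { UsageMetadata } from '~/stream/interfaces/IJobStore';

/** Hypothetical token sink; stands in for the project's balance/transaction layer. */
type SpendUsage = (usage: UsageMetadata) => Promise<void>;

/**
 * Abort handler sketch: retrieve the usage every model reported so far and
 * spend it here, so the normal completion path can skip spending entirely
 * (and cannot double-spend) when the run was aborted.
 */
async function handleAbort(streamId: string, spendUsage: SpendUsage): Promise<void> {
  const { success, collectedUsage } = await GenerationJobManager.abortJob(streamId);
  if (!success || collectedUsage.length === 0) {
    return;
  }
  // One entry per LLM call: primary agent, parallel agents, and tool-use turns
  for (const usage of collectedUsage) {
    await spendUsage(usage);
  }
}
```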

* fix: Memory Context Handling for Multi-Agents

* Refactored the `buildMessages` method to pass memory context to parallel agents, ensuring they share the same user context (see the sketch after this list).
* Improved handling of memory context when no existing instructions are present for parallel agents.
* Added comprehensive tests to verify memory context propagation and behavior under various scenarios, including cases with no memory available and empty agent configurations.
* Enhanced logging for better traceability of memory context additions to agents.
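A hedged sketch of that propagation step; `addMemoryToParallelAgents`, `memoryContext`, and the `AgentConfig` shape are assumed names, not the exact `buildMessages` internals:

```typescript
interface AgentConfig {
  id: string;
  instructions?: string;
}

/**
 * Sketch: give every parallel agent the same user memory context the primary
 * agent received. Note the in-place mutation of each config object, so the
 * runtime that already holds these references sees the updated instructions.
 */
function addMemoryToParallelAgents(agentConfigs: AgentConfig[], memoryContext: string): void {
  if (!memoryContext) {
    return;
  }
  for (const config of agentConfigs) {
    // When an agent has no existing instructions, the memory context becomes them
    config.instructions = config.instructions
      ? `${config.instructions}\n\n${memoryContext}`
      : memoryContext;
    console.debug(`[AgentClient] added memory context to parallel agent ${config.id}`);
  }
}
```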

* chore: Memory Context Documentation for Parallel Agents

* Updated documentation in the `AgentClient` class to clarify the in-place mutation of `agentConfig` objects when passing memory context to parallel agents.
* Added notes on the implications of mutating objects directly to ensure all parallel agents receive the correct memory context before execution.

* chore: UsageMetadata Interface docs for Token Spending

* Expanded the `UsageMetadata` interface to support both OpenAI and Anthropic cache token formats.
* Added detailed documentation for cache token properties, including mutually exclusive fields for different model types.
* Improved clarity on how to access cache token details for accurate token-spending tracking (a normalization sketch follows this list).
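For callers that need a single view of cache usage, a normalization helper might look like the following sketch (only fields documented on the interface are used; the helper name is hypothetical):

```typescript
import type { UsageMetadata } from '~/stream/interfaces/IJobStore'; // path as used in the diff below

/** Normalize cache tokens: OpenAI nests them under input_token_details, Anthropic exposes them top-level. */
function getCacheTokens(usage: UsageMetadata): { cacheWrite: number; cacheRead: number } {
  return {
    cacheWrite: usage.input_token_details?.cache_creation ?? usage.cache_creation_input_tokens ?? 0,
    cacheRead: usage.input_token_details?.cache_read ?? usage.cache_read_input_tokens ?? 0,
  };
}
```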

* fix: Enhance Token Spending Logic in Abort Middleware

* Refactored the `spendCollectedUsage` function to use `Promise.all` for concurrent token spending, improving performance and ensuring all operations complete before the `collectedUsage` array is cleared (see the sketch after this list).
* Added documentation to clarify the importance of clearing the `collectedUsage` array in place to prevent double-spending in abort scenarios.
* Updated tests to verify the correct behavior of the spending logic and the clearing of the array after spending operations.
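A hedged sketch of the refactored helper under those constraints, with `spendUsage` as a stand-in for the project's spend logic:

```typescript
import type { UsageMetadata } from '~/stream/interfaces/IJobStore'; // illustrative path

type SpendUsage = (usage: UsageMetadata) => Promise<void>;

/**
 * Spend every collected entry concurrently, then truncate the shared array in
 * place. The agent runtime holds the same array reference, so clearing it here
 * guarantees a later completion handler cannot spend the same usage twice.
 */
async function spendCollectedUsage(
  collectedUsage: UsageMetadata[],
  spendUsage: SpendUsage,
): Promise<void> {
  await Promise.all(collectedUsage.map((usage) => spendUsage(usage)));
  collectedUsage.length = 0; // clear in place to prevent double-spend
}
```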
Danny Avila 2026-01-20 14:43:19 -05:00 committed by GitHub
parent 32e6f3b8e5
commit 36c5a88c4e
11 changed files with 1440 additions and 28 deletions


@@ -1,9 +1,11 @@
import { logger } from '@librechat/data-schemas';
import type { StandardGraph } from '@librechat/agents';
import type { Agents } from 'librechat-data-provider';
import { parseTextParts } from 'librechat-data-provider';
import type { Agents, TMessageContentParts } from 'librechat-data-provider';
import type {
SerializableJobData,
IEventTransport,
UsageMetadata,
AbortResult,
IJobStore,
} from './interfaces/IJobStore';
@@ -585,7 +587,14 @@ class GenerationJobManagerClass {
if (!jobData) {
logger.warn(`[GenerationJobManager] Cannot abort - job not found: ${streamId}`);
return { success: false, jobData: null, content: [], finalEvent: null };
return {
text: '',
content: [],
jobData: null,
success: false,
finalEvent: null,
collectedUsage: [],
};
}
// Emit abort signal for cross-replica support (Redis mode)
@@ -599,15 +608,21 @@ class GenerationJobManagerClass {
runtime.abortController.abort();
}
// Get content before clearing state
/** Content before clearing state */
const result = await this.jobStore.getContentParts(streamId);
const content = result?.content ?? [];
// Detect "early abort" - aborted before any generation happened (e.g., during tool loading)
// In this case, no messages were saved to DB, so frontend shouldn't navigate to conversation
/** Collected usage for all models */
const collectedUsage = this.jobStore.getCollectedUsage(streamId);
/** Text from content parts for fallback token counting */
const text = parseTextParts(content as TMessageContentParts[]);
/** Detect "early abort" - aborted before any generation happened (e.g., during tool loading)
In this case, no messages were saved to DB, so frontend shouldn't navigate to conversation */
const isEarlyAbort = content.length === 0 && !jobData.responseMessageId;
// Create final event for abort
/** Final event for abort */
const userMessageId = jobData.userMessage?.messageId;
const abortFinalEvent: t.ServerSentEvent = {
@@ -669,6 +684,8 @@ class GenerationJobManagerClass {
jobData,
content,
finalEvent: abortFinalEvent,
text,
collectedUsage,
};
}
@@ -933,6 +950,18 @@ class GenerationJobManagerClass {
this.jobStore.setContentParts(streamId, contentParts);
}
/**
* Set reference to the collectedUsage array.
* This array accumulates token usage from all models during generation.
*/
setCollectedUsage(streamId: string, collectedUsage: UsageMetadata[]): void {
// Use runtime state check for performance (sync check)
if (!this.runtimeState.has(streamId)) {
return;
}
this.jobStore.setCollectedUsage(streamId, collectedUsage);
}
/**
* Set reference to the graph instance.
*/


@@ -0,0 +1,482 @@
/**
* Tests for collected usage functionality in GenerationJobManager.
*
* This tests the storage and retrieval of collectedUsage for abort handling,
* ensuring all models (including parallel agents from addedConvo) have their
* tokens spent when a conversation is aborted.
*/
import type { UsageMetadata } from '../interfaces/IJobStore';
describe('CollectedUsage - InMemoryJobStore', () => {
beforeEach(() => {
jest.resetModules();
});
it('should store and retrieve collectedUsage', async () => {
const { InMemoryJobStore } = await import('../implementations/InMemoryJobStore');
const store = new InMemoryJobStore();
await store.initialize();
const streamId = 'test-stream-1';
await store.createJob(streamId, 'user-1');
const collectedUsage: UsageMetadata[] = [
{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' },
{ input_tokens: 80, output_tokens: 40, model: 'claude-3' },
];
store.setCollectedUsage(streamId, collectedUsage);
const retrieved = store.getCollectedUsage(streamId);
expect(retrieved).toEqual(collectedUsage);
expect(retrieved).toHaveLength(2);
await store.destroy();
});
it('should return empty array when no collectedUsage set', async () => {
const { InMemoryJobStore } = await import('../implementations/InMemoryJobStore');
const store = new InMemoryJobStore();
await store.initialize();
const streamId = 'test-stream-2';
await store.createJob(streamId, 'user-1');
const retrieved = store.getCollectedUsage(streamId);
expect(retrieved).toEqual([]);
await store.destroy();
});
it('should return empty array for non-existent stream', async () => {
const { InMemoryJobStore } = await import('../implementations/InMemoryJobStore');
const store = new InMemoryJobStore();
await store.initialize();
const retrieved = store.getCollectedUsage('non-existent-stream');
expect(retrieved).toEqual([]);
await store.destroy();
});
it('should update collectedUsage when set multiple times', async () => {
const { InMemoryJobStore } = await import('../implementations/InMemoryJobStore');
const store = new InMemoryJobStore();
await store.initialize();
const streamId = 'test-stream-3';
await store.createJob(streamId, 'user-1');
const usage1: UsageMetadata[] = [{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' }];
store.setCollectedUsage(streamId, usage1);
// Simulate more usage being added
const usage2: UsageMetadata[] = [
{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' },
{ input_tokens: 80, output_tokens: 40, model: 'claude-3' },
];
store.setCollectedUsage(streamId, usage2);
const retrieved = store.getCollectedUsage(streamId);
expect(retrieved).toHaveLength(2);
await store.destroy();
});
it('should clear collectedUsage when clearContentState is called', async () => {
const { InMemoryJobStore } = await import('../implementations/InMemoryJobStore');
const store = new InMemoryJobStore();
await store.initialize();
const streamId = 'test-stream-4';
await store.createJob(streamId, 'user-1');
const collectedUsage: UsageMetadata[] = [
{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' },
];
store.setCollectedUsage(streamId, collectedUsage);
expect(store.getCollectedUsage(streamId)).toHaveLength(1);
store.clearContentState(streamId);
expect(store.getCollectedUsage(streamId)).toEqual([]);
await store.destroy();
});
it('should clear collectedUsage when job is deleted', async () => {
const { InMemoryJobStore } = await import('../implementations/InMemoryJobStore');
const store = new InMemoryJobStore();
await store.initialize();
const streamId = 'test-stream-5';
await store.createJob(streamId, 'user-1');
const collectedUsage: UsageMetadata[] = [
{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' },
];
store.setCollectedUsage(streamId, collectedUsage);
await store.deleteJob(streamId);
expect(store.getCollectedUsage(streamId)).toEqual([]);
await store.destroy();
});
});
describe('CollectedUsage - GenerationJobManager', () => {
beforeEach(() => {
jest.resetModules();
});
it('should set and retrieve collectedUsage through manager', async () => {
const { GenerationJobManager } = await import('../GenerationJobManager');
const { InMemoryJobStore } = await import('../implementations/InMemoryJobStore');
const { InMemoryEventTransport } = await import('../implementations/InMemoryEventTransport');
GenerationJobManager.configure({
jobStore: new InMemoryJobStore(),
eventTransport: new InMemoryEventTransport(),
isRedis: false,
cleanupOnComplete: false,
});
await GenerationJobManager.initialize();
const streamId = `manager-test-${Date.now()}`;
await GenerationJobManager.createJob(streamId, 'user-1');
const collectedUsage: UsageMetadata[] = [
{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' },
{ input_tokens: 80, output_tokens: 40, model: 'claude-3' },
];
GenerationJobManager.setCollectedUsage(streamId, collectedUsage);
// Retrieve through abort
const abortResult = await GenerationJobManager.abortJob(streamId);
expect(abortResult.collectedUsage).toEqual(collectedUsage);
expect(abortResult.collectedUsage).toHaveLength(2);
await GenerationJobManager.destroy();
});
it('should return empty collectedUsage when none set', async () => {
const { GenerationJobManager } = await import('../GenerationJobManager');
const { InMemoryJobStore } = await import('../implementations/InMemoryJobStore');
const { InMemoryEventTransport } = await import('../implementations/InMemoryEventTransport');
GenerationJobManager.configure({
jobStore: new InMemoryJobStore(),
eventTransport: new InMemoryEventTransport(),
isRedis: false,
cleanupOnComplete: false,
});
await GenerationJobManager.initialize();
const streamId = `no-usage-test-${Date.now()}`;
await GenerationJobManager.createJob(streamId, 'user-1');
const abortResult = await GenerationJobManager.abortJob(streamId);
expect(abortResult.collectedUsage).toEqual([]);
await GenerationJobManager.destroy();
});
it('should not set collectedUsage if job does not exist', async () => {
const { GenerationJobManager } = await import('../GenerationJobManager');
const { InMemoryJobStore } = await import('../implementations/InMemoryJobStore');
const { InMemoryEventTransport } = await import('../implementations/InMemoryEventTransport');
GenerationJobManager.configure({
jobStore: new InMemoryJobStore(),
eventTransport: new InMemoryEventTransport(),
isRedis: false,
});
await GenerationJobManager.initialize();
const collectedUsage: UsageMetadata[] = [
{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' },
];
// This should not throw, just silently do nothing
GenerationJobManager.setCollectedUsage('non-existent-stream', collectedUsage);
const abortResult = await GenerationJobManager.abortJob('non-existent-stream');
expect(abortResult.success).toBe(false);
await GenerationJobManager.destroy();
});
});
describe('AbortJob - Text and CollectedUsage', () => {
beforeEach(() => {
jest.resetModules();
});
it('should extract text from content parts on abort', async () => {
const { GenerationJobManager } = await import('../GenerationJobManager');
const { InMemoryJobStore } = await import('../implementations/InMemoryJobStore');
const { InMemoryEventTransport } = await import('../implementations/InMemoryEventTransport');
GenerationJobManager.configure({
jobStore: new InMemoryJobStore(),
eventTransport: new InMemoryEventTransport(),
isRedis: false,
cleanupOnComplete: false,
});
await GenerationJobManager.initialize();
const streamId = `text-extract-${Date.now()}`;
await GenerationJobManager.createJob(streamId, 'user-1');
// Set content parts with text
const contentParts = [
{ type: 'text', text: 'Hello ' },
{ type: 'text', text: 'world!' },
];
GenerationJobManager.setContentParts(streamId, contentParts as never);
const abortResult = await GenerationJobManager.abortJob(streamId);
expect(abortResult.text).toBe('Hello world!');
expect(abortResult.success).toBe(true);
await GenerationJobManager.destroy();
});
it('should return empty text when no content parts', async () => {
const { GenerationJobManager } = await import('../GenerationJobManager');
const { InMemoryJobStore } = await import('../implementations/InMemoryJobStore');
const { InMemoryEventTransport } = await import('../implementations/InMemoryEventTransport');
GenerationJobManager.configure({
jobStore: new InMemoryJobStore(),
eventTransport: new InMemoryEventTransport(),
isRedis: false,
cleanupOnComplete: false,
});
await GenerationJobManager.initialize();
const streamId = `empty-text-${Date.now()}`;
await GenerationJobManager.createJob(streamId, 'user-1');
const abortResult = await GenerationJobManager.abortJob(streamId);
expect(abortResult.text).toBe('');
await GenerationJobManager.destroy();
});
it('should return both text and collectedUsage on abort', async () => {
const { GenerationJobManager } = await import('../GenerationJobManager');
const { InMemoryJobStore } = await import('../implementations/InMemoryJobStore');
const { InMemoryEventTransport } = await import('../implementations/InMemoryEventTransport');
GenerationJobManager.configure({
jobStore: new InMemoryJobStore(),
eventTransport: new InMemoryEventTransport(),
isRedis: false,
cleanupOnComplete: false,
});
await GenerationJobManager.initialize();
const streamId = `full-abort-${Date.now()}`;
await GenerationJobManager.createJob(streamId, 'user-1');
// Set content parts
const contentParts = [{ type: 'text', text: 'Partial response...' }];
GenerationJobManager.setContentParts(streamId, contentParts as never);
// Set collected usage
const collectedUsage: UsageMetadata[] = [
{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' },
{ input_tokens: 80, output_tokens: 40, model: 'claude-3' },
];
GenerationJobManager.setCollectedUsage(streamId, collectedUsage);
const abortResult = await GenerationJobManager.abortJob(streamId);
expect(abortResult.success).toBe(true);
expect(abortResult.text).toBe('Partial response...');
expect(abortResult.collectedUsage).toEqual(collectedUsage);
expect(abortResult.content).toHaveLength(1);
await GenerationJobManager.destroy();
});
it('should return empty values for non-existent job', async () => {
const { GenerationJobManager } = await import('../GenerationJobManager');
const { InMemoryJobStore } = await import('../implementations/InMemoryJobStore');
const { InMemoryEventTransport } = await import('../implementations/InMemoryEventTransport');
GenerationJobManager.configure({
jobStore: new InMemoryJobStore(),
eventTransport: new InMemoryEventTransport(),
isRedis: false,
});
await GenerationJobManager.initialize();
const abortResult = await GenerationJobManager.abortJob('non-existent-job');
expect(abortResult.success).toBe(false);
expect(abortResult.text).toBe('');
expect(abortResult.collectedUsage).toEqual([]);
expect(abortResult.content).toEqual([]);
expect(abortResult.jobData).toBeNull();
await GenerationJobManager.destroy();
});
});
describe('Real-world Scenarios', () => {
beforeEach(() => {
jest.resetModules();
});
it('should handle parallel agent abort with collected usage', async () => {
/**
* Scenario: User aborts a conversation with addedConvo (parallel agents)
* - Primary agent: gemini-3-flash-preview
* - Parallel agent: gpt-5.2
* Both should have their tokens spent on abort
*/
const { GenerationJobManager } = await import('../GenerationJobManager');
const { InMemoryJobStore } = await import('../implementations/InMemoryJobStore');
const { InMemoryEventTransport } = await import('../implementations/InMemoryEventTransport');
GenerationJobManager.configure({
jobStore: new InMemoryJobStore(),
eventTransport: new InMemoryEventTransport(),
isRedis: false,
cleanupOnComplete: false,
});
await GenerationJobManager.initialize();
const streamId = `parallel-abort-${Date.now()}`;
await GenerationJobManager.createJob(streamId, 'user-1');
// Simulate content from primary agent
const contentParts = [
{ type: 'text', text: 'Primary agent output...' },
{ type: 'text', text: 'More content...' },
];
GenerationJobManager.setContentParts(streamId, contentParts as never);
// Simulate collected usage from both agents (as would happen during generation)
const collectedUsage: UsageMetadata[] = [
{
input_tokens: 31596,
output_tokens: 151,
model: 'gemini-3-flash-preview',
},
{
input_tokens: 28000,
output_tokens: 120,
model: 'gpt-5.2',
},
];
GenerationJobManager.setCollectedUsage(streamId, collectedUsage);
// Abort the job
const abortResult = await GenerationJobManager.abortJob(streamId);
// Verify both models' usage is returned
expect(abortResult.success).toBe(true);
expect(abortResult.collectedUsage).toHaveLength(2);
expect(abortResult.collectedUsage[0].model).toBe('gemini-3-flash-preview');
expect(abortResult.collectedUsage[1].model).toBe('gpt-5.2');
// Verify text is extracted
expect(abortResult.text).toContain('Primary agent output');
await GenerationJobManager.destroy();
});
it('should handle abort with cache tokens from Anthropic', async () => {
const { GenerationJobManager } = await import('../GenerationJobManager');
const { InMemoryJobStore } = await import('../implementations/InMemoryJobStore');
const { InMemoryEventTransport } = await import('../implementations/InMemoryEventTransport');
GenerationJobManager.configure({
jobStore: new InMemoryJobStore(),
eventTransport: new InMemoryEventTransport(),
isRedis: false,
cleanupOnComplete: false,
});
await GenerationJobManager.initialize();
const streamId = `cache-abort-${Date.now()}`;
await GenerationJobManager.createJob(streamId, 'user-1');
// Anthropic-style cache tokens
const collectedUsage: UsageMetadata[] = [
{
input_tokens: 788,
output_tokens: 163,
cache_creation_input_tokens: 30808,
cache_read_input_tokens: 0,
model: 'claude-opus-4-5-20251101',
},
];
GenerationJobManager.setCollectedUsage(streamId, collectedUsage);
const abortResult = await GenerationJobManager.abortJob(streamId);
expect(abortResult.collectedUsage[0].cache_creation_input_tokens).toBe(30808);
await GenerationJobManager.destroy();
});
it('should handle abort with sequential tool calls usage', async () => {
/**
* Scenario: Single agent with multiple tool calls, aborted mid-execution
* Usage accumulates for each LLM call
*/
const { GenerationJobManager } = await import('../GenerationJobManager');
const { InMemoryJobStore } = await import('../implementations/InMemoryJobStore');
const { InMemoryEventTransport } = await import('../implementations/InMemoryEventTransport');
GenerationJobManager.configure({
jobStore: new InMemoryJobStore(),
eventTransport: new InMemoryEventTransport(),
isRedis: false,
cleanupOnComplete: false,
});
await GenerationJobManager.initialize();
const streamId = `sequential-abort-${Date.now()}`;
await GenerationJobManager.createJob(streamId, 'user-1');
// Usage from multiple sequential LLM calls (tool use pattern)
const collectedUsage: UsageMetadata[] = [
{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' }, // Initial call
{ input_tokens: 150, output_tokens: 30, model: 'gpt-4' }, // After tool result 1
{ input_tokens: 180, output_tokens: 20, model: 'gpt-4' }, // After tool result 2 (aborted here)
];
GenerationJobManager.setCollectedUsage(streamId, collectedUsage);
const abortResult = await GenerationJobManager.abortJob(streamId);
expect(abortResult.collectedUsage).toHaveLength(3);
// All three entries should be present for proper token accounting
await GenerationJobManager.destroy();
});
});


@@ -1,7 +1,12 @@
import { logger } from '@librechat/data-schemas';
import type { StandardGraph } from '@librechat/agents';
import type { Agents } from 'librechat-data-provider';
import type { IJobStore, SerializableJobData, JobStatus } from '~/stream/interfaces/IJobStore';
import type {
SerializableJobData,
UsageMetadata,
IJobStore,
JobStatus,
} from '~/stream/interfaces/IJobStore';
/**
* Content state for a job - volatile, in-memory only.
@@ -10,6 +15,7 @@ import type { IJobStore, SerializableJobData, JobStatus } from '~/stream/interfa
interface ContentState {
contentParts: Agents.MessageContentComplex[];
graphRef: WeakRef<StandardGraph> | null;
collectedUsage: UsageMetadata[];
}
/**
@@ -240,6 +246,7 @@ export class InMemoryJobStore implements IJobStore {
this.contentState.set(streamId, {
contentParts: [],
graphRef: new WeakRef(graph),
collectedUsage: [],
});
}
}
@@ -252,10 +259,30 @@ export class InMemoryJobStore implements IJobStore {
if (existing) {
existing.contentParts = contentParts;
} else {
this.contentState.set(streamId, { contentParts, graphRef: null });
this.contentState.set(streamId, { contentParts, graphRef: null, collectedUsage: [] });
}
}
/**
* Set collected usage reference for a job.
*/
setCollectedUsage(streamId: string, collectedUsage: UsageMetadata[]): void {
const existing = this.contentState.get(streamId);
if (existing) {
existing.collectedUsage = collectedUsage;
} else {
this.contentState.set(streamId, { contentParts: [], graphRef: null, collectedUsage });
}
}
/**
* Get collected usage for a job.
*/
getCollectedUsage(streamId: string): UsageMetadata[] {
const state = this.contentState.get(streamId);
return state?.collectedUsage ?? [];
}
/**
* Get content parts for a job.
* Returns live content from stored reference.


@@ -1,9 +1,14 @@
import { logger } from '@librechat/data-schemas';
import { createContentAggregator } from '@librechat/agents';
import type { IJobStore, SerializableJobData, JobStatus } from '~/stream/interfaces/IJobStore';
import type { StandardGraph } from '@librechat/agents';
import type { Agents } from 'librechat-data-provider';
import type { Redis, Cluster } from 'ioredis';
import type {
SerializableJobData,
UsageMetadata,
IJobStore,
JobStatus,
} from '~/stream/interfaces/IJobStore';
/**
* Key prefixes for Redis storage.
@@ -90,6 +95,13 @@ export class RedisJobStore implements IJobStore {
*/
private localGraphCache = new Map<string, WeakRef<StandardGraph>>();
/**
* Local cache for collectedUsage arrays.
* Generation happens on a single instance, so collectedUsage is only available locally.
* For cross-replica abort, the abort handler falls back to text-based token counting.
*/
private localCollectedUsageCache = new Map<string, UsageMetadata[]>();
/** Cleanup interval in ms (1 minute) */
private cleanupIntervalMs = 60000;
@@ -227,6 +239,7 @@ export class RedisJobStore implements IJobStore {
async deleteJob(streamId: string): Promise<void> {
// Clear local caches
this.localGraphCache.delete(streamId);
this.localCollectedUsageCache.delete(streamId);
// Note: userJobs cleanup is handled lazily via self-healing in getActiveJobIdsByUser
// In cluster mode, separate runningJobs (global) from stream-specific keys (same slot)
@@ -290,6 +303,7 @@ export class RedisJobStore implements IJobStore {
if (!job) {
await this.redis.srem(KEYS.runningJobs, streamId);
this.localGraphCache.delete(streamId);
this.localCollectedUsageCache.delete(streamId);
cleaned++;
continue;
}
@@ -298,6 +312,7 @@ export class RedisJobStore implements IJobStore {
if (job.status !== 'running') {
await this.redis.srem(KEYS.runningJobs, streamId);
this.localGraphCache.delete(streamId);
this.localCollectedUsageCache.delete(streamId);
cleaned++;
continue;
}
@@ -382,6 +397,7 @@ export class RedisJobStore implements IJobStore {
}
// Clear local caches
this.localGraphCache.clear();
this.localCollectedUsageCache.clear();
// Don't close the Redis connection - it's shared
logger.info('[RedisJobStore] Destroyed');
}
@@ -406,11 +422,28 @@ export class RedisJobStore implements IJobStore {
* No-op for Redis - content parts are reconstructed from chunks.
* Metadata (agentId, groupId) is embedded directly on content parts by the agent runtime.
*/
setContentParts(_streamId: string, _contentParts: Agents.MessageContentComplex[]): void {
setContentParts(): void {
// Content parts are reconstructed from chunks during getContentParts
// No separate storage needed
}
/**
* Store collectedUsage reference in local cache.
* This is used for abort handling to spend tokens for all models.
* Note: Only available on the generating instance; cross-replica abort uses fallback.
*/
setCollectedUsage(streamId: string, collectedUsage: UsageMetadata[]): void {
this.localCollectedUsageCache.set(streamId, collectedUsage);
}
/**
* Get collected usage for a job.
* Only available if this is the generating instance.
*/
getCollectedUsage(streamId: string): UsageMetadata[] {
return this.localCollectedUsageCache.get(streamId) ?? [];
}
/**
* Get aggregated content - tries local cache first, falls back to Redis reconstruction.
*
@@ -528,6 +561,7 @@ export class RedisJobStore implements IJobStore {
clearContentState(streamId: string): void {
// Clear local caches immediately
this.localGraphCache.delete(streamId);
this.localCollectedUsageCache.delete(streamId);
// Fire and forget - async cleanup for Redis
this.clearContentStateAsync(streamId).catch((err) => {


@@ -5,11 +5,12 @@ export {
} from './GenerationJobManager';
export type {
AbortResult,
SerializableJobData,
IEventTransport,
UsageMetadata,
AbortResult,
JobStatus,
IJobStore,
IEventTransport,
} from './interfaces/IJobStore';
export { createStreamServices } from './createStreamServices';


@@ -45,6 +45,54 @@ export interface SerializableJobData {
promptTokens?: number;
}
/**
* Usage metadata for token spending across different LLM providers.
*
* This interface supports two mutually exclusive cache token formats:
*
* **OpenAI format** (GPT-4, o1, etc.):
* - Uses `input_token_details.cache_creation` and `input_token_details.cache_read`
* - Cache tokens are nested under the `input_token_details` object
*
* **Anthropic format** (Claude models):
* - Uses `cache_creation_input_tokens` and `cache_read_input_tokens`
* - Cache tokens are top-level properties
*
* When processing usage data, check both formats:
* ```typescript
* const cacheCreation = usage.input_token_details?.cache_creation
* || usage.cache_creation_input_tokens || 0;
* ```
*/
export interface UsageMetadata {
/** Total input tokens (prompt tokens) */
input_tokens?: number;
/** Total output tokens (completion tokens) */
output_tokens?: number;
/** Model identifier that generated this usage */
model?: string;
/**
* OpenAI-style cache token details.
* Present for OpenAI models (GPT-4, o1, etc.)
*/
input_token_details?: {
/** Tokens written to cache */
cache_creation?: number;
/** Tokens read from cache */
cache_read?: number;
};
/**
* Anthropic-style cache creation tokens.
* Present for Claude models. Mutually exclusive with input_token_details.
*/
cache_creation_input_tokens?: number;
/**
* Anthropic-style cache read tokens.
* Present for Claude models. Mutually exclusive with input_token_details.
*/
cache_read_input_tokens?: number;
}
/**
* Result returned from aborting a job - contains all data needed
* for token spending and message saving without storing callbacks
@@ -58,6 +106,10 @@ export interface AbortResult {
content: Agents.MessageContentComplex[];
/** Final event to send to client */
finalEvent: unknown;
/** Concatenated text from all content parts for token counting fallback */
text: string;
/** Collected usage metadata from all models for token spending */
collectedUsage: UsageMetadata[];
}
/**
@@ -210,6 +262,23 @@ export interface IJobStore {
* @param runSteps - Run steps to save
*/
saveRunSteps?(streamId: string, runSteps: Agents.RunStep[]): Promise<void>;
/**
* Set collected usage reference for a job.
* This array accumulates token usage from all models during generation.
*
* @param streamId - The stream identifier
* @param collectedUsage - Array of usage metadata from all models
*/
setCollectedUsage(streamId: string, collectedUsage: UsageMetadata[]): void;
/**
* Get collected usage for a job.
*
* @param streamId - The stream identifier
* @returns Array of usage metadata or empty array
*/
getCollectedUsage(streamId: string): UsageMetadata[];
}
/**