mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-03-02 22:30:18 +01:00
📉 feat: Add Token Usage Tracking for Agents API Routes (#11600)
* feat: Implement token usage tracking for OpenAI and Responses controllers
  - Added functionality to record token usage against user balances in OpenAIChatCompletionController and createResponse functions.
  - Introduced new utility functions for managing token spending and structured token usage.
  - Enhanced error handling for token recording to improve logging and debugging capabilities.
  - Updated imports to include new usage tracking methods and configurations.
* test: Add unit tests for recordCollectedUsage function in usage.spec.ts
  - Introduced comprehensive tests for the recordCollectedUsage function, covering various scenarios including handling empty and null collectedUsage, single and multiple usage entries, and sequential and parallel execution cases.
  - Enhanced token handling tests to ensure correct calculations for both OpenAI and Anthropic formats, including cache token management.
  - Improved overall test coverage for usage tracking functionality, ensuring robust validation of expected behaviors and outcomes.
* test: Add unit tests for OpenAI and Responses API controllers
  - Introduced comprehensive unit tests for the OpenAIChatCompletionController and createResponse functions, focusing on the correct invocation of recordCollectedUsage for token spending.
  - Enhanced tests to validate the passing of balance and transactions configuration to the recordCollectedUsage function.
  - Ensured proper dependency injection of spendTokens and spendStructuredTokens in the usage recording process.
  - Improved overall test coverage for token usage tracking, ensuring robust validation of expected behaviors and outcomes.
This commit is contained in:
parent
d13037881a
commit
9a38af5875
7 changed files with 1190 additions and 3 deletions
146
packages/api/src/agents/usage.ts
Normal file
146
packages/api/src/agents/usage.ts
Normal file
|
|
@@ -0,0 +1,146 @@
|
|||
import { logger } from '@librechat/data-schemas';
|
||||
import type { TCustomConfig, TTransactionsConfig } from 'librechat-data-provider';
|
||||
import type { UsageMetadata } from '../stream/interfaces/IJobStore';
|
||||
import type { EndpointTokenConfig } from '../types/tokens';
|
||||
|
||||
/**
 * Flat token counts handed to the `spendTokens` dependency for a single usage
 * entry that carries no cache-related tokens.
 */
interface TokenUsage {
  /** Prompt-side count; `recordCollectedUsage` passes the entry's `input_tokens` here. */
  promptTokens?: number;
  /** Completion-side count; `recordCollectedUsage` passes the entry's `output_tokens` here. */
  completionTokens?: number;
}
|
||||
|
||||
/**
 * Prompt-side token counts split into regular input and cache activity, as
 * built by `recordCollectedUsage` when an entry reports cache tokens.
 */
interface StructuredPromptTokens {
  /** The entry's `input_tokens` value. */
  input?: number;
  /** Cache-creation tokens reported by the entry. */
  write?: number;
  /** Cache-read tokens reported by the entry. */
  read?: number;
}
|
||||
|
||||
/**
 * Token usage handed to the `spendStructuredTokens` dependency: a structured
 * prompt breakdown plus a flat completion count.
 */
interface StructuredTokenUsage {
  /** Prompt tokens split into input / cache write / cache read. */
  promptTokens?: StructuredPromptTokens;
  /** Completion-side count; `recordCollectedUsage` passes the entry's `output_tokens` here. */
  completionTokens?: number;
}
|
||||
|
||||
/**
 * Transaction metadata passed as the first argument to both spend functions.
 * `recordCollectedUsage` builds one of these per usage entry.
 */
interface TxMetadata {
  /** Identifier of the user the usage is recorded for. */
  user: string;
  /** Model name; `recordCollectedUsage` prefers the entry's own model over the request-level one. */
  model?: string;
  /** Transaction context label; `recordCollectedUsage` defaults it to `'message'`. */
  context: string;
  /** Conversation the usage belongs to. */
  conversationId: string;
  /** Balance configuration, forwarded unchanged to the spend functions. */
  balance?: Partial<TCustomConfig['balance']> | null;
  /** Transactions configuration, forwarded unchanged to the spend functions. */
  transactions?: Partial<TTransactionsConfig>;
  /** Endpoint token configuration, forwarded unchanged to the spend functions. */
  endpointTokenConfig?: EndpointTokenConfig;
}
|
||||
|
||||
/**
 * Signature of the injected function that records flat (non-cached) token
 * usage for one transaction. The resolved value is not inspected here.
 */
type SpendTokensFn = (
  txData: TxMetadata,
  tokenUsage: TokenUsage,
) => Promise<unknown>;
|
||||
/**
 * Signature of the injected function that records token usage with a
 * structured prompt breakdown (input / cache write / cache read) for one
 * transaction. The resolved value is not inspected here.
 */
type SpendStructuredTokensFn = (
  txData: TxMetadata,
  tokenUsage: StructuredTokenUsage,
) => Promise<unknown>;
|
||||
|
||||
/**
 * Dependencies injected into `recordCollectedUsage` so the spending
 * implementation can be supplied (or stubbed) by the caller.
 */
export interface RecordUsageDeps {
  /** Called for usage entries that report no cache-creation or cache-read tokens. */
  spendTokens: SpendTokensFn;
  /** Called for usage entries that report cache-creation or cache-read tokens. */
  spendStructuredTokens: SpendStructuredTokensFn;
}
|
||||
|
||||
/**
 * Input to `recordCollectedUsage`: who the usage belongs to, the collected
 * usage entries, and configuration forwarded to the spend functions.
 */
export interface RecordUsageParams {
  /** Identifier of the user the usage is recorded for. */
  user: string;
  /** Conversation the usage belongs to. */
  conversationId: string;
  /** Usage entries to record; an empty list results in no spending. */
  collectedUsage: UsageMetadata[];
  /** Fallback model name used when a usage entry does not carry its own `model`. */
  model?: string;
  /** Transaction context label; defaults to `'message'` when omitted. */
  context?: string;
  /** Balance configuration, forwarded unchanged to the spend functions. */
  balance?: Partial<TCustomConfig['balance']> | null;
  /** Transactions configuration, forwarded unchanged to the spend functions. */
  transactions?: Partial<TTransactionsConfig>;
  /** Endpoint token configuration, forwarded unchanged to the spend functions. */
  endpointTokenConfig?: EndpointTokenConfig;
}
|
||||
|
||||
/**
 * Aggregate token counts returned by `recordCollectedUsage`.
 */
export interface RecordUsageResult {
  /** Input tokens of the first usage entry, including its cache-creation and cache-read tokens. */
  input_tokens: number;
  /** Sum of `output_tokens` across all usage entries. */
  output_tokens: number;
}
|
||||
|
||||
/** Cache-related token counts extracted from a single usage entry. */
interface CacheTokenCounts {
  write: number;
  read: number;
}

/**
 * Reads cache-creation ("write") and cache-read token counts from a usage entry.
 * `input_token_details` is consulted first, then the top-level
 * `cache_*_input_tokens` fields; missing, zero or non-numeric values yield 0.
 */
function readCacheTokens(usage: UsageMetadata | null | undefined): CacheTokenCounts {
  return {
    write:
      Number(usage?.input_token_details?.cache_creation) ||
      Number(usage?.cache_creation_input_tokens) ||
      0,
    read:
      Number(usage?.input_token_details?.cache_read) ||
      Number(usage?.cache_read_input_tokens) ||
      0,
  };
}

/**
 * Starts a spend call without awaiting it and logs any failure, whether the
 * call throws synchronously, returns a rejected promise, or returns a
 * non-promise value.
 */
function spendInBackground(spend: () => Promise<unknown>, errorMessage: string): void {
  try {
    // Promise.resolve returns the same promise for native promises, so the
    // rejection handler is attached exactly as a direct `.catch` would be.
    void Promise.resolve(spend()).catch((err: unknown) => {
      logger.error(errorMessage, err);
    });
  } catch (err: unknown) {
    logger.error(errorMessage, err);
  }
}

/**
 * Records token usage for collected LLM calls and spends tokens against balance.
 * This handles both sequential execution (tool calls) and parallel execution (multiple agents).
 *
 * Every non-empty usage entry triggers exactly one spend call:
 * `spendStructuredTokens` when the entry reports cache-creation or cache-read
 * tokens, `spendTokens` otherwise. Spend calls are started but not awaited;
 * their failures are logged and never abort the remaining entries.
 *
 * @param deps - Injected spend functions.
 * @param params - User/conversation identifiers, usage entries and forwarded configuration.
 * @returns `undefined` when `collectedUsage` is missing or empty; otherwise the
 * first entry's input tokens (including its cache tokens) and the sum of
 * output tokens over all entries.
 */
export async function recordCollectedUsage(
  deps: RecordUsageDeps,
  params: RecordUsageParams,
): Promise<RecordUsageResult | undefined> {
  const {
    user,
    model,
    balance,
    transactions,
    conversationId,
    collectedUsage,
    endpointTokenConfig,
    context = 'message',
  } = params;

  const { spendTokens, spendStructuredTokens } = deps;

  if (!collectedUsage || !collectedUsage.length) {
    return;
  }

  // Only the first entry contributes to the reported input total.
  const firstUsage = collectedUsage[0];
  const firstCache = readCacheTokens(firstUsage);
  const input_tokens =
    (Number(firstUsage?.input_tokens) || 0) + firstCache.write + firstCache.read;

  let total_output_tokens = 0;

  for (const usage of collectedUsage) {
    // Entries may be null/undefined at runtime; skip them.
    if (!usage) {
      continue;
    }

    const { write: cache_creation, read: cache_read } = readCacheTokens(usage);

    total_output_tokens += Number(usage.output_tokens) || 0;

    const txMetadata: TxMetadata = {
      context,
      balance,
      transactions,
      conversationId,
      user,
      endpointTokenConfig,
      // Prefer the model recorded on the entry over the request-level one.
      model: usage.model ?? model,
    };

    if (cache_creation > 0 || cache_read > 0) {
      spendInBackground(
        () =>
          spendStructuredTokens(txMetadata, {
            promptTokens: {
              input: usage.input_tokens,
              write: cache_creation,
              read: cache_read,
            },
            completionTokens: usage.output_tokens,
          }),
        '[packages/api #recordCollectedUsage] Error spending structured tokens',
      );
      continue;
    }

    spendInBackground(
      () =>
        spendTokens(txMetadata, {
          promptTokens: usage.input_tokens,
          completionTokens: usage.output_tokens,
        }),
      '[packages/api #recordCollectedUsage] Error spending tokens',
    );
  }

  return {
    input_tokens,
    output_tokens: total_output_tokens,
  };
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue