📉 feat: Add Token Usage Tracking for Agents API Routes (#11600)

* feat: Implement token usage tracking for OpenAI and Responses controllers

- Added functionality in OpenAIChatCompletionController and createResponse to record token usage against user balances (wiring sketched below).
- Introduced new utility functions for managing token spending and structured token usage.
- Enhanced error handling around token recording to improve logging and debugging.
- Updated imports to include the new usage-tracking methods and configurations.
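
A minimal sketch of the wiring this adds (recordCollectedUsage, spendTokens, and spendStructuredTokens are from this PR; the import paths, appConfig, and request fields are assumptions for illustration):

    import { recordCollectedUsage } from '@librechat/api';
    import { spendTokens, spendStructuredTokens } from '~/models/spendTokens';

    // Inside the controller, after the stream completes and usage has been collected:
    await recordCollectedUsage(
      // deps: the spenders are injected rather than imported by the utility itself
      { spendTokens, spendStructuredTokens },
      {
        user: req.user.id,
        conversationId,
        collectedUsage,
        model: req.body.model,
        context: 'message',
        // balance/transactions config is forwarded so spending respects app settings
        balance: appConfig?.balance,
        transactions: appConfig?.transactions,
      },
    );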

* test: Add unit tests for recordCollectedUsage function in usage.spec.ts

- Introduced comprehensive tests for recordCollectedUsage, covering empty and null collectedUsage, single and multiple usage entries, and both sequential and parallel execution (a representative case is sketched below).
- Enhanced token-handling tests to verify correct calculations for both the OpenAI and Anthropic usage formats, including cache token management.
- Improved overall test coverage for usage tracking, validating expected behaviors and outcomes.
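
One representative case in the style described above (Jest; the import path and expected values are illustrative, not copied from usage.spec.ts):

    import { recordCollectedUsage } from '../utils/usage';

    it('spends structured tokens when cache tokens are present (Anthropic format)', async () => {
      const spendTokens = jest.fn().mockResolvedValue(undefined);
      const spendStructuredTokens = jest.fn().mockResolvedValue(undefined);

      const result = await recordCollectedUsage(
        { spendTokens, spendStructuredTokens },
        {
          user: 'user-1',
          conversationId: 'convo-1',
          collectedUsage: [
            {
              input_tokens: 100,
              output_tokens: 50,
              cache_creation_input_tokens: 20,
              cache_read_input_tokens: 30,
            },
          ],
        },
      );

      // Cache tokens route the entry through spendStructuredTokens, never spendTokens.
      expect(spendStructuredTokens).toHaveBeenCalledTimes(1);
      expect(spendTokens).not.toHaveBeenCalled();
      // Reported input = 100 + 20 (cache write) + 30 (cache read).
      expect(result).toEqual({ input_tokens: 150, output_tokens: 50 });
    });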

* test: Add unit tests for OpenAI and Responses API controllers

- Introduced comprehensive unit tests for OpenAIChatCompletionController and createResponse, focusing on the correct invocation of recordCollectedUsage for token spending (see the sketch after this list).
- Enhanced tests to validate that the balance and transactions configuration is passed through to recordCollectedUsage.
- Ensured proper dependency injection of spendTokens and spendStructuredTokens in the usage-recording process.
- Improved overall test coverage for token usage tracking.
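
A sketch of the dependency-injection check (the mocked module path, config values, and request/response setup are assumptions; stubbing is elided):

    jest.mock('@librechat/api', () => ({
      ...jest.requireActual('@librechat/api'),
      recordCollectedUsage: jest.fn(),
    }));
    import { recordCollectedUsage } from '@librechat/api';

    it('passes balance and transactions config to recordCollectedUsage', async () => {
      // req and res are stubbed elsewhere in the suite (elided here).
      await OpenAIChatCompletionController(req, res);

      expect(recordCollectedUsage).toHaveBeenCalledWith(
        // deps: both spenders injected as functions
        expect.objectContaining({
          spendTokens: expect.any(Function),
          spendStructuredTokens: expect.any(Function),
        }),
        // params: balance/transactions config forwarded from app config
        expect.objectContaining({
          balance: expect.objectContaining({ enabled: true }),
          transactions: expect.objectContaining({ enabled: true }),
        }),
      );
    });
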
Danny Avila 2026-02-01 21:36:51 -05:00 committed by GitHub
parent d13037881a
commit 9a38af5875
7 changed files with 1190 additions and 3 deletions

@@ -0,0 +1,146 @@
import { logger } from '@librechat/data-schemas';
import type { TCustomConfig, TTransactionsConfig } from 'librechat-data-provider';
import type { UsageMetadata } from '../stream/interfaces/IJobStore';
import type { EndpointTokenConfig } from '../types/tokens';

interface TokenUsage {
  promptTokens?: number;
  completionTokens?: number;
}

interface StructuredPromptTokens {
  input?: number;
  write?: number;
  read?: number;
}

interface StructuredTokenUsage {
  promptTokens?: StructuredPromptTokens;
  completionTokens?: number;
}

interface TxMetadata {
  user: string;
  model?: string;
  context: string;
  conversationId: string;
  balance?: Partial<TCustomConfig['balance']> | null;
  transactions?: Partial<TTransactionsConfig>;
  endpointTokenConfig?: EndpointTokenConfig;
}

type SpendTokensFn = (txData: TxMetadata, tokenUsage: TokenUsage) => Promise<unknown>;
type SpendStructuredTokensFn = (
  txData: TxMetadata,
  tokenUsage: StructuredTokenUsage,
) => Promise<unknown>;

export interface RecordUsageDeps {
  spendTokens: SpendTokensFn;
  spendStructuredTokens: SpendStructuredTokensFn;
}

export interface RecordUsageParams {
  user: string;
  conversationId: string;
  collectedUsage: UsageMetadata[];
  model?: string;
  context?: string;
  balance?: Partial<TCustomConfig['balance']> | null;
  transactions?: Partial<TTransactionsConfig>;
  endpointTokenConfig?: EndpointTokenConfig;
}

export interface RecordUsageResult {
  input_tokens: number;
  output_tokens: number;
}

/**
 * Records token usage for collected LLM calls and spends tokens against balance.
 * This handles both sequential execution (tool calls) and parallel execution (multiple agents).
 */
export async function recordCollectedUsage(
  deps: RecordUsageDeps,
  params: RecordUsageParams,
): Promise<RecordUsageResult | undefined> {
  const {
    user,
    model,
    balance,
    transactions,
    conversationId,
    collectedUsage,
    endpointTokenConfig,
    context = 'message',
  } = params;
  const { spendTokens, spendStructuredTokens } = deps;
  if (!collectedUsage || !collectedUsage.length) {
    return;
  }

  /** Reported input tokens come from the first entry only; each spend call below
   * still uses its own entry's input_tokens. The cache fallbacks accept both the
   * OpenAI shape (`input_token_details`) and the Anthropic shape
   * (`cache_*_input_tokens`). */
  const firstUsage = collectedUsage[0];
  const input_tokens =
    (firstUsage?.input_tokens || 0) +
    (Number(firstUsage?.input_token_details?.cache_creation) ||
      Number(firstUsage?.cache_creation_input_tokens) ||
      0) +
    (Number(firstUsage?.input_token_details?.cache_read) ||
      Number(firstUsage?.cache_read_input_tokens) ||
      0);

  let total_output_tokens = 0;
  for (const usage of collectedUsage) {
    if (!usage) {
      continue;
    }

    const cache_creation =
      Number(usage.input_token_details?.cache_creation) ||
      Number(usage.cache_creation_input_tokens) ||
      0;
    const cache_read =
      Number(usage.input_token_details?.cache_read) || Number(usage.cache_read_input_tokens) || 0;

    total_output_tokens += Number(usage.output_tokens) || 0;

    const txMetadata: TxMetadata = {
      context,
      balance,
      transactions,
      conversationId,
      user,
      endpointTokenConfig,
      model: usage.model ?? model,
    };

    /** Structured spending splits the prompt into input/write/read buckets so cache
     * writes and reads can be priced separately from uncached input tokens. */
    if (cache_creation > 0 || cache_read > 0) {
      spendStructuredTokens(txMetadata, {
        promptTokens: {
          input: usage.input_tokens,
          write: cache_creation,
          read: cache_read,
        },
        completionTokens: usage.output_tokens,
      }).catch((err) => {
        logger.error('[packages/api #recordCollectedUsage] Error spending structured tokens', err);
      });
      continue;
    }

    /** Spend calls are fire-and-forget: failures are logged but never block the response. */
    spendTokens(txMetadata, {
      promptTokens: usage.input_tokens,
      completionTokens: usage.output_tokens,
    }).catch((err) => {
      logger.error('[packages/api #recordCollectedUsage] Error spending tokens', err);
    });
  }

  return {
    input_tokens,
    output_tokens: total_output_tokens,
  };
}
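
The fallback chains above normalize two provider shapes for cache tokens. For example (field names are from the code; the values are made up):

    // OpenAI-style usage metadata:
    const openAIUsage = {
      input_tokens: 1200,
      output_tokens: 300,
      input_token_details: { cache_creation: 0, cache_read: 1024 },
    };
    // Anthropic-style usage metadata:
    const anthropicUsage = {
      input_tokens: 1200,
      output_tokens: 300,
      cache_creation_input_tokens: 0,
      cache_read_input_tokens: 1024,
    };
    // Both resolve cache_read to 1024 via:
    //   Number(usage.input_token_details?.cache_read) || Number(usage.cache_read_input_tokens) || 0

Note that the spend calls are intentionally not awaited: a failed balance write is logged but never delays or fails the user-facing response.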