Mirror of https://github.com/danny-avila/LibreChat.git · synced 2026-01-14 06:28:52 +01:00
🪙 refactor: Collected Usage & Anthropic Prompt Caching (#11319)
Some checks are pending
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Waiting to run
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Waiting to run
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Waiting to run
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Blocked by required conditions
* 🔧 refactor: Improve token calculation in AgentClient.recordCollectedUsage
  - Updated the token calculation logic to sum output tokens directly from all entries, addressing issues with negative values in parallel execution scenarios.
  - Added comments for clarity on the usage of input tokens and output tokens.
  - Introduced a new test file for comprehensive testing of the recordCollectedUsage function, covering various execution scenarios including sequential and parallel processing, cache token handling, and model fallback logic.
* 🔧 refactor: Anthropic `promptCache` handling in LLM configuration
* 🔧 test: Add comprehensive test for cache token handling in recordCollectedUsage
  - Introduced a new test case to validate the handling of cache tokens across multiple tool calls in the recordCollectedUsage function.
  - Ensured correct calculations for input and output tokens, including scenarios with cache creation and reading.
  - Verified the expected interactions with token spending methods to enhance the robustness of the token management logic.
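For reference, a minimal, self-contained sketch of the accounting approach described above; the helper name summarizeCollectedUsage is illustrative and not part of this commit, which implements the logic inside AgentClient.recordCollectedUsage.

// Illustrative sketch only: input comes from the first entry (plus any cache
// write/read tokens), while output is the plain sum of every entry's
// output_tokens, so it stays positive for sequential and parallel agent runs.
function summarizeCollectedUsage(collectedUsage = []) {
  if (!collectedUsage.length) {
    return null;
  }

  const first = collectedUsage[0] ?? {};
  // Supports both OpenAI-style `input_token_details` and Anthropic-style `cache_*_input_tokens`
  const cacheWrite =
    Number(first.input_token_details?.cache_creation) ||
    Number(first.cache_creation_input_tokens) ||
    0;
  const cacheRead =
    Number(first.input_token_details?.cache_read) || Number(first.cache_read_input_tokens) || 0;

  const input_tokens = (Number(first.input_tokens) || 0) + cacheWrite + cacheRead;

  // Sum outputs directly instead of deriving them incrementally from growing inputs
  const output_tokens = collectedUsage.reduce(
    (sum, usage) => sum + (Number(usage?.output_tokens) || 0),
    0,
  );

  return { input_tokens, output_tokens };
}

Run against the cache-token fixture later in this diff (first call: input_tokens 788 with cache_creation 30808), this yields input_tokens = 788 + 30808 + 0 = 31596, matching the spec's expectation.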
parent 1329e16d3a · commit 2a50c372ef
8 changed files with 828 additions and 40 deletions
|
|
@@ -46,7 +46,7 @@
 "@googleapis/youtube": "^20.0.0",
 "@keyv/redis": "^4.3.3",
 "@langchain/core": "^0.3.80",
-"@librechat/agents": "^3.0.66",
+"@librechat/agents": "^3.0.77",
 "@librechat/api": "*",
 "@librechat/data-schemas": "*",
 "@microsoft/microsoft-graph-client": "^3.0.7",
|||
|
|
@@ -784,6 +784,7 @@ class AgentClient extends BaseClient {
     if (!collectedUsage || !collectedUsage.length) {
       return;
     }
     // Use first entry's input_tokens as the base input (represents initial user message context)
     // Support both OpenAI format (input_token_details) and Anthropic format (cache_*_input_tokens)
     const firstUsage = collectedUsage[0];
     const input_tokens =
@@ -795,10 +796,11 @@ class AgentClient extends BaseClient {
         Number(firstUsage?.cache_read_input_tokens) ||
         0);

-    let output_tokens = 0;
-    let previousTokens = input_tokens; // Start with original input
-    for (let i = 0; i < collectedUsage.length; i++) {
-      const usage = collectedUsage[i];
+    // Sum output_tokens directly from all entries - works for both sequential and parallel execution
+    // This avoids the incremental calculation that produced negative values for parallel agents
+    let total_output_tokens = 0;
+
+    for (const usage of collectedUsage) {
       if (!usage) {
         continue;
       }
@@ -811,6 +813,9 @@ class AgentClient extends BaseClient {
       const cache_read =
         Number(usage.input_token_details?.cache_read) || Number(usage.cache_read_input_tokens) || 0;

+      // Accumulate output tokens for the usage summary
+      total_output_tokens += Number(usage.output_tokens) || 0;
+
       const txMetadata = {
         context,
         balance,
@@ -821,18 +826,6 @@ class AgentClient extends BaseClient {
         model: usage.model ?? model ?? this.model ?? this.options.agent.model_parameters.model,
       };

-      if (i > 0) {
-        // Count new tokens generated (input_tokens minus previous accumulated tokens)
-        output_tokens +=
-          (Number(usage.input_tokens) || 0) + cache_creation + cache_read - previousTokens;
-      }
-
-      // Add this message's output tokens
-      output_tokens += Number(usage.output_tokens) || 0;
-
-      // Update previousTokens to include this message's output
-      previousTokens += Number(usage.output_tokens) || 0;
-
       if (cache_creation > 0 || cache_read > 0) {
         spendStructuredTokens(txMetadata, {
           promptTokens: {
@@ -862,7 +855,7 @@ class AgentClient extends BaseClient {

     this.usage = {
       input_tokens,
-      output_tokens,
+      output_tokens: total_output_tokens,
     };
   }

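To see why the incremental formula removed above could go negative for parallel agents, and why the straight sum that replaces it cannot, here is a small self-contained comparison using the 200/100 and 50/30 token counts exercised by the new spec below; oldOutputTokens and newOutputTokens are illustrative helpers, not code from this commit.

// Parallel agents report independent inputs: the second entry's input_tokens (50)
// is smaller than previousTokens (200 input + 100 output), so the old delta turns negative.
const collectedUsage = [
  { input_tokens: 200, output_tokens: 100 },
  { input_tokens: 50, output_tokens: 30 },
];

function oldOutputTokens(entries) {
  let output_tokens = 0;
  let previousTokens = Number(entries[0]?.input_tokens) || 0;
  for (let i = 0; i < entries.length; i++) {
    const usage = entries[i];
    if (i > 0) {
      // (input_tokens + cache_creation + cache_read - previousTokens); caches are 0 here
      output_tokens += (Number(usage.input_tokens) || 0) - previousTokens;
    }
    output_tokens += Number(usage.output_tokens) || 0;
    previousTokens += Number(usage.output_tokens) || 0;
  }
  return output_tokens;
}

function newOutputTokens(entries) {
  // Straight sum of per-call generations, as in the refactored recordCollectedUsage
  return entries.reduce((sum, usage) => sum + (Number(usage?.output_tokens) || 0), 0);
}

console.log(oldOutputTokens(collectedUsage)); // -120
console.log(newOutputTokens(collectedUsage)); // 130

The new spec also asserts that the resulting usage passes the Number(usage.output_tokens) > 0 check in BaseClient.sendMessage, which the old negative totals failed.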
712 api/server/controllers/agents/recordCollectedUsage.spec.js (new file)
@@ -0,0 +1,712 @@
|
|||
/**
|
||||
* Tests for AgentClient.recordCollectedUsage
|
||||
*
|
||||
* This is a critical function that handles token spending for agent LLM calls.
|
||||
* It must correctly handle:
|
||||
* - Sequential execution (single agent with tool calls)
|
||||
* - Parallel execution (multiple agents with independent inputs)
|
||||
* - Cache token handling (OpenAI and Anthropic formats)
|
||||
*/
|
||||
|
||||
const { EModelEndpoint } = require('librechat-data-provider');
|
||||
|
||||
// Mock dependencies before requiring the module
|
||||
const mockSpendTokens = jest.fn().mockResolvedValue();
|
||||
const mockSpendStructuredTokens = jest.fn().mockResolvedValue();
|
||||
|
||||
jest.mock('~/models/spendTokens', () => ({
|
||||
spendTokens: (...args) => mockSpendTokens(...args),
|
||||
spendStructuredTokens: (...args) => mockSpendStructuredTokens(...args),
|
||||
}));
|
||||
|
||||
jest.mock('~/config', () => ({
|
||||
logger: {
|
||||
debug: jest.fn(),
|
||||
error: jest.fn(),
|
||||
warn: jest.fn(),
|
||||
info: jest.fn(),
|
||||
},
|
||||
getMCPManager: jest.fn(() => ({
|
||||
formatInstructionsForContext: jest.fn(),
|
||||
})),
|
||||
}));
|
||||
|
||||
jest.mock('@librechat/agents', () => ({
|
||||
...jest.requireActual('@librechat/agents'),
|
||||
createMetadataAggregator: () => ({
|
||||
handleLLMEnd: jest.fn(),
|
||||
collected: [],
|
||||
}),
|
||||
}));
|
||||
|
||||
const AgentClient = require('./client');
|
||||
|
||||
describe('AgentClient - recordCollectedUsage', () => {
|
||||
let client;
|
||||
let mockAgent;
|
||||
let mockOptions;
|
||||
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks();
|
||||
|
||||
mockAgent = {
|
||||
id: 'agent-123',
|
||||
endpoint: EModelEndpoint.openAI,
|
||||
provider: EModelEndpoint.openAI,
|
||||
model_parameters: {
|
||||
model: 'gpt-4',
|
||||
},
|
||||
};
|
||||
|
||||
mockOptions = {
|
||||
req: {
|
||||
user: { id: 'user-123' },
|
||||
body: { model: 'gpt-4', endpoint: EModelEndpoint.openAI },
|
||||
},
|
||||
res: {},
|
||||
agent: mockAgent,
|
||||
endpointTokenConfig: {},
|
||||
};
|
||||
|
||||
client = new AgentClient(mockOptions);
|
||||
client.conversationId = 'convo-123';
|
||||
client.user = 'user-123';
|
||||
});
|
||||
|
||||
describe('basic functionality', () => {
|
||||
it('should return early if collectedUsage is empty', async () => {
|
||||
await client.recordCollectedUsage({
|
||||
collectedUsage: [],
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
expect(mockSpendTokens).not.toHaveBeenCalled();
|
||||
expect(mockSpendStructuredTokens).not.toHaveBeenCalled();
|
||||
expect(client.usage).toBeUndefined();
|
||||
});
|
||||
|
||||
it('should return early if collectedUsage is null', async () => {
|
||||
await client.recordCollectedUsage({
|
||||
collectedUsage: null,
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
expect(mockSpendTokens).not.toHaveBeenCalled();
|
||||
expect(client.usage).toBeUndefined();
|
||||
});
|
||||
|
||||
it('should handle single usage entry correctly', async () => {
|
||||
const collectedUsage = [{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' }];
|
||||
|
||||
await client.recordCollectedUsage({
|
||||
collectedUsage,
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
expect(mockSpendTokens).toHaveBeenCalledTimes(1);
|
||||
expect(mockSpendTokens).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
conversationId: 'convo-123',
|
||||
user: 'user-123',
|
||||
model: 'gpt-4',
|
||||
}),
|
||||
{ promptTokens: 100, completionTokens: 50 },
|
||||
);
|
||||
expect(client.usage.input_tokens).toBe(100);
|
||||
expect(client.usage.output_tokens).toBe(50);
|
||||
});
|
||||
|
||||
it('should skip null entries in collectedUsage', async () => {
|
||||
const collectedUsage = [
|
||||
{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' },
|
||||
null,
|
||||
{ input_tokens: 200, output_tokens: 60, model: 'gpt-4' },
|
||||
];
|
||||
|
||||
await client.recordCollectedUsage({
|
||||
collectedUsage,
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
expect(mockSpendTokens).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
});
|
||||
|
||||
describe('sequential execution (single agent with tool calls)', () => {
|
||||
it('should calculate tokens correctly for sequential tool calls', async () => {
|
||||
// Sequential flow: output of call N becomes part of input for call N+1
|
||||
// Call 1: input=100, output=50
|
||||
// Call 2: input=150 (100+50), output=30
|
||||
// Call 3: input=180 (150+30), output=20
|
||||
const collectedUsage = [
|
||||
{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' },
|
||||
{ input_tokens: 150, output_tokens: 30, model: 'gpt-4' },
|
||||
{ input_tokens: 180, output_tokens: 20, model: 'gpt-4' },
|
||||
];
|
||||
|
||||
await client.recordCollectedUsage({
|
||||
collectedUsage,
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
expect(mockSpendTokens).toHaveBeenCalledTimes(3);
|
||||
// Total output should be sum of all output_tokens: 50 + 30 + 20 = 100
|
||||
expect(client.usage.output_tokens).toBe(100);
|
||||
expect(client.usage.input_tokens).toBe(100); // First entry's input
|
||||
});
|
||||
});
|
||||
|
||||
describe('parallel execution (multiple agents)', () => {
|
||||
it('should handle parallel agents with independent input tokens', async () => {
|
||||
// Parallel agents have INDEPENDENT input tokens (not cumulative)
|
||||
// Agent A: input=100, output=50
|
||||
// Agent B: input=80, output=40 (different context, not 100+50)
|
||||
const collectedUsage = [
|
||||
{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' },
|
||||
{ input_tokens: 80, output_tokens: 40, model: 'gpt-4' },
|
||||
];
|
||||
|
||||
await client.recordCollectedUsage({
|
||||
collectedUsage,
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
expect(mockSpendTokens).toHaveBeenCalledTimes(2);
|
||||
// Expected total output: 50 + 40 = 90
|
||||
// output_tokens must be positive and should reflect total output
|
||||
expect(client.usage.output_tokens).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('should NOT produce negative output_tokens for parallel execution', async () => {
|
||||
// Critical bug scenario: parallel agents where second agent has LOWER input tokens
|
||||
const collectedUsage = [
|
||||
{ input_tokens: 200, output_tokens: 100, model: 'gpt-4' },
|
||||
{ input_tokens: 50, output_tokens: 30, model: 'gpt-4' },
|
||||
];
|
||||
|
||||
await client.recordCollectedUsage({
|
||||
collectedUsage,
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
// output_tokens MUST be positive for proper token tracking
|
||||
expect(client.usage.output_tokens).toBeGreaterThan(0);
|
||||
// Correct value should be 100 + 30 = 130
|
||||
});
|
||||
|
||||
it('should calculate correct total output for parallel agents', async () => {
|
||||
// Three parallel agents with independent contexts
|
||||
const collectedUsage = [
|
||||
{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' },
|
||||
{ input_tokens: 120, output_tokens: 60, model: 'gpt-4-turbo' },
|
||||
{ input_tokens: 80, output_tokens: 40, model: 'claude-3' },
|
||||
];
|
||||
|
||||
await client.recordCollectedUsage({
|
||||
collectedUsage,
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
expect(mockSpendTokens).toHaveBeenCalledTimes(3);
|
||||
// Total output should be 50 + 60 + 40 = 150
|
||||
expect(client.usage.output_tokens).toBe(150);
|
||||
});
|
||||
|
||||
it('should handle worst-case parallel scenario without negative tokens', async () => {
|
||||
// Extreme case: first agent has very high input, subsequent have low
|
||||
const collectedUsage = [
|
||||
{ input_tokens: 1000, output_tokens: 500, model: 'gpt-4' },
|
||||
{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' },
|
||||
{ input_tokens: 50, output_tokens: 25, model: 'gpt-4' },
|
||||
];
|
||||
|
||||
await client.recordCollectedUsage({
|
||||
collectedUsage,
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
// Must be positive, should be 500 + 50 + 25 = 575
|
||||
expect(client.usage.output_tokens).toBeGreaterThan(0);
|
||||
expect(client.usage.output_tokens).toBe(575);
|
||||
});
|
||||
});
|
||||
|
||||
describe('real-world scenarios', () => {
|
||||
it('should correctly sum output tokens for sequential tool calls with growing context', async () => {
|
||||
// Real production data: Claude Opus with multiple tool calls
|
||||
// Context grows as tool results are added, but output_tokens should only count model generations
|
||||
const collectedUsage = [
|
||||
{
|
||||
input_tokens: 31596,
|
||||
output_tokens: 151,
|
||||
total_tokens: 31747,
|
||||
input_token_details: { cache_read: 0, cache_creation: 0 },
|
||||
model: 'claude-opus-4-5-20251101',
|
||||
},
|
||||
{
|
||||
input_tokens: 35368,
|
||||
output_tokens: 150,
|
||||
total_tokens: 35518,
|
||||
input_token_details: { cache_read: 0, cache_creation: 0 },
|
||||
model: 'claude-opus-4-5-20251101',
|
||||
},
|
||||
{
|
||||
input_tokens: 58362,
|
||||
output_tokens: 295,
|
||||
total_tokens: 58657,
|
||||
input_token_details: { cache_read: 0, cache_creation: 0 },
|
||||
model: 'claude-opus-4-5-20251101',
|
||||
},
|
||||
{
|
||||
input_tokens: 112604,
|
||||
output_tokens: 193,
|
||||
total_tokens: 112797,
|
||||
input_token_details: { cache_read: 0, cache_creation: 0 },
|
||||
model: 'claude-opus-4-5-20251101',
|
||||
},
|
||||
{
|
||||
input_tokens: 257440,
|
||||
output_tokens: 2217,
|
||||
total_tokens: 259657,
|
||||
input_token_details: { cache_read: 0, cache_creation: 0 },
|
||||
model: 'claude-opus-4-5-20251101',
|
||||
},
|
||||
];
|
||||
|
||||
await client.recordCollectedUsage({
|
||||
collectedUsage,
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
// input_tokens should be first entry's input (initial context)
|
||||
expect(client.usage.input_tokens).toBe(31596);
|
||||
|
||||
// output_tokens should be sum of all model outputs: 151 + 150 + 295 + 193 + 2217 = 3006
|
||||
// NOT the inflated value from incremental calculation (338,559)
|
||||
expect(client.usage.output_tokens).toBe(3006);
|
||||
|
||||
// Verify spendTokens was called for each entry with correct values
|
||||
expect(mockSpendTokens).toHaveBeenCalledTimes(5);
|
||||
expect(mockSpendTokens).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
expect.objectContaining({ model: 'claude-opus-4-5-20251101' }),
|
||||
{ promptTokens: 31596, completionTokens: 151 },
|
||||
);
|
||||
expect(mockSpendTokens).toHaveBeenNthCalledWith(
|
||||
5,
|
||||
expect.objectContaining({ model: 'claude-opus-4-5-20251101' }),
|
||||
{ promptTokens: 257440, completionTokens: 2217 },
|
||||
);
|
||||
});
|
||||
|
||||
it('should handle single followup message correctly', async () => {
|
||||
// Real production data: followup to the above conversation
|
||||
const collectedUsage = [
|
||||
{
|
||||
input_tokens: 263406,
|
||||
output_tokens: 257,
|
||||
total_tokens: 263663,
|
||||
input_token_details: { cache_read: 0, cache_creation: 0 },
|
||||
model: 'claude-opus-4-5-20251101',
|
||||
},
|
||||
];
|
||||
|
||||
await client.recordCollectedUsage({
|
||||
collectedUsage,
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
expect(client.usage.input_tokens).toBe(263406);
|
||||
expect(client.usage.output_tokens).toBe(257);
|
||||
|
||||
expect(mockSpendTokens).toHaveBeenCalledTimes(1);
|
||||
expect(mockSpendTokens).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ model: 'claude-opus-4-5-20251101' }),
|
||||
{ promptTokens: 263406, completionTokens: 257 },
|
||||
);
|
||||
});
|
||||
|
||||
it('should ensure output_tokens > 0 check passes for BaseClient.sendMessage', async () => {
|
||||
// This verifies the fix for the duplicate token spending bug
|
||||
// BaseClient.sendMessage checks: if (usage != null && Number(usage[this.outputTokensKey]) > 0)
|
||||
const collectedUsage = [
|
||||
{
|
||||
input_tokens: 31596,
|
||||
output_tokens: 151,
|
||||
model: 'claude-opus-4-5-20251101',
|
||||
},
|
||||
{
|
||||
input_tokens: 35368,
|
||||
output_tokens: 150,
|
||||
model: 'claude-opus-4-5-20251101',
|
||||
},
|
||||
];
|
||||
|
||||
await client.recordCollectedUsage({
|
||||
collectedUsage,
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
const usage = client.getStreamUsage();
|
||||
|
||||
// The check that was failing before the fix
|
||||
expect(usage).not.toBeNull();
|
||||
expect(Number(usage.output_tokens)).toBeGreaterThan(0);
|
||||
|
||||
// Verify correct value
|
||||
expect(usage.output_tokens).toBe(301); // 151 + 150
|
||||
});
|
||||
|
||||
it('should correctly handle cache tokens with multiple tool calls', async () => {
|
||||
// Real production data: Claude Opus with cache tokens (prompt caching)
|
||||
// First entry has cache_creation, subsequent entries have cache_read
|
||||
const collectedUsage = [
|
||||
{
|
||||
input_tokens: 788,
|
||||
output_tokens: 163,
|
||||
total_tokens: 951,
|
||||
input_token_details: { cache_read: 0, cache_creation: 30808 },
|
||||
model: 'claude-opus-4-5-20251101',
|
||||
},
|
||||
{
|
||||
input_tokens: 3802,
|
||||
output_tokens: 149,
|
||||
total_tokens: 3951,
|
||||
input_token_details: { cache_read: 30808, cache_creation: 768 },
|
||||
model: 'claude-opus-4-5-20251101',
|
||||
},
|
||||
{
|
||||
input_tokens: 26808,
|
||||
output_tokens: 225,
|
||||
total_tokens: 27033,
|
||||
input_token_details: { cache_read: 31576, cache_creation: 0 },
|
||||
model: 'claude-opus-4-5-20251101',
|
||||
},
|
||||
{
|
||||
input_tokens: 80912,
|
||||
output_tokens: 204,
|
||||
total_tokens: 81116,
|
||||
input_token_details: { cache_read: 31576, cache_creation: 0 },
|
||||
model: 'claude-opus-4-5-20251101',
|
||||
},
|
||||
{
|
||||
input_tokens: 136454,
|
||||
output_tokens: 206,
|
||||
total_tokens: 136660,
|
||||
input_token_details: { cache_read: 31576, cache_creation: 0 },
|
||||
model: 'claude-opus-4-5-20251101',
|
||||
},
|
||||
{
|
||||
input_tokens: 146316,
|
||||
output_tokens: 224,
|
||||
total_tokens: 146540,
|
||||
input_token_details: { cache_read: 31576, cache_creation: 0 },
|
||||
model: 'claude-opus-4-5-20251101',
|
||||
},
|
||||
{
|
||||
input_tokens: 150402,
|
||||
output_tokens: 1248,
|
||||
total_tokens: 151650,
|
||||
input_token_details: { cache_read: 31576, cache_creation: 0 },
|
||||
model: 'claude-opus-4-5-20251101',
|
||||
},
|
||||
{
|
||||
input_tokens: 156268,
|
||||
output_tokens: 139,
|
||||
total_tokens: 156407,
|
||||
input_token_details: { cache_read: 31576, cache_creation: 0 },
|
||||
model: 'claude-opus-4-5-20251101',
|
||||
},
|
||||
{
|
||||
input_tokens: 167126,
|
||||
output_tokens: 2961,
|
||||
total_tokens: 170087,
|
||||
input_token_details: { cache_read: 31576, cache_creation: 0 },
|
||||
model: 'claude-opus-4-5-20251101',
|
||||
},
|
||||
];
|
||||
|
||||
await client.recordCollectedUsage({
|
||||
collectedUsage,
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
// input_tokens = first entry's input + cache_creation + cache_read
|
||||
// = 788 + 30808 + 0 = 31596
|
||||
expect(client.usage.input_tokens).toBe(31596);
|
||||
|
||||
// output_tokens = sum of all output_tokens
|
||||
// = 163 + 149 + 225 + 204 + 206 + 224 + 1248 + 139 + 2961 = 5519
|
||||
expect(client.usage.output_tokens).toBe(5519);
|
||||
|
||||
// First 2 entries have cache tokens, should use spendStructuredTokens
|
||||
// Remaining 7 entries have cache_read but no cache_creation, still structured
|
||||
expect(mockSpendStructuredTokens).toHaveBeenCalledTimes(9);
|
||||
expect(mockSpendTokens).toHaveBeenCalledTimes(0);
|
||||
|
||||
// Verify first entry uses structured tokens with cache_creation
|
||||
expect(mockSpendStructuredTokens).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
expect.objectContaining({ model: 'claude-opus-4-5-20251101' }),
|
||||
{
|
||||
promptTokens: { input: 788, write: 30808, read: 0 },
|
||||
completionTokens: 163,
|
||||
},
|
||||
);
|
||||
|
||||
// Verify second entry uses structured tokens with both cache_creation and cache_read
|
||||
expect(mockSpendStructuredTokens).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
expect.objectContaining({ model: 'claude-opus-4-5-20251101' }),
|
||||
{
|
||||
promptTokens: { input: 3802, write: 768, read: 30808 },
|
||||
completionTokens: 149,
|
||||
},
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('cache token handling', () => {
|
||||
it('should handle OpenAI format cache tokens (input_token_details)', async () => {
|
||||
const collectedUsage = [
|
||||
{
|
||||
input_tokens: 100,
|
||||
output_tokens: 50,
|
||||
model: 'gpt-4',
|
||||
input_token_details: {
|
||||
cache_creation: 20,
|
||||
cache_read: 10,
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
await client.recordCollectedUsage({
|
||||
collectedUsage,
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
expect(mockSpendStructuredTokens).toHaveBeenCalledTimes(1);
|
||||
expect(mockSpendStructuredTokens).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ model: 'gpt-4' }),
|
||||
{
|
||||
promptTokens: {
|
||||
input: 100,
|
||||
write: 20,
|
||||
read: 10,
|
||||
},
|
||||
completionTokens: 50,
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
it('should handle Anthropic format cache tokens (cache_*_input_tokens)', async () => {
|
||||
const collectedUsage = [
|
||||
{
|
||||
input_tokens: 100,
|
||||
output_tokens: 50,
|
||||
model: 'claude-3',
|
||||
cache_creation_input_tokens: 25,
|
||||
cache_read_input_tokens: 15,
|
||||
},
|
||||
];
|
||||
|
||||
await client.recordCollectedUsage({
|
||||
collectedUsage,
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
expect(mockSpendStructuredTokens).toHaveBeenCalledTimes(1);
|
||||
expect(mockSpendStructuredTokens).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ model: 'claude-3' }),
|
||||
{
|
||||
promptTokens: {
|
||||
input: 100,
|
||||
write: 25,
|
||||
read: 15,
|
||||
},
|
||||
completionTokens: 50,
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
it('should use spendTokens for entries without cache tokens', async () => {
|
||||
const collectedUsage = [{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' }];
|
||||
|
||||
await client.recordCollectedUsage({
|
||||
collectedUsage,
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
expect(mockSpendTokens).toHaveBeenCalledTimes(1);
|
||||
expect(mockSpendStructuredTokens).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should handle mixed cache and non-cache entries', async () => {
|
||||
const collectedUsage = [
|
||||
{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' },
|
||||
{
|
||||
input_tokens: 150,
|
||||
output_tokens: 30,
|
||||
model: 'gpt-4',
|
||||
input_token_details: { cache_creation: 10, cache_read: 5 },
|
||||
},
|
||||
{ input_tokens: 200, output_tokens: 20, model: 'gpt-4' },
|
||||
];
|
||||
|
||||
await client.recordCollectedUsage({
|
||||
collectedUsage,
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
expect(mockSpendTokens).toHaveBeenCalledTimes(2);
|
||||
expect(mockSpendStructuredTokens).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('should include cache tokens in total input calculation', async () => {
|
||||
const collectedUsage = [
|
||||
{
|
||||
input_tokens: 100,
|
||||
output_tokens: 50,
|
||||
model: 'gpt-4',
|
||||
input_token_details: {
|
||||
cache_creation: 20,
|
||||
cache_read: 10,
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
await client.recordCollectedUsage({
|
||||
collectedUsage,
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
// Total input should include cache tokens: 100 + 20 + 10 = 130
|
||||
expect(client.usage.input_tokens).toBe(130);
|
||||
});
|
||||
});
|
||||
|
||||
describe('model fallback', () => {
|
||||
it('should use usage.model when available', async () => {
|
||||
const collectedUsage = [{ input_tokens: 100, output_tokens: 50, model: 'gpt-4-turbo' }];
|
||||
|
||||
await client.recordCollectedUsage({
|
||||
model: 'fallback-model',
|
||||
collectedUsage,
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
expect(mockSpendTokens).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ model: 'gpt-4-turbo' }),
|
||||
expect.any(Object),
|
||||
);
|
||||
});
|
||||
|
||||
it('should fallback to param model when usage.model is missing', async () => {
|
||||
const collectedUsage = [{ input_tokens: 100, output_tokens: 50 }];
|
||||
|
||||
await client.recordCollectedUsage({
|
||||
model: 'param-model',
|
||||
collectedUsage,
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
expect(mockSpendTokens).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ model: 'param-model' }),
|
||||
expect.any(Object),
|
||||
);
|
||||
});
|
||||
|
||||
it('should fallback to client.model when param model is missing', async () => {
|
||||
client.model = 'client-model';
|
||||
const collectedUsage = [{ input_tokens: 100, output_tokens: 50 }];
|
||||
|
||||
await client.recordCollectedUsage({
|
||||
collectedUsage,
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
expect(mockSpendTokens).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ model: 'client-model' }),
|
||||
expect.any(Object),
|
||||
);
|
||||
});
|
||||
|
||||
it('should fallback to agent model_parameters.model as last resort', async () => {
|
||||
const collectedUsage = [{ input_tokens: 100, output_tokens: 50 }];
|
||||
|
||||
await client.recordCollectedUsage({
|
||||
collectedUsage,
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
expect(mockSpendTokens).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ model: 'gpt-4' }),
|
||||
expect.any(Object),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('getStreamUsage integration', () => {
|
||||
it('should return the usage object set by recordCollectedUsage', async () => {
|
||||
const collectedUsage = [{ input_tokens: 100, output_tokens: 50, model: 'gpt-4' }];
|
||||
|
||||
await client.recordCollectedUsage({
|
||||
collectedUsage,
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
const usage = client.getStreamUsage();
|
||||
expect(usage).toEqual({
|
||||
input_tokens: 100,
|
||||
output_tokens: 50,
|
||||
});
|
||||
});
|
||||
|
||||
it('should return undefined before recordCollectedUsage is called', () => {
|
||||
const usage = client.getStreamUsage();
|
||||
expect(usage).toBeUndefined();
|
||||
});
|
||||
|
||||
it('should have output_tokens > 0 for BaseClient.sendMessage check', async () => {
|
||||
// This test verifies the usage will pass the check in BaseClient.sendMessage:
|
||||
// if (usage != null && Number(usage[this.outputTokensKey]) > 0)
|
||||
const collectedUsage = [
|
||||
{ input_tokens: 200, output_tokens: 100, model: 'gpt-4' },
|
||||
{ input_tokens: 50, output_tokens: 30, model: 'gpt-4' },
|
||||
];
|
||||
|
||||
await client.recordCollectedUsage({
|
||||
collectedUsage,
|
||||
balance: { enabled: true },
|
||||
transactions: { enabled: true },
|
||||
});
|
||||
|
||||
const usage = client.getStreamUsage();
|
||||
expect(usage).not.toBeNull();
|
||||
expect(Number(usage.output_tokens)).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
});
|
||||
17 package-lock.json (generated)
@@ -60,7 +60,7 @@
|
|||
"@googleapis/youtube": "^20.0.0",
|
||||
"@keyv/redis": "^4.3.3",
|
||||
"@langchain/core": "^0.3.80",
|
||||
"@librechat/agents": "^3.0.66",
|
||||
"@librechat/agents": "^3.0.77",
|
||||
"@librechat/api": "*",
|
||||
"@librechat/data-schemas": "*",
|
||||
"@microsoft/microsoft-graph-client": "^3.0.7",
|
||||
|
|
@ -12660,9 +12660,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@librechat/agents": {
|
||||
"version": "3.0.66",
|
||||
"resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.0.66.tgz",
|
||||
"integrity": "sha512-JpQo7w+/yLM3dJ46lyGrm4gPTjiHERwcpojw7drvpYWqOU4e2jmjK0JbNxQ0jP00q+nDhPG+mqJ2qQU7TVraOQ==",
|
||||
"version": "3.0.77",
|
||||
"resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.0.77.tgz",
|
||||
"integrity": "sha512-Wr9d8bjJAQSl03nEgnAPG6jBQT1fL3sNV3TFDN1FvFQt6WGfdok838Cbcn+/tSGXSPJcICTxNkMT7VN8P6bCPw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@langchain/anthropic": "^0.3.26",
|
||||
|
|
@ -12686,6 +12686,7 @@
|
|||
"https-proxy-agent": "^7.0.6",
|
||||
"mathjs": "^15.1.0",
|
||||
"nanoid": "^3.3.7",
|
||||
"okapibm25": "^1.4.1",
|
||||
"openai": "5.8.2"
|
||||
},
|
||||
"engines": {
|
||||
|
|
@ -34310,6 +34311,12 @@
|
|||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/okapibm25": {
|
||||
"version": "1.4.1",
|
||||
"resolved": "https://registry.npmjs.org/okapibm25/-/okapibm25-1.4.1.tgz",
|
||||
"integrity": "sha512-UHmeH4MAtZXGFVncwbY7pfFvDVNxpsyM3W66aGPU0SHj1+ld59ty+9lJ0ifcrcnPUl1XdYoDgb06ObyCnpTs3g==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/ollama": {
|
||||
"version": "0.5.18",
|
||||
"resolved": "https://registry.npmjs.org/ollama/-/ollama-0.5.18.tgz",
|
||||
|
|
@ -43169,7 +43176,7 @@
|
|||
"@google/genai": "^1.19.0",
|
||||
"@keyv/redis": "^4.3.3",
|
||||
"@langchain/core": "^0.3.80",
|
||||
"@librechat/agents": "^3.0.66",
|
||||
"@librechat/agents": "^3.0.77",
|
||||
"@librechat/data-schemas": "*",
|
||||
"@modelcontextprotocol/sdk": "^1.25.2",
|
||||
"@smithy/node-http-handler": "^4.4.5",
|
||||
|
|
|
|||
|
|
@ -88,7 +88,7 @@
|
|||
"@google/genai": "^1.19.0",
|
||||
"@keyv/redis": "^4.3.3",
|
||||
"@langchain/core": "^0.3.80",
|
||||
"@librechat/agents": "^3.0.66",
|
||||
"@librechat/agents": "^3.0.77",
|
||||
"@librechat/data-schemas": "*",
|
||||
"@modelcontextprotocol/sdk": "^1.25.2",
|
||||
"@smithy/node-http-handler": "^4.4.5",
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@ describe('getLLMConfig', () => {
|
|||
expect(result.llmConfig.thinking).toHaveProperty('budget_tokens', 2000);
|
||||
});
|
||||
|
||||
it('should add "context-1m" beta header for claude-sonnet-4 model', () => {
|
||||
it('should add "context-1m" beta header and promptCache boolean for claude-sonnet-4 model', () => {
|
||||
const modelOptions = {
|
||||
model: 'claude-sonnet-4-20250514',
|
||||
promptCache: true,
|
||||
|
|
@ -98,9 +98,10 @@ describe('getLLMConfig', () => {
|
|||
expect(clientOptions?.defaultHeaders).toHaveProperty('anthropic-beta');
|
||||
const defaultHeaders = clientOptions?.defaultHeaders as Record<string, string>;
|
||||
expect(defaultHeaders['anthropic-beta']).toBe('context-1m-2025-08-07');
|
||||
expect(result.llmConfig.promptCache).toBe(true);
|
||||
});
|
||||
|
||||
it('should add "context-1m" beta header for claude-sonnet-4 model formats', () => {
|
||||
it('should add "context-1m" beta header and promptCache boolean for claude-sonnet-4 model formats', () => {
|
||||
const modelVariations = [
|
||||
'claude-sonnet-4-20250514',
|
||||
'claude-sonnet-4-latest',
|
||||
|
|
@ -115,10 +116,11 @@ describe('getLLMConfig', () => {
|
|||
expect(clientOptions?.defaultHeaders).toHaveProperty('anthropic-beta');
|
||||
const defaultHeaders = clientOptions?.defaultHeaders as Record<string, string>;
|
||||
expect(defaultHeaders['anthropic-beta']).toBe('context-1m-2025-08-07');
|
||||
expect(result.llmConfig.promptCache).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
it('should not add beta headers for claude-opus-4-5 model (prompt caching no longer needs header)', () => {
|
||||
it('should pass promptCache boolean for claude-opus-4-5 model (no beta header needed)', () => {
|
||||
const modelOptions = {
|
||||
model: 'claude-opus-4-5',
|
||||
promptCache: true,
|
||||
|
|
@ -126,9 +128,10 @@ describe('getLLMConfig', () => {
|
|||
const result = getLLMConfig('test-key', { modelOptions });
|
||||
const clientOptions = result.llmConfig.clientOptions;
|
||||
expect(clientOptions?.defaultHeaders).toBeUndefined();
|
||||
expect(result.llmConfig.promptCache).toBe(true);
|
||||
});
|
||||
|
||||
it('should not add beta headers for claude-opus-4-5 model formats (prompt caching no longer needs header)', () => {
|
||||
it('should pass promptCache boolean for claude-opus-4-5 model formats (no beta header needed)', () => {
|
||||
const modelVariations = [
|
||||
'claude-opus-4-5',
|
||||
'claude-opus-4-5-20250420',
|
||||
|
|
@ -141,6 +144,7 @@ describe('getLLMConfig', () => {
|
|||
const result = getLLMConfig('test-key', { modelOptions });
|
||||
const clientOptions = result.llmConfig.clientOptions;
|
||||
expect(clientOptions?.defaultHeaders).toBeUndefined();
|
||||
expect(result.llmConfig.promptCache).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -299,10 +303,11 @@ describe('getLLMConfig', () => {
|
|||
},
|
||||
});
|
||||
|
||||
// claude-3-5-sonnet supports prompt caching and should get the max-tokens header
|
||||
// claude-3-5-sonnet supports prompt caching and should get the max-tokens header and promptCache boolean
|
||||
expect(result.llmConfig.clientOptions?.defaultHeaders).toEqual({
|
||||
'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15',
|
||||
});
|
||||
expect(result.llmConfig.promptCache).toBe(true);
|
||||
});
|
||||
|
||||
it('should handle thinking and thinkingBudget options', () => {
|
||||
|
|
@ -512,6 +517,8 @@ describe('getLLMConfig', () => {
|
|||
expect(result.llmConfig.clientOptions?.defaultHeaders).toEqual({
|
||||
'anthropic-beta': 'token-efficient-tools-2025-02-19,output-128k-2025-02-19',
|
||||
});
|
||||
// Should pass promptCache boolean
|
||||
expect(result.llmConfig.promptCache).toBe(true);
|
||||
});
|
||||
|
||||
it('should handle web search functionality like production', () => {
|
||||
|
|
@ -1160,21 +1167,66 @@ describe('getLLMConfig', () => {
|
|||
it('should handle prompt cache support logic for different models', () => {
|
||||
const testCases = [
|
||||
// Models that support prompt cache (and have other beta headers)
|
||||
{ model: 'claude-3-5-sonnet', promptCache: true, shouldHaveHeaders: true },
|
||||
{ model: 'claude-3.5-sonnet-20241022', promptCache: true, shouldHaveHeaders: true },
|
||||
{ model: 'claude-3-7-sonnet', promptCache: true, shouldHaveHeaders: true },
|
||||
{ model: 'claude-3.7-sonnet-20250109', promptCache: true, shouldHaveHeaders: true },
|
||||
{ model: 'claude-sonnet-4-20250514', promptCache: true, shouldHaveHeaders: true },
|
||||
{
|
||||
model: 'claude-3-5-sonnet',
|
||||
promptCache: true,
|
||||
shouldHaveHeaders: true,
|
||||
shouldHavePromptCache: true,
|
||||
},
|
||||
{
|
||||
model: 'claude-3.5-sonnet-20241022',
|
||||
promptCache: true,
|
||||
shouldHaveHeaders: true,
|
||||
shouldHavePromptCache: true,
|
||||
},
|
||||
{
|
||||
model: 'claude-3-7-sonnet',
|
||||
promptCache: true,
|
||||
shouldHaveHeaders: true,
|
||||
shouldHavePromptCache: true,
|
||||
},
|
||||
{
|
||||
model: 'claude-3.7-sonnet-20250109',
|
||||
promptCache: true,
|
||||
shouldHaveHeaders: true,
|
||||
shouldHavePromptCache: true,
|
||||
},
|
||||
{
|
||||
model: 'claude-sonnet-4-20250514',
|
||||
promptCache: true,
|
||||
shouldHaveHeaders: true,
|
||||
shouldHavePromptCache: true,
|
||||
},
|
||||
// Models that support prompt cache but have no additional beta headers needed
|
||||
{ model: 'claude-3-opus', promptCache: true, shouldHaveHeaders: false },
|
||||
{
|
||||
model: 'claude-3-opus',
|
||||
promptCache: true,
|
||||
shouldHaveHeaders: false,
|
||||
shouldHavePromptCache: true,
|
||||
},
|
||||
// Models that don't support prompt cache
|
||||
{ model: 'claude-3-5-sonnet-latest', promptCache: true, shouldHaveHeaders: false },
|
||||
{ model: 'claude-3.5-sonnet-latest', promptCache: true, shouldHaveHeaders: false },
|
||||
{
|
||||
model: 'claude-3-5-sonnet-latest',
|
||||
promptCache: true,
|
||||
shouldHaveHeaders: false,
|
||||
shouldHavePromptCache: false,
|
||||
},
|
||||
{
|
||||
model: 'claude-3.5-sonnet-latest',
|
||||
promptCache: true,
|
||||
shouldHaveHeaders: false,
|
||||
shouldHavePromptCache: false,
|
||||
},
|
||||
// Prompt cache disabled
|
||||
{ model: 'claude-3-5-sonnet', promptCache: false, shouldHaveHeaders: false },
|
||||
{
|
||||
model: 'claude-3-5-sonnet',
|
||||
promptCache: false,
|
||||
shouldHaveHeaders: false,
|
||||
shouldHavePromptCache: false,
|
||||
},
|
||||
];
|
||||
|
||||
testCases.forEach(({ model, promptCache, shouldHaveHeaders }) => {
|
||||
testCases.forEach(({ model, promptCache, shouldHaveHeaders, shouldHavePromptCache }) => {
|
||||
const result = getLLMConfig('test-key', {
|
||||
modelOptions: { model, promptCache },
|
||||
});
|
||||
|
|
@ -1187,6 +1239,12 @@ describe('getLLMConfig', () => {
|
|||
} else {
|
||||
expect(headers).toBeUndefined();
|
||||
}
|
||||
|
||||
if (shouldHavePromptCache) {
|
||||
expect(result.llmConfig.promptCache).toBe(true);
|
||||
} else {
|
||||
expect(result.llmConfig.promptCache).toBeUndefined();
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@@ -155,6 +155,12 @@ function getLLMConfig(

   const supportsCacheControl =
     systemOptions.promptCache === true && checkPromptCacheSupport(requestOptions.model ?? '');
+
+  /** Pass promptCache boolean for downstream cache_control application */
+  if (supportsCacheControl) {
+    (requestOptions as Record<string, unknown>).promptCache = true;
+  }
+
   const headers = getClaudeHeaders(requestOptions.model ?? '', supportsCacheControl);
   if (headers && requestOptions.clientOptions) {
     requestOptions.clientOptions.defaultHeaders = headers;
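A minimal, runnable sketch of the gating added in getLLMConfig above; modelSupportsCache is a stand-in for the real checkPromptCacheSupport helper and only encodes the model cases asserted by the specs in this commit.

// Only forward promptCache when the user enabled it AND the model supports caching;
// downstream code can then decide where to attach cache_control to the prompt.
function modelSupportsCache(model) {
  if (/-latest$/.test(model)) {
    return false; // e.g. claude-3-5-sonnet-latest is excluded in the specs
  }
  return /claude-(3[-.]5-sonnet|3[-.]7-sonnet|sonnet-4|3-opus|opus-4)/.test(model);
}

function applyPromptCacheFlag(requestOptions, systemOptions) {
  const supportsCacheControl =
    systemOptions.promptCache === true && modelSupportsCache(requestOptions.model ?? '');
  if (supportsCacheControl) {
    requestOptions.promptCache = true;
  }
  return requestOptions;
}

console.log(applyPromptCacheFlag({ model: 'claude-3-5-sonnet' }, { promptCache: true }).promptCache); // true
console.log(applyPromptCacheFlag({ model: 'claude-3-5-sonnet-latest' }, { promptCache: true }).promptCache); // undefined

Per the updated specs, the flag surfaces as result.llmConfig.promptCache === true for supported models and stays undefined otherwise, while any required anthropic-beta headers are still applied separately.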
@ -39,6 +39,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
|
|||
type: 'enabled',
|
||||
budget_tokens: 2000,
|
||||
},
|
||||
promptCache: true,
|
||||
},
|
||||
},
|
||||
configOptions: {
|
||||
|
|
@ -87,6 +88,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
|
|||
type: 'enabled',
|
||||
budget_tokens: 3000,
|
||||
},
|
||||
promptCache: true,
|
||||
},
|
||||
},
|
||||
configOptions: {
|
||||
|
|
@ -134,6 +136,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
|
|||
user_id: 'user123',
|
||||
},
|
||||
topK: 50,
|
||||
promptCache: true,
|
||||
},
|
||||
},
|
||||
configOptions: {
|
||||
|
|
@ -175,6 +178,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
|
|||
metadata: {
|
||||
user_id: 'user456',
|
||||
},
|
||||
promptCache: true,
|
||||
},
|
||||
},
|
||||
configOptions: {
|
||||
|
|
@ -187,7 +191,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('should apply custom headers without anthropic-beta for models that dont need it', () => {
|
||||
it('should apply custom headers and promptCache for models that support caching', () => {
|
||||
const apiKey = 'sk-custom';
|
||||
const endpoint = 'Anthropic (via LiteLLM)';
|
||||
const options = {
|
||||
|
|
@ -218,6 +222,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
|
|||
metadata: {
|
||||
user_id: undefined,
|
||||
},
|
||||
promptCache: true,
|
||||
},
|
||||
},
|
||||
configOptions: {
|
||||
|
|
@ -300,6 +305,9 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
|
|||
stream: true,
|
||||
topP: 0.9,
|
||||
maxTokens: 2048,
|
||||
modelKwargs: {
|
||||
promptCache: true,
|
||||
},
|
||||
// temperature is dropped
|
||||
// modelKwargs.topK is dropped
|
||||
// modelKwargs.metadata is dropped completely
|
||||
|
|
@ -379,6 +387,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
|
|||
metadata: {
|
||||
user_id: 'searchUser',
|
||||
},
|
||||
promptCache: true,
|
||||
},
|
||||
},
|
||||
configOptions: {
|
||||
|
|
@ -425,6 +434,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
|
|||
user_id: 'testUser',
|
||||
},
|
||||
topK: 40,
|
||||
promptCache: true,
|
||||
},
|
||||
},
|
||||
configOptions: {
|
||||
|
|
@ -470,6 +480,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
|
|||
metadata: {
|
||||
user_id: 'addUser',
|
||||
},
|
||||
promptCache: true,
|
||||
customParam1: 'value1', // Unknown params added to modelKwargs
|
||||
customParam2: 42,
|
||||
},
|
||||
|
|
@ -519,6 +530,7 @@ describe('getOpenAIConfig - Anthropic Compatibility', () => {
|
|||
metadata: {
|
||||
user_id: 'bothUser',
|
||||
},
|
||||
promptCache: true,
|
||||
customParam: 'customValue',
|
||||
// topK is dropped
|
||||
},
|
||||
|
|
|
|||