From c6ecf0095b6559f7b5448e50c25e519cc36b4ffa Mon Sep 17 00:00:00 2001
From: Dustin Healy <54083382+dustinhealy@users.noreply.github.com>
Date: Mon, 8 Sep 2025 11:35:29 -0700
Subject: [PATCH] 🎚️ feat: Anthropic Parameter Set Support via Custom Endpoints (#9415)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: modularize openai llm config logic into new getOpenAILLMConfig function (#9412)
* ✈️ refactor: Migrate Anthropic's getLLMConfig to TypeScript (#9413)
* refactor: move tokens.js over to packages/api and update imports
* refactor: port tokens.js to typescript
* refactor: move helpers.js over to packages/api and update imports
* refactor: port helpers.js to typescript
* refactor: move anthropic/llm.js over to packages/api and update imports
* refactor: port anthropic/llm.js to typescript with supporting types in types/anthropic.ts and updated tests in llm.spec.js
* refactor: move llm.spec.js over to packages/api and update import
* refactor: port llm.spec.js over to typescript
* 📝 Add Prompt Parameter Support for Anthropic Custom Endpoints (#9414)

feat: add anthropic llm config support for openai-like (custom) endpoints

* fix: missed compiler / type issues from addition of getAnthropicLLMConfig
* refactor: update tokens.ts to export constants and functions, enhance type definitions, and adjust default values
* WIP: first pass, decouple `llmConfig` from `configOptions`
* chore: update import path for OpenAI configuration from 'llm' to 'config'
* refactor: enhance type definitions for ThinkingConfig and update modelOptions in AnthropicConfigOptions
* refactor: cleanup type, introduce openai transform from alt provider
* chore: integrate removeNullishValues in Google llmConfig and update OpenAI exports
* chore: bump version of @librechat/api to 1.3.5 in package.json and package-lock.json
* refactor: update customParams type in OpenAIConfigOptions to use TConfig['customParams']
* refactor: enhance transformToOpenAIConfig to include fromEndpoint and improve config extraction
* refactor: conform userId field for anthropic/openai, cleanup anthropic typing
* ci: add backward compatibility tests for getOpenAIConfig with various endpoints and configurations
* ci: replace userId with user in clientOptions for getLLMConfig
* test: add Azure OpenAI endpoint tests for various configurations in getOpenAIConfig
* refactor: defaultHeaders retrieval for prompt caching for anthropic-based custom endpoint (litellm)
* test: add unit tests for getOpenAIConfig with various Anthropic model configurations
* test: enhance Anthropic compatibility tests with addParams and dropParams handling
* chore: update @librechat/agents dependency to version 2.4.78 in package.json and package-lock.json
* chore: update @librechat/agents dependency to version 2.4.79 in package.json and package-lock.json

---------

Co-authored-by: Danny Avila
---
 api/app/clients/AnthropicClient.js | 18 +-
 api/app/clients/GoogleClient.js | 2 +-
 api/app/clients/OpenAIClient.js | 4 +-
 api/app/clients/specs/FakeClient.js | 2 +-
 api/models/tx.js | 2 +-
 api/package.json | 2 +-
 api/server/controllers/agents/client.js | 9 +-
 api/server/controllers/assistants/chatV1.js | 3 +-
 api/server/controllers/assistants/chatV2.js | 3 +-
 api/server/services/Endpoints/agents/agent.js | 2 +-
 .../Endpoints/anthropic/initialize.js | 4 +-
 .../services/Endpoints/anthropic/llm.js | 103 ----
 .../services/Endpoints/bedrock/initialize.js | 2 +-
api/server/services/ModelService.js | 4 +- api/server/services/ModelService.spec.js | 8 +- api/utils/deriveBaseURL.spec.js | 4 +- api/utils/index.js | 2 - api/utils/tokens.spec.js | 14 +- package-lock.json | 12 +- packages/api/package.json | 4 +- .../api/src/endpoints/anthropic/helpers.ts | 36 +- packages/api/src/endpoints/anthropic/index.ts | 2 + .../api/src/endpoints/anthropic/llm.spec.ts | 111 ++-- packages/api/src/endpoints/anthropic/llm.ts | 105 ++++ packages/api/src/endpoints/google/llm.ts | 10 +- packages/api/src/endpoints/index.ts | 1 + .../endpoints/openai/config.anthropic.spec.ts | 551 ++++++++++++++++++ .../openai/config.backward-compat.spec.ts | 431 ++++++++++++++ .../openai/{llm.spec.ts => config.spec.ts} | 3 +- packages/api/src/endpoints/openai/config.ts | 150 +++++ packages/api/src/endpoints/openai/index.ts | 1 + .../api/src/endpoints/openai/initialize.ts | 4 +- packages/api/src/endpoints/openai/llm.ts | 224 +++---- .../api/src/endpoints/openai/transform.ts | 95 +++ packages/api/src/types/anthropic.ts | 69 +++ packages/api/src/types/index.ts | 1 + packages/api/src/types/openai.ts | 20 +- packages/api/src/utils/index.ts | 1 + .../api/src/utils/tokens.ts | 142 +++-- packages/data-provider/src/schemas.ts | 7 +- 40 files changed, 1736 insertions(+), 432 deletions(-) delete mode 100644 api/server/services/Endpoints/anthropic/llm.js rename api/server/services/Endpoints/anthropic/helpers.js => packages/api/src/endpoints/anthropic/helpers.ts (77%) create mode 100644 packages/api/src/endpoints/anthropic/index.ts rename api/server/services/Endpoints/anthropic/llm.spec.js => packages/api/src/endpoints/anthropic/llm.spec.ts (91%) create mode 100644 packages/api/src/endpoints/anthropic/llm.ts create mode 100644 packages/api/src/endpoints/openai/config.anthropic.spec.ts create mode 100644 packages/api/src/endpoints/openai/config.backward-compat.spec.ts rename packages/api/src/endpoints/openai/{llm.spec.ts => config.spec.ts} (99%) create mode 100644 packages/api/src/endpoints/openai/config.ts create mode 100644 packages/api/src/endpoints/openai/transform.ts create mode 100644 packages/api/src/types/anthropic.ts rename api/utils/tokens.js => packages/api/src/utils/tokens.ts (73%) diff --git a/api/app/clients/AnthropicClient.js b/api/app/clients/AnthropicClient.js index a3fba29d5..834877bb4 100644 --- a/api/app/clients/AnthropicClient.js +++ b/api/app/clients/AnthropicClient.js @@ -10,7 +10,17 @@ const { validateVisionModel, } = require('librechat-data-provider'); const { SplitStreamHandler: _Handler } = require('@librechat/agents'); -const { Tokenizer, createFetch, createStreamEventHandlers } = require('@librechat/api'); +const { + Tokenizer, + createFetch, + matchModelName, + getClaudeHeaders, + getModelMaxTokens, + configureReasoning, + checkPromptCacheSupport, + getModelMaxOutputTokens, + createStreamEventHandlers, +} = require('@librechat/api'); const { truncateText, formatMessage, @@ -19,12 +29,6 @@ const { parseParamFromPrompt, createContextHandlers, } = require('./prompts'); -const { - getClaudeHeaders, - configureReasoning, - checkPromptCacheSupport, -} = require('~/server/services/Endpoints/anthropic/helpers'); -const { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } = require('~/utils'); const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens'); const { encodeAndFormat } = require('~/server/services/Files/images/encode'); const { sleep } = require('~/server/utils'); diff --git a/api/app/clients/GoogleClient.js b/api/app/clients/GoogleClient.js 
index 2ec23a0a0..715f48ff5 100644 --- a/api/app/clients/GoogleClient.js +++ b/api/app/clients/GoogleClient.js @@ -1,4 +1,5 @@ const { google } = require('googleapis'); +const { getModelMaxTokens } = require('@librechat/api'); const { concat } = require('@langchain/core/utils/stream'); const { ChatVertexAI } = require('@langchain/google-vertexai'); const { Tokenizer, getSafetySettings } = require('@librechat/api'); @@ -21,7 +22,6 @@ const { } = require('librechat-data-provider'); const { encodeAndFormat } = require('~/server/services/Files/images'); const { spendTokens } = require('~/models/spendTokens'); -const { getModelMaxTokens } = require('~/utils'); const { sleep } = require('~/server/utils'); const { logger } = require('~/config'); const { diff --git a/api/app/clients/OpenAIClient.js b/api/app/clients/OpenAIClient.js index d6fd018ae..2b254036c 100644 --- a/api/app/clients/OpenAIClient.js +++ b/api/app/clients/OpenAIClient.js @@ -7,7 +7,9 @@ const { createFetch, resolveHeaders, constructAzureURL, + getModelMaxTokens, genAzureChatCompletion, + getModelMaxOutputTokens, createStreamEventHandlers, } = require('@librechat/api'); const { @@ -31,13 +33,13 @@ const { titleInstruction, createContextHandlers, } = require('./prompts'); -const { extractBaseURL, getModelMaxTokens, getModelMaxOutputTokens } = require('~/utils'); const { encodeAndFormat } = require('~/server/services/Files/images/encode'); const { addSpaceIfNeeded, sleep } = require('~/server/utils'); const { spendTokens } = require('~/models/spendTokens'); const { handleOpenAIErrors } = require('./tools/util'); const { summaryBuffer } = require('./memory'); const { runTitleChain } = require('./chains'); +const { extractBaseURL } = require('~/utils'); const { tokenSplit } = require('./document'); const BaseClient = require('./BaseClient'); const { createLLM } = require('./llm'); diff --git a/api/app/clients/specs/FakeClient.js b/api/app/clients/specs/FakeClient.js index a466bb97f..8c7984706 100644 --- a/api/app/clients/specs/FakeClient.js +++ b/api/app/clients/specs/FakeClient.js @@ -1,5 +1,5 @@ +const { getModelMaxTokens } = require('@librechat/api'); const BaseClient = require('../BaseClient'); -const { getModelMaxTokens } = require('../../../utils'); class FakeClient extends BaseClient { constructor(apiKey, options = {}) { diff --git a/api/models/tx.js b/api/models/tx.js index ca69660f2..66a807999 100644 --- a/api/models/tx.js +++ b/api/models/tx.js @@ -1,4 +1,4 @@ -const { matchModelName } = require('../utils/tokens'); +const { matchModelName } = require('@librechat/api'); const defaultRate = 6; /** diff --git a/api/package.json b/api/package.json index 1f496b5b2..c9a0435b4 100644 --- a/api/package.json +++ b/api/package.json @@ -49,7 +49,7 @@ "@langchain/google-vertexai": "^0.2.13", "@langchain/openai": "^0.5.18", "@langchain/textsplitters": "^0.1.0", - "@librechat/agents": "^2.4.77", + "@librechat/agents": "^2.4.79", "@librechat/api": "*", "@librechat/data-schemas": "*", "@microsoft/microsoft-graph-client": "^3.0.7", diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js index d81228574..18ab0ae6e 100644 --- a/api/server/controllers/agents/client.js +++ b/api/server/controllers/agents/client.js @@ -872,11 +872,10 @@ class AgentClient extends BaseClient { if (agent.useLegacyContent === true) { messages = formatContentStrings(messages); } - if ( - agent.model_parameters?.clientOptions?.defaultHeaders?.['anthropic-beta']?.includes( - 'prompt-caching', - ) - ) { + const defaultHeaders = + 
agent.model_parameters?.clientOptions?.defaultHeaders ?? + agent.model_parameters?.configuration?.defaultHeaders; + if (defaultHeaders?.['anthropic-beta']?.includes('prompt-caching')) { messages = addCacheControl(messages); } diff --git a/api/server/controllers/assistants/chatV1.js b/api/server/controllers/assistants/chatV1.js index b170b916a..4bd49e04d 100644 --- a/api/server/controllers/assistants/chatV1.js +++ b/api/server/controllers/assistants/chatV1.js @@ -1,7 +1,7 @@ const { v4 } = require('uuid'); const { sleep } = require('@librechat/agents'); const { logger } = require('@librechat/data-schemas'); -const { sendEvent, getBalanceConfig } = require('@librechat/api'); +const { sendEvent, getBalanceConfig, getModelMaxTokens } = require('@librechat/api'); const { Time, Constants, @@ -34,7 +34,6 @@ const { checkBalance } = require('~/models/balanceMethods'); const { getConvo } = require('~/models/Conversation'); const getLogStores = require('~/cache/getLogStores'); const { countTokens } = require('~/server/utils'); -const { getModelMaxTokens } = require('~/utils'); const { getOpenAIClient } = require('./helpers'); /** diff --git a/api/server/controllers/assistants/chatV2.js b/api/server/controllers/assistants/chatV2.js index cfcaf2ee3..20b3398ee 100644 --- a/api/server/controllers/assistants/chatV2.js +++ b/api/server/controllers/assistants/chatV2.js @@ -1,7 +1,7 @@ const { v4 } = require('uuid'); const { sleep } = require('@librechat/agents'); const { logger } = require('@librechat/data-schemas'); -const { sendEvent, getBalanceConfig } = require('@librechat/api'); +const { sendEvent, getBalanceConfig, getModelMaxTokens } = require('@librechat/api'); const { Time, Constants, @@ -31,7 +31,6 @@ const { checkBalance } = require('~/models/balanceMethods'); const { getConvo } = require('~/models/Conversation'); const getLogStores = require('~/cache/getLogStores'); const { countTokens } = require('~/server/utils'); -const { getModelMaxTokens } = require('~/utils'); const { getOpenAIClient } = require('./helpers'); /** diff --git a/api/server/services/Endpoints/agents/agent.js b/api/server/services/Endpoints/agents/agent.js index 56b4bf058..1966834ed 100644 --- a/api/server/services/Endpoints/agents/agent.js +++ b/api/server/services/Endpoints/agents/agent.js @@ -1,6 +1,7 @@ const { Providers } = require('@librechat/agents'); const { primeResources, + getModelMaxTokens, extractLibreChatParams, optionalChainWithEmptyCheck, } = require('@librechat/api'); @@ -17,7 +18,6 @@ const { getProviderConfig } = require('~/server/services/Endpoints'); const { processFiles } = require('~/server/services/Files/process'); const { getFiles, getToolFilesByIds } = require('~/models/File'); const { getConvoFiles } = require('~/models/Conversation'); -const { getModelMaxTokens } = require('~/utils'); /** * @param {object} params diff --git a/api/server/services/Endpoints/anthropic/initialize.js b/api/server/services/Endpoints/anthropic/initialize.js index 48b452672..6e661da67 100644 --- a/api/server/services/Endpoints/anthropic/initialize.js +++ b/api/server/services/Endpoints/anthropic/initialize.js @@ -1,6 +1,6 @@ +const { getLLMConfig } = require('@librechat/api'); const { EModelEndpoint } = require('librechat-data-provider'); const { getUserKey, checkUserKeyExpiry } = require('~/server/services/UserService'); -const { getLLMConfig } = require('~/server/services/Endpoints/anthropic/llm'); const AnthropicClient = require('~/app/clients/AnthropicClient'); const initializeClient = async ({ req, res, endpointOption, 
overrideModel, optionsOnly }) => { @@ -40,7 +40,6 @@ const initializeClient = async ({ req, res, endpointOption, overrideModel, optio clientOptions = Object.assign( { proxy: PROXY ?? null, - userId: req.user.id, reverseProxyUrl: ANTHROPIC_REVERSE_PROXY ?? null, modelOptions: endpointOption?.model_parameters ?? {}, }, @@ -49,6 +48,7 @@ const initializeClient = async ({ req, res, endpointOption, overrideModel, optio if (overrideModel) { clientOptions.modelOptions.model = overrideModel; } + clientOptions.modelOptions.user = req.user.id; return getLLMConfig(anthropicApiKey, clientOptions); } diff --git a/api/server/services/Endpoints/anthropic/llm.js b/api/server/services/Endpoints/anthropic/llm.js deleted file mode 100644 index 2cb76d5f9..000000000 --- a/api/server/services/Endpoints/anthropic/llm.js +++ /dev/null @@ -1,103 +0,0 @@ -const { ProxyAgent } = require('undici'); -const { anthropicSettings, removeNullishValues } = require('librechat-data-provider'); -const { checkPromptCacheSupport, getClaudeHeaders, configureReasoning } = require('./helpers'); - -/** - * Generates configuration options for creating an Anthropic language model (LLM) instance. - * - * @param {string} apiKey - The API key for authentication with Anthropic. - * @param {Object} [options={}] - Additional options for configuring the LLM. - * @param {Object} [options.modelOptions] - Model-specific options. - * @param {string} [options.modelOptions.model] - The name of the model to use. - * @param {number} [options.modelOptions.maxOutputTokens] - The maximum number of tokens to generate. - * @param {number} [options.modelOptions.temperature] - Controls randomness in output generation. - * @param {number} [options.modelOptions.topP] - Controls diversity of output generation. - * @param {number} [options.modelOptions.topK] - Controls the number of top tokens to consider. - * @param {string[]} [options.modelOptions.stop] - Sequences where the API will stop generating further tokens. - * @param {boolean} [options.modelOptions.stream] - Whether to stream the response. - * @param {string} options.userId - The user ID for tracking and personalization. - * @param {string} [options.proxy] - Proxy server URL. - * @param {string} [options.reverseProxyUrl] - URL for a reverse proxy, if used. - * - * @returns {Object} Configuration options for creating an Anthropic LLM instance, with null and undefined values removed. - */ -function getLLMConfig(apiKey, options = {}) { - const systemOptions = { - thinking: options.modelOptions.thinking ?? anthropicSettings.thinking.default, - promptCache: options.modelOptions.promptCache ?? anthropicSettings.promptCache.default, - thinkingBudget: options.modelOptions.thinkingBudget ?? 
anthropicSettings.thinkingBudget.default, - }; - for (let key in systemOptions) { - delete options.modelOptions[key]; - } - const defaultOptions = { - model: anthropicSettings.model.default, - maxOutputTokens: anthropicSettings.maxOutputTokens.default, - stream: true, - }; - - const mergedOptions = Object.assign(defaultOptions, options.modelOptions); - - /** @type {AnthropicClientOptions} */ - let requestOptions = { - apiKey, - model: mergedOptions.model, - stream: mergedOptions.stream, - temperature: mergedOptions.temperature, - stopSequences: mergedOptions.stop, - maxTokens: - mergedOptions.maxOutputTokens || anthropicSettings.maxOutputTokens.reset(mergedOptions.model), - clientOptions: {}, - invocationKwargs: { - metadata: { - user_id: options.userId, - }, - }, - }; - - requestOptions = configureReasoning(requestOptions, systemOptions); - - if (!/claude-3[-.]7/.test(mergedOptions.model)) { - requestOptions.topP = mergedOptions.topP; - requestOptions.topK = mergedOptions.topK; - } else if (requestOptions.thinking == null) { - requestOptions.topP = mergedOptions.topP; - requestOptions.topK = mergedOptions.topK; - } - - const supportsCacheControl = - systemOptions.promptCache === true && checkPromptCacheSupport(requestOptions.model); - const headers = getClaudeHeaders(requestOptions.model, supportsCacheControl); - if (headers) { - requestOptions.clientOptions.defaultHeaders = headers; - } - - if (options.proxy) { - const proxyAgent = new ProxyAgent(options.proxy); - requestOptions.clientOptions.fetchOptions = { - dispatcher: proxyAgent, - }; - } - - if (options.reverseProxyUrl) { - requestOptions.clientOptions.baseURL = options.reverseProxyUrl; - requestOptions.anthropicApiUrl = options.reverseProxyUrl; - } - - const tools = []; - - if (mergedOptions.web_search) { - tools.push({ - type: 'web_search_20250305', - name: 'web_search', - }); - } - - return { - tools, - /** @type {AnthropicClientOptions} */ - llmConfig: removeNullishValues(requestOptions), - }; -} - -module.exports = { getLLMConfig }; diff --git a/api/server/services/Endpoints/bedrock/initialize.js b/api/server/services/Endpoints/bedrock/initialize.js index 4d9ba361c..bbee7caf3 100644 --- a/api/server/services/Endpoints/bedrock/initialize.js +++ b/api/server/services/Endpoints/bedrock/initialize.js @@ -1,3 +1,4 @@ +const { getModelMaxTokens } = require('@librechat/api'); const { createContentAggregator } = require('@librechat/agents'); const { EModelEndpoint, @@ -7,7 +8,6 @@ const { const { getDefaultHandlers } = require('~/server/controllers/agents/callbacks'); const getOptions = require('~/server/services/Endpoints/bedrock/options'); const AgentClient = require('~/server/controllers/agents/client'); -const { getModelMaxTokens } = require('~/utils'); const initializeClient = async ({ req, res, endpointOption }) => { if (!endpointOption) { diff --git a/api/server/services/ModelService.js b/api/server/services/ModelService.js index e9876269c..10b08c99a 100644 --- a/api/server/services/ModelService.js +++ b/api/server/services/ModelService.js @@ -1,13 +1,13 @@ const axios = require('axios'); const { Providers } = require('@librechat/agents'); -const { logAxiosError } = require('@librechat/api'); const { logger } = require('@librechat/data-schemas'); const { HttpsProxyAgent } = require('https-proxy-agent'); +const { logAxiosError, inputSchema, processModelData } = require('@librechat/api'); const { EModelEndpoint, defaultModels, CacheKeys } = require('librechat-data-provider'); -const { inputSchema, extractBaseURL, processModelData 
} = require('~/utils'); const { OllamaClient } = require('~/app/clients/OllamaClient'); const { isUserProvided } = require('~/server/utils'); const getLogStores = require('~/cache/getLogStores'); +const { extractBaseURL } = require('~/utils'); /** * Splits a string by commas and trims each resulting value. diff --git a/api/server/services/ModelService.spec.js b/api/server/services/ModelService.spec.js index 33ab9a7aa..d193b65f4 100644 --- a/api/server/services/ModelService.spec.js +++ b/api/server/services/ModelService.spec.js @@ -11,8 +11,8 @@ const { getAnthropicModels, } = require('./ModelService'); -jest.mock('~/utils', () => { - const originalUtils = jest.requireActual('~/utils'); +jest.mock('@librechat/api', () => { + const originalUtils = jest.requireActual('@librechat/api'); return { ...originalUtils, processModelData: jest.fn((...args) => { @@ -108,7 +108,7 @@ describe('fetchModels with createTokenConfig true', () => { beforeEach(() => { // Clears the mock's history before each test - const _utils = require('~/utils'); + const _utils = require('@librechat/api'); axios.get.mockResolvedValue({ data }); }); @@ -120,7 +120,7 @@ describe('fetchModels with createTokenConfig true', () => { createTokenConfig: true, }); - const { processModelData } = require('~/utils'); + const { processModelData } = require('@librechat/api'); expect(processModelData).toHaveBeenCalled(); expect(processModelData).toHaveBeenCalledWith(data); }); diff --git a/api/utils/deriveBaseURL.spec.js b/api/utils/deriveBaseURL.spec.js index 6df0bc65c..50f64257f 100644 --- a/api/utils/deriveBaseURL.spec.js +++ b/api/utils/deriveBaseURL.spec.js @@ -1,7 +1,7 @@ const axios = require('axios'); const deriveBaseURL = require('./deriveBaseURL'); -jest.mock('~/utils', () => { - const originalUtils = jest.requireActual('~/utils'); +jest.mock('@librechat/api', () => { + const originalUtils = jest.requireActual('@librechat/api'); return { ...originalUtils, processModelData: jest.fn((...args) => { diff --git a/api/utils/index.js b/api/utils/index.js index b80c9b0c3..dc5f3a673 100644 --- a/api/utils/index.js +++ b/api/utils/index.js @@ -1,4 +1,3 @@ -const tokenHelpers = require('./tokens'); const deriveBaseURL = require('./deriveBaseURL'); const extractBaseURL = require('./extractBaseURL'); const findMessageContent = require('./findMessageContent'); @@ -6,6 +5,5 @@ const findMessageContent = require('./findMessageContent'); module.exports = { deriveBaseURL, extractBaseURL, - ...tokenHelpers, findMessageContent, }; diff --git a/api/utils/tokens.spec.js b/api/utils/tokens.spec.js index 6d09b012b..338322e0e 100644 --- a/api/utils/tokens.spec.js +++ b/api/utils/tokens.spec.js @@ -1,12 +1,12 @@ const { EModelEndpoint } = require('librechat-data-provider'); const { + maxTokensMap, + matchModelName, + processModelData, + getModelMaxTokens, maxOutputTokensMap, findMatchingPattern, - getModelMaxTokens, - processModelData, - matchModelName, - maxTokensMap, -} = require('./tokens'); +} = require('@librechat/api'); describe('getModelMaxTokens', () => { test('should return correct tokens for exact match', () => { @@ -394,7 +394,7 @@ describe('getModelMaxTokens', () => { }); test('should return correct max output tokens for GPT-5 models', () => { - const { getModelMaxOutputTokens } = require('./tokens'); + const { getModelMaxOutputTokens } = require('@librechat/api'); ['gpt-5', 'gpt-5-mini', 'gpt-5-nano'].forEach((model) => { expect(getModelMaxOutputTokens(model)).toBe(maxOutputTokensMap[EModelEndpoint.openAI][model]); 
expect(getModelMaxOutputTokens(model, EModelEndpoint.openAI)).toBe( @@ -407,7 +407,7 @@ describe('getModelMaxTokens', () => { }); test('should return correct max output tokens for GPT-OSS models', () => { - const { getModelMaxOutputTokens } = require('./tokens'); + const { getModelMaxOutputTokens } = require('@librechat/api'); ['gpt-oss-20b', 'gpt-oss-120b'].forEach((model) => { expect(getModelMaxOutputTokens(model)).toBe(maxOutputTokensMap[EModelEndpoint.openAI][model]); expect(getModelMaxOutputTokens(model, EModelEndpoint.openAI)).toBe( diff --git a/package-lock.json b/package-lock.json index 6694c3249..16ef73fa6 100644 --- a/package-lock.json +++ b/package-lock.json @@ -64,7 +64,7 @@ "@langchain/google-vertexai": "^0.2.13", "@langchain/openai": "^0.5.18", "@langchain/textsplitters": "^0.1.0", - "@librechat/agents": "^2.4.77", + "@librechat/agents": "^2.4.79", "@librechat/api": "*", "@librechat/data-schemas": "*", "@microsoft/microsoft-graph-client": "^3.0.7", @@ -21909,9 +21909,9 @@ } }, "node_modules/@librechat/agents": { - "version": "2.4.77", - "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-2.4.77.tgz", - "integrity": "sha512-x7fWbbdJpy8VpIYJa7E0laBUmtgveTmTzYS8QFkXUMjzqSx7nN5ruM6rzmcodOWRXt7IrB12k4VehJ1zUnb29A==", + "version": "2.4.79", + "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-2.4.79.tgz", + "integrity": "sha512-Ha8tBPNy9ycPMH+GfBL8lUKz4vC3aXWSO1BZt7x9wDkfVLQBd3XhtkYv0xMvA8y7i6YMowBoyAkkWpX3R8DeJg==", "license": "MIT", "dependencies": { "@langchain/anthropic": "^0.3.26", @@ -51711,7 +51711,7 @@ }, "packages/api": { "name": "@librechat/api", - "version": "1.3.4", + "version": "1.3.5", "license": "ISC", "devDependencies": { "@babel/preset-env": "^7.21.5", @@ -51744,7 +51744,7 @@ }, "peerDependencies": { "@langchain/core": "^0.3.62", - "@librechat/agents": "^2.4.77", + "@librechat/agents": "^2.4.79", "@librechat/data-schemas": "*", "@modelcontextprotocol/sdk": "^1.17.1", "axios": "^1.8.2", diff --git a/packages/api/package.json b/packages/api/package.json index 3ed5e57b1..9d3c3ad8b 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -1,6 +1,6 @@ { "name": "@librechat/api", - "version": "1.3.4", + "version": "1.3.5", "type": "commonjs", "description": "MCP services for LibreChat", "main": "dist/index.js", @@ -73,7 +73,7 @@ }, "peerDependencies": { "@langchain/core": "^0.3.62", - "@librechat/agents": "^2.4.77", + "@librechat/agents": "^2.4.79", "@librechat/data-schemas": "*", "@modelcontextprotocol/sdk": "^1.17.1", "axios": "^1.8.2", diff --git a/api/server/services/Endpoints/anthropic/helpers.js b/packages/api/src/endpoints/anthropic/helpers.ts similarity index 77% rename from api/server/services/Endpoints/anthropic/helpers.js rename to packages/api/src/endpoints/anthropic/helpers.ts index e47e5abb4..ae199ce89 100644 --- a/api/server/services/Endpoints/anthropic/helpers.js +++ b/packages/api/src/endpoints/anthropic/helpers.ts @@ -1,13 +1,14 @@ -const { EModelEndpoint, anthropicSettings } = require('librechat-data-provider'); -const { matchModelName } = require('~/utils'); -const { logger } = require('~/config'); +import { logger } from '@librechat/data-schemas'; +import { AnthropicClientOptions } from '@librechat/agents'; +import { EModelEndpoint, anthropicSettings } from 'librechat-data-provider'; +import { matchModelName } from '~/utils/tokens'; /** * @param {string} modelName * @returns {boolean} */ -function checkPromptCacheSupport(modelName) { - const modelMatch = matchModelName(modelName, 
EModelEndpoint.anthropic); +function checkPromptCacheSupport(modelName: string): boolean { + const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic) ?? ''; if ( modelMatch.includes('claude-3-5-sonnet-latest') || modelMatch.includes('claude-3.5-sonnet-latest') @@ -31,7 +32,10 @@ function checkPromptCacheSupport(modelName) { * @param {boolean} supportsCacheControl Whether the model supports cache control * @returns {AnthropicClientOptions['extendedOptions']['defaultHeaders']|undefined} The headers object or undefined if not applicable */ -function getClaudeHeaders(model, supportsCacheControl) { +function getClaudeHeaders( + model: string, + supportsCacheControl: boolean, +): Record | undefined { if (!supportsCacheControl) { return undefined; } @@ -72,9 +76,13 @@ function getClaudeHeaders(model, supportsCacheControl) { * @param {number|null} extendedOptions.thinkingBudget The token budget for thinking * @returns {Object} Updated request options */ -function configureReasoning(anthropicInput, extendedOptions = {}) { +function configureReasoning( + anthropicInput: AnthropicClientOptions & { max_tokens?: number }, + extendedOptions: { thinking?: boolean; thinkingBudget?: number | null } = {}, +): AnthropicClientOptions & { max_tokens?: number } { const updatedOptions = { ...anthropicInput }; const currentMaxTokens = updatedOptions.max_tokens ?? updatedOptions.maxTokens; + if ( extendedOptions.thinking && updatedOptions?.model && @@ -82,11 +90,16 @@ function configureReasoning(anthropicInput, extendedOptions = {}) { /claude-(?:sonnet|opus|haiku)-[4-9]/.test(updatedOptions.model)) ) { updatedOptions.thinking = { + ...updatedOptions.thinking, type: 'enabled', - }; + } as { type: 'enabled'; budget_tokens: number }; } - if (updatedOptions.thinking != null && extendedOptions.thinkingBudget != null) { + if ( + updatedOptions.thinking != null && + extendedOptions.thinkingBudget != null && + updatedOptions.thinking.type === 'enabled' + ) { updatedOptions.thinking = { ...updatedOptions.thinking, budget_tokens: extendedOptions.thinkingBudget, @@ -95,9 +108,10 @@ function configureReasoning(anthropicInput, extendedOptions = {}) { if ( updatedOptions.thinking != null && + updatedOptions.thinking.type === 'enabled' && (currentMaxTokens == null || updatedOptions.thinking.budget_tokens > currentMaxTokens) ) { - const maxTokens = anthropicSettings.maxOutputTokens.reset(updatedOptions.model); + const maxTokens = anthropicSettings.maxOutputTokens.reset(updatedOptions.model ?? ''); updatedOptions.max_tokens = currentMaxTokens ?? 
maxTokens; logger.warn( @@ -115,4 +129,4 @@ function configureReasoning(anthropicInput, extendedOptions = {}) { return updatedOptions; } -module.exports = { checkPromptCacheSupport, getClaudeHeaders, configureReasoning }; +export { checkPromptCacheSupport, getClaudeHeaders, configureReasoning }; diff --git a/packages/api/src/endpoints/anthropic/index.ts b/packages/api/src/endpoints/anthropic/index.ts new file mode 100644 index 000000000..724cfda75 --- /dev/null +++ b/packages/api/src/endpoints/anthropic/index.ts @@ -0,0 +1,2 @@ +export * from './helpers'; +export * from './llm'; diff --git a/api/server/services/Endpoints/anthropic/llm.spec.js b/packages/api/src/endpoints/anthropic/llm.spec.ts similarity index 91% rename from api/server/services/Endpoints/anthropic/llm.spec.js rename to packages/api/src/endpoints/anthropic/llm.spec.ts index fc132008f..447c10a07 100644 --- a/api/server/services/Endpoints/anthropic/llm.spec.js +++ b/packages/api/src/endpoints/anthropic/llm.spec.ts @@ -1,4 +1,5 @@ -const { getLLMConfig } = require('~/server/services/Endpoints/anthropic/llm'); +import { getLLMConfig } from './llm'; +import type * as t from '~/types'; jest.mock('https-proxy-agent', () => ({ HttpsProxyAgent: jest.fn().mockImplementation((proxy) => ({ proxy })), @@ -25,9 +26,9 @@ describe('getLLMConfig', () => { }); expect(result.llmConfig.clientOptions).toHaveProperty('fetchOptions'); - expect(result.llmConfig.clientOptions.fetchOptions).toHaveProperty('dispatcher'); - expect(result.llmConfig.clientOptions.fetchOptions.dispatcher).toBeDefined(); - expect(result.llmConfig.clientOptions.fetchOptions.dispatcher.constructor.name).toBe( + expect(result.llmConfig.clientOptions?.fetchOptions).toHaveProperty('dispatcher'); + expect(result.llmConfig.clientOptions?.fetchOptions?.dispatcher).toBeDefined(); + expect(result.llmConfig.clientOptions?.fetchOptions?.dispatcher.constructor.name).toBe( 'ProxyAgent', ); }); @@ -93,9 +94,10 @@ describe('getLLMConfig', () => { }; const result = getLLMConfig('test-key', { modelOptions }); const clientOptions = result.llmConfig.clientOptions; - expect(clientOptions.defaultHeaders).toBeDefined(); - expect(clientOptions.defaultHeaders).toHaveProperty('anthropic-beta'); - expect(clientOptions.defaultHeaders['anthropic-beta']).toBe( + expect(clientOptions?.defaultHeaders).toBeDefined(); + expect(clientOptions?.defaultHeaders).toHaveProperty('anthropic-beta'); + const defaultHeaders = clientOptions?.defaultHeaders as Record; + expect(defaultHeaders['anthropic-beta']).toBe( 'prompt-caching-2024-07-31,context-1m-2025-08-07', ); }); @@ -111,9 +113,10 @@ describe('getLLMConfig', () => { const modelOptions = { model, promptCache: true }; const result = getLLMConfig('test-key', { modelOptions }); const clientOptions = result.llmConfig.clientOptions; - expect(clientOptions.defaultHeaders).toBeDefined(); - expect(clientOptions.defaultHeaders).toHaveProperty('anthropic-beta'); - expect(clientOptions.defaultHeaders['anthropic-beta']).toBe( + expect(clientOptions?.defaultHeaders).toBeDefined(); + expect(clientOptions?.defaultHeaders).toHaveProperty('anthropic-beta'); + const defaultHeaders = clientOptions?.defaultHeaders as Record; + expect(defaultHeaders['anthropic-beta']).toBe( 'prompt-caching-2024-07-31,context-1m-2025-08-07', ); }); @@ -211,13 +214,13 @@ describe('getLLMConfig', () => { it('should handle empty modelOptions', () => { expect(() => { getLLMConfig('test-api-key', {}); - }).toThrow("Cannot read properties of undefined (reading 'thinking')"); + }).toThrow('No 
modelOptions provided'); }); it('should handle no options parameter', () => { expect(() => { getLLMConfig('test-api-key'); - }).toThrow("Cannot read properties of undefined (reading 'thinking')"); + }).toThrow('No modelOptions provided'); }); it('should handle temperature, stop sequences, and stream settings', () => { @@ -238,7 +241,7 @@ describe('getLLMConfig', () => { const result = getLLMConfig('test-api-key', { modelOptions: { model: 'claude-3-opus', - maxOutputTokens: null, + maxOutputTokens: undefined, }, }); @@ -254,9 +257,9 @@ describe('getLLMConfig', () => { }); expect(result.llmConfig.clientOptions).toHaveProperty('fetchOptions'); - expect(result.llmConfig.clientOptions.fetchOptions).toHaveProperty('dispatcher'); - expect(result.llmConfig.clientOptions.fetchOptions.dispatcher).toBeDefined(); - expect(result.llmConfig.clientOptions.fetchOptions.dispatcher.constructor.name).toBe( + expect(result.llmConfig.clientOptions?.fetchOptions).toHaveProperty('dispatcher'); + expect(result.llmConfig.clientOptions?.fetchOptions?.dispatcher).toBeDefined(); + expect(result.llmConfig.clientOptions?.fetchOptions?.dispatcher.constructor.name).toBe( 'ProxyAgent', ); expect(result.llmConfig.clientOptions).toHaveProperty('baseURL', 'https://reverse-proxy.com'); @@ -272,7 +275,7 @@ describe('getLLMConfig', () => { }); // claude-3-5-sonnet supports prompt caching and should get the appropriate headers - expect(result.llmConfig.clientOptions.defaultHeaders).toEqual({ + expect(result.llmConfig.clientOptions?.defaultHeaders).toEqual({ 'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31', }); }); @@ -325,7 +328,7 @@ describe('getLLMConfig', () => { it('should handle all nullish values removal', () => { const result = getLLMConfig('test-api-key', { modelOptions: { - temperature: null, + temperature: undefined, topP: undefined, topK: 0, stop: [], @@ -359,9 +362,11 @@ describe('getLLMConfig', () => { // Simulate clientOptions from initialize.js const clientOptions = { proxy: null, - userId: 'test-user-id-123', reverseProxyUrl: null, - modelOptions: endpointOption.model_parameters, + modelOptions: { + ...endpointOption.model_parameters, + user: 'test-user-id-123', + }, streamRate: 25, titleModel: 'claude-3-haiku', }; @@ -390,12 +395,12 @@ describe('getLLMConfig', () => { const anthropicApiKey = 'sk-ant-proxy-key'; const clientOptions = { proxy: 'http://corporate-proxy:8080', - userId: 'proxy-user-456', reverseProxyUrl: null, modelOptions: { model: 'claude-3-opus', temperature: 0.3, maxOutputTokens: 2048, + user: 'proxy-user-456', }, }; @@ -412,8 +417,8 @@ describe('getLLMConfig', () => { }, }, }); - expect(result.llmConfig.clientOptions.fetchOptions).toHaveProperty('dispatcher'); - expect(result.llmConfig.clientOptions.fetchOptions.dispatcher.constructor.name).toBe( + expect(result.llmConfig.clientOptions?.fetchOptions).toHaveProperty('dispatcher'); + expect(result.llmConfig.clientOptions?.fetchOptions?.dispatcher.constructor.name).toBe( 'ProxyAgent', ); }); @@ -423,12 +428,12 @@ describe('getLLMConfig', () => { const reverseProxyUrl = 'https://api.custom-anthropic.com/v1'; const clientOptions = { proxy: null, - userId: 'reverse-proxy-user', reverseProxyUrl: reverseProxyUrl, modelOptions: { model: 'claude-3-5-haiku', temperature: 0.5, stream: false, + user: 'reverse-proxy-user', }, }; @@ -450,7 +455,6 @@ describe('getLLMConfig', () => { describe('Model-Specific Real Usage Scenarios', () => { it('should handle Claude-3.7 with thinking enabled like production', () => { const 
clientOptions = { - userId: 'thinking-user-789', modelOptions: { model: 'claude-3-7-sonnet', temperature: 0.4, @@ -460,6 +464,7 @@ describe('getLLMConfig', () => { thinking: true, thinkingBudget: 3000, promptCache: true, + user: 'thinking-user-789', }, }; @@ -479,7 +484,7 @@ describe('getLLMConfig', () => { expect(result.llmConfig).not.toHaveProperty('topP'); expect(result.llmConfig).not.toHaveProperty('topK'); // Should have appropriate headers for Claude-3.7 with prompt cache - expect(result.llmConfig.clientOptions.defaultHeaders).toEqual({ + expect(result.llmConfig.clientOptions?.defaultHeaders).toEqual({ 'anthropic-beta': 'token-efficient-tools-2025-02-19,output-128k-2025-02-19,prompt-caching-2024-07-31', }); @@ -487,12 +492,12 @@ describe('getLLMConfig', () => { it('should handle web search functionality like production', () => { const clientOptions = { - userId: 'websearch-user-303', modelOptions: { model: 'claude-3-5-sonnet-latest', temperature: 0.6, maxOutputTokens: 4096, web_search: true, + user: 'websearch-user-303', }, }; @@ -516,7 +521,6 @@ describe('getLLMConfig', () => { it('should handle complex production configuration', () => { const clientOptions = { proxy: 'http://prod-proxy.company.com:3128', - userId: 'prod-user-enterprise-404', reverseProxyUrl: 'https://anthropic-gateway.company.com/v1', modelOptions: { model: 'claude-3-opus-20240229', @@ -527,6 +531,7 @@ describe('getLLMConfig', () => { stop: ['\\n\\nHuman:', '\\n\\nAssistant:', 'END_CONVERSATION'], stream: true, promptCache: true, + user: 'prod-user-enterprise-404', }, streamRate: 15, // Conservative stream rate titleModel: 'claude-3-haiku-20240307', @@ -571,10 +576,10 @@ describe('getLLMConfig', () => { // Regular options that should remain topP: 0.9, topK: 40, + user: 'system-options-user', }; const clientOptions = { - userId: 'system-options-user', modelOptions, }; @@ -592,29 +597,30 @@ describe('getLLMConfig', () => { }); describe('Error Handling and Edge Cases from Real Usage', () => { - it('should handle missing userId gracefully', () => { + it('should handle missing `user` ID string gracefully', () => { const clientOptions = { modelOptions: { model: 'claude-3-haiku', temperature: 0.5, + // `user` is missing }, - // userId is missing }; const result = getLLMConfig('sk-ant-no-user-key', clientOptions); - expect(result.llmConfig.invocationKwargs.metadata).toMatchObject({ + expect(result.llmConfig.invocationKwargs?.metadata).toMatchObject({ user_id: undefined, }); }); it('should handle large parameter sets without performance issues', () => { - const largeModelOptions = { + const largeModelOptions: Record = { model: 'claude-3-opus', temperature: 0.7, maxOutputTokens: 4096, topP: 0.9, topK: 40, + user: 'performance-test-user', }; // Add many additional properties to test performance @@ -623,7 +629,6 @@ describe('getLLMConfig', () => { } const clientOptions = { - userId: 'performance-test-user', modelOptions: largeModelOptions, proxy: 'http://performance-proxy:8080', reverseProxyUrl: 'https://performance-reverse-proxy.com', @@ -654,7 +659,6 @@ describe('getLLMConfig', () => { modelVariations.forEach((model) => { const clientOptions = { - userId: 'model-variation-user', modelOptions: { model, temperature: 0.5, @@ -662,6 +666,7 @@ describe('getLLMConfig', () => { topK: 40, thinking: true, promptCache: true, + user: 'model-variation-user', }, }; @@ -720,7 +725,7 @@ describe('getLLMConfig', () => { budget_tokens: 2000, // default thinkingBudget }); // Should have prompt cache headers by default - 
expect(result.llmConfig.clientOptions.defaultHeaders).toBeDefined(); + expect(result.llmConfig.clientOptions?.defaultHeaders).toBeDefined(); }); }); @@ -810,7 +815,9 @@ describe('getLLMConfig', () => { thinkingBudget, }, }); - expect(result.llmConfig.thinking.budget_tokens).toBe(expected); + expect((result.llmConfig.thinking as t.ThinkingConfigEnabled)?.budget_tokens).toBe( + expected, + ); }); }); }); @@ -839,12 +846,14 @@ describe('getLLMConfig', () => { thinkingBudget, }, }); - expect(result.llmConfig.thinking.budget_tokens).toBe(expectedBudget); + expect((result.llmConfig.thinking as t.ThinkingConfigEnabled)?.budget_tokens).toBe( + expectedBudget, + ); }); }); it('should handle topP/topK exclusion logic for Claude-3.7 models', () => { - const testCases = [ + const testCases: (t.AnthropicModelOptions & { shouldInclude: boolean })[] = [ // Claude-3.7 with thinking = true - should exclude topP/topK { model: 'claude-3-7-sonnet', thinking: true, shouldInclude: false }, { model: 'claude-3.7-sonnet', thinking: true, shouldInclude: false }, @@ -900,13 +909,15 @@ describe('getLLMConfig', () => { modelOptions: { model, promptCache }, }); + const headers = result.llmConfig.clientOptions?.defaultHeaders; + if (shouldHaveHeaders) { - expect(result.llmConfig.clientOptions.defaultHeaders).toBeDefined(); - expect(result.llmConfig.clientOptions.defaultHeaders['anthropic-beta']).toContain( + expect(headers).toBeDefined(); + expect((headers as Record)['anthropic-beta']).toContain( 'prompt-caching', ); } else { - expect(result.llmConfig.clientOptions.defaultHeaders).toBeUndefined(); + expect(headers).toBeUndefined(); } }); }); @@ -926,8 +937,8 @@ describe('getLLMConfig', () => { ]; testCases.forEach((testCase) => { - const key = Object.keys(testCase)[0]; - const value = testCase[key]; + const key = Object.keys(testCase)[0] as keyof t.AnthropicModelOptions; + const value = (testCase as unknown as t.AnthropicModelOptions)[key]; const expected = testCase.expected; const result = getLLMConfig('test-key', { @@ -935,7 +946,7 @@ describe('getLLMConfig', () => { }); const outputKey = key === 'maxOutputTokens' ? 
'maxTokens' : key; - expect(result.llmConfig[outputKey]).toBe(expected); + expect(result.llmConfig[outputKey as keyof typeof result.llmConfig]).toBe(expected); }); }); @@ -950,7 +961,7 @@ describe('getLLMConfig', () => { testCases.forEach(({ stop, expected }) => { const result = getLLMConfig('test-key', { - modelOptions: { model: 'claude-3-opus', stop }, + modelOptions: { model: 'claude-3-opus', stop } as t.AnthropicModelOptions, }); if (expected === null || expected === undefined) { @@ -978,8 +989,8 @@ describe('getLLMConfig', () => { ]; testCases.forEach((testCase) => { - const key = Object.keys(testCase)[0]; - const value = testCase[key]; + const key = Object.keys(testCase)[0] as keyof t.AnthropicModelOptions; + const value = (testCase as unknown as t.AnthropicModelOptions)[key]; const expected = testCase.expected; const result = getLLMConfig('test-key', { @@ -1049,7 +1060,7 @@ describe('getLLMConfig', () => { // thinking is false, so no thinking object should be created expect(result.llmConfig.thinking).toBeUndefined(); // promptCache default is true, so should have headers - expect(result.llmConfig.clientOptions.defaultHeaders).toBeDefined(); + expect(result.llmConfig.clientOptions?.defaultHeaders).toBeDefined(); }); }); @@ -1125,7 +1136,7 @@ describe('getLLMConfig', () => { testCases.forEach(({ stop, expected }) => { const result = getLLMConfig('test-key', { - modelOptions: { model: 'claude-3-opus', stop }, + modelOptions: { model: 'claude-3-opus', stop } as t.AnthropicModelOptions, }); expect(result.llmConfig.stopSequences).toEqual(expected); diff --git a/packages/api/src/endpoints/anthropic/llm.ts b/packages/api/src/endpoints/anthropic/llm.ts new file mode 100644 index 000000000..1ba4090c7 --- /dev/null +++ b/packages/api/src/endpoints/anthropic/llm.ts @@ -0,0 +1,105 @@ +import { Dispatcher, ProxyAgent } from 'undici'; +import { AnthropicClientOptions } from '@librechat/agents'; +import { anthropicSettings, removeNullishValues } from 'librechat-data-provider'; +import type { AnthropicLLMConfigResult, AnthropicConfigOptions } from '~/types/anthropic'; +import { checkPromptCacheSupport, getClaudeHeaders, configureReasoning } from './helpers'; + +/** + * Generates configuration options for creating an Anthropic language model (LLM) instance. + * @param apiKey - The API key for authentication with Anthropic. + * @param options={} - Additional options for configuring the LLM. + * @returns Configuration options for creating an Anthropic LLM instance, with null and undefined values removed. + */ +function getLLMConfig( + apiKey?: string, + options: AnthropicConfigOptions = {} as AnthropicConfigOptions, +): AnthropicLLMConfigResult { + const systemOptions = { + thinking: options.modelOptions?.thinking ?? anthropicSettings.thinking.default, + promptCache: options.modelOptions?.promptCache ?? anthropicSettings.promptCache.default, + thinkingBudget: + options.modelOptions?.thinkingBudget ?? anthropicSettings.thinkingBudget.default, + }; + + /** Couldn't figure out a way to still loop through the object while deleting the overlapping keys when porting this + * over from javascript, so for now they are being deleted manually until a better way presents itself. 
+ */ + if (options.modelOptions) { + delete options.modelOptions.thinking; + delete options.modelOptions.promptCache; + delete options.modelOptions.thinkingBudget; + } else { + throw new Error('No modelOptions provided'); + } + + const defaultOptions = { + model: anthropicSettings.model.default, + maxOutputTokens: anthropicSettings.maxOutputTokens.default, + stream: true, + }; + + const mergedOptions = Object.assign(defaultOptions, options.modelOptions); + + let requestOptions: AnthropicClientOptions & { stream?: boolean } = { + apiKey, + model: mergedOptions.model, + stream: mergedOptions.stream, + temperature: mergedOptions.temperature, + stopSequences: mergedOptions.stop, + maxTokens: + mergedOptions.maxOutputTokens || anthropicSettings.maxOutputTokens.reset(mergedOptions.model), + clientOptions: {}, + invocationKwargs: { + metadata: { + user_id: mergedOptions.user, + }, + }, + }; + + requestOptions = configureReasoning(requestOptions, systemOptions); + + if (!/claude-3[-.]7/.test(mergedOptions.model)) { + requestOptions.topP = mergedOptions.topP; + requestOptions.topK = mergedOptions.topK; + } else if (requestOptions.thinking == null) { + requestOptions.topP = mergedOptions.topP; + requestOptions.topK = mergedOptions.topK; + } + + const supportsCacheControl = + systemOptions.promptCache === true && checkPromptCacheSupport(requestOptions.model ?? ''); + const headers = getClaudeHeaders(requestOptions.model ?? '', supportsCacheControl); + if (headers && requestOptions.clientOptions) { + requestOptions.clientOptions.defaultHeaders = headers; + } + + if (options.proxy && requestOptions.clientOptions) { + const proxyAgent = new ProxyAgent(options.proxy); + requestOptions.clientOptions.fetchOptions = { + dispatcher: proxyAgent, + }; + } + + if (options.reverseProxyUrl && requestOptions.clientOptions) { + requestOptions.clientOptions.baseURL = options.reverseProxyUrl; + requestOptions.anthropicApiUrl = options.reverseProxyUrl; + } + + const tools = []; + + if (mergedOptions.web_search) { + tools.push({ + type: 'web_search_20250305', + name: 'web_search', + }); + } + + return { + tools, + llmConfig: removeNullishValues( + requestOptions as Record, + ) as AnthropicClientOptions & { clientOptions?: { fetchOptions?: { dispatcher: Dispatcher } } }, + }; +} + +export { getLLMConfig }; diff --git a/packages/api/src/endpoints/google/llm.ts b/packages/api/src/endpoints/google/llm.ts index 0f82ce042..d5b821ee9 100644 --- a/packages/api/src/endpoints/google/llm.ts +++ b/packages/api/src/endpoints/google/llm.ts @@ -1,5 +1,5 @@ import { Providers } from '@librechat/agents'; -import { googleSettings, AuthKeys } from 'librechat-data-provider'; +import { googleSettings, AuthKeys, removeNullishValues } from 'librechat-data-provider'; import type { GoogleClientOptions, VertexAIClientOptions } from '@librechat/agents'; import type { GoogleAIToolType } from '@langchain/google-common'; import type * as t from '~/types'; @@ -112,11 +112,15 @@ export function getGoogleConfig( ...modelOptions } = options.modelOptions || {}; - const llmConfig: GoogleClientOptions | VertexAIClientOptions = { + const llmConfig: GoogleClientOptions | VertexAIClientOptions = removeNullishValues({ ...(modelOptions || {}), model: modelOptions?.model ?? '', maxRetries: 2, - }; + topP: modelOptions?.topP ?? undefined, + topK: modelOptions?.topK ?? undefined, + temperature: modelOptions?.temperature ?? undefined, + maxOutputTokens: modelOptions?.maxOutputTokens ?? 
undefined, + }); /** Used only for Safety Settings */ llmConfig.safetySettings = getSafetySettings(llmConfig.model); diff --git a/packages/api/src/endpoints/index.ts b/packages/api/src/endpoints/index.ts index 7b98ffcb6..56b95bf52 100644 --- a/packages/api/src/endpoints/index.ts +++ b/packages/api/src/endpoints/index.ts @@ -1,3 +1,4 @@ export * from './custom'; export * from './google'; export * from './openai'; +export * from './anthropic'; diff --git a/packages/api/src/endpoints/openai/config.anthropic.spec.ts b/packages/api/src/endpoints/openai/config.anthropic.spec.ts new file mode 100644 index 000000000..cf82efcae --- /dev/null +++ b/packages/api/src/endpoints/openai/config.anthropic.spec.ts @@ -0,0 +1,551 @@ +import { getOpenAIConfig } from './config'; + +describe('getOpenAIConfig - Anthropic Compatibility', () => { + describe('Anthropic via LiteLLM', () => { + it('should handle basic Anthropic configuration with defaultParamsEndpoint', () => { + const apiKey = 'sk-xxxx'; + const endpoint = 'Anthropic (via LiteLLM)'; + const options = { + modelOptions: { + model: 'claude-sonnet-4', + user: 'some_user_id', + }, + reverseProxyUrl: 'http://host.docker.internal:4000/v1', + proxy: '', + headers: {}, + addParams: undefined, + dropParams: undefined, + customParams: { + defaultParamsEndpoint: 'anthropic', + paramDefinitions: [], + }, + endpoint: 'Anthropic (via LiteLLM)', + endpointType: 'custom', + }; + + const result = getOpenAIConfig(apiKey, options, endpoint); + + expect(result).toEqual({ + llmConfig: { + apiKey: 'sk-xxxx', + model: 'claude-sonnet-4', + stream: true, + maxTokens: 8192, + modelKwargs: { + metadata: { + user_id: 'some_user_id', + }, + thinking: { + type: 'enabled', + budget_tokens: 2000, + }, + }, + }, + configOptions: { + baseURL: 'http://host.docker.internal:4000/v1', + defaultHeaders: { + 'anthropic-beta': 'prompt-caching-2024-07-31,context-1m-2025-08-07', + }, + }, + tools: [], + }); + }); + + it('should handle Claude 3.7 model with thinking enabled', () => { + const apiKey = 'sk-yyyy'; + const endpoint = 'Anthropic (via LiteLLM)'; + const options = { + modelOptions: { + model: 'claude-3.7-sonnet-20241022', + user: 'user123', + temperature: 0.7, + thinking: true, + thinkingBudget: 3000, + }, + reverseProxyUrl: 'http://localhost:4000/v1', + customParams: { + defaultParamsEndpoint: 'anthropic', + }, + endpoint: 'Anthropic (via LiteLLM)', + endpointType: 'custom', + }; + + const result = getOpenAIConfig(apiKey, options, endpoint); + + expect(result).toEqual({ + llmConfig: { + apiKey: 'sk-yyyy', + model: 'claude-3.7-sonnet-20241022', + stream: true, + temperature: 0.7, + maxTokens: 8192, + modelKwargs: { + metadata: { + user_id: 'user123', + }, + thinking: { + type: 'enabled', + budget_tokens: 3000, + }, + }, + }, + configOptions: { + baseURL: 'http://localhost:4000/v1', + defaultHeaders: { + 'anthropic-beta': + 'token-efficient-tools-2025-02-19,output-128k-2025-02-19,prompt-caching-2024-07-31', + }, + }, + tools: [], + }); + }); + + it('should handle Claude 3.7 model with thinking disabled (topP and topK included)', () => { + const apiKey = 'sk-yyyy'; + const endpoint = 'Anthropic (via LiteLLM)'; + const options = { + modelOptions: { + model: 'claude-3.7-sonnet-20241022', + user: 'user123', + temperature: 0.7, + topP: 0.9, + topK: 50, + thinking: false, + }, + reverseProxyUrl: 'http://localhost:4000/v1', + customParams: { + defaultParamsEndpoint: 'anthropic', + }, + endpoint: 'Anthropic (via LiteLLM)', + endpointType: 'custom', + }; + + const result = 
getOpenAIConfig(apiKey, options, endpoint); + + expect(result).toEqual({ + llmConfig: { + apiKey: 'sk-yyyy', + model: 'claude-3.7-sonnet-20241022', + stream: true, + temperature: 0.7, + topP: 0.9, + maxTokens: 8192, + modelKwargs: { + metadata: { + user_id: 'user123', + }, + topK: 50, + }, + }, + configOptions: { + baseURL: 'http://localhost:4000/v1', + defaultHeaders: { + 'anthropic-beta': + 'token-efficient-tools-2025-02-19,output-128k-2025-02-19,prompt-caching-2024-07-31', + }, + }, + tools: [], + }); + }); + + it('should handle Claude 3.5 sonnet with special headers', () => { + const apiKey = 'sk-zzzz'; + const endpoint = 'Anthropic (via LiteLLM)'; + const options = { + modelOptions: { + model: 'claude-3.5-sonnet-20240620', + user: 'user456', + maxOutputTokens: 4096, + }, + reverseProxyUrl: 'https://api.anthropic.proxy.com/v1', + customParams: { + defaultParamsEndpoint: 'anthropic', + }, + endpoint: 'Anthropic (via LiteLLM)', + endpointType: 'custom', + }; + + const result = getOpenAIConfig(apiKey, options, endpoint); + + expect(result).toEqual({ + llmConfig: { + apiKey: 'sk-zzzz', + model: 'claude-3.5-sonnet-20240620', + stream: true, + maxTokens: 4096, + modelKwargs: { + metadata: { + user_id: 'user456', + }, + }, + }, + configOptions: { + baseURL: 'https://api.anthropic.proxy.com/v1', + defaultHeaders: { + 'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31', + }, + }, + tools: [], + }); + }); + + it('should apply anthropic-beta headers based on model pattern', () => { + const apiKey = 'sk-custom'; + const endpoint = 'Anthropic (via LiteLLM)'; + const options = { + modelOptions: { + model: 'claude-3-sonnet', + }, + reverseProxyUrl: 'http://custom.proxy/v1', + headers: { + 'Custom-Header': 'custom-value', + Authorization: 'Bearer custom-token', + }, + customParams: { + defaultParamsEndpoint: 'anthropic', + }, + endpoint: 'Anthropic (via LiteLLM)', + endpointType: 'custom', + }; + + const result = getOpenAIConfig(apiKey, options, endpoint); + + expect(result).toEqual({ + llmConfig: { + apiKey: 'sk-custom', + model: 'claude-3-sonnet', + stream: true, + maxTokens: 8192, + modelKwargs: { + metadata: { + user_id: undefined, + }, + }, + }, + configOptions: { + baseURL: 'http://custom.proxy/v1', + defaultHeaders: { + 'Custom-Header': 'custom-value', + Authorization: 'Bearer custom-token', + 'anthropic-beta': 'prompt-caching-2024-07-31', + }, + }, + tools: [], + }); + }); + + it('should handle models that do not match Claude patterns', () => { + const apiKey = 'sk-other'; + const endpoint = 'Anthropic (via LiteLLM)'; + const options = { + modelOptions: { + model: 'gpt-4-turbo', + user: 'userGPT', + temperature: 0.8, + }, + reverseProxyUrl: 'http://litellm:4000/v1', + customParams: { + defaultParamsEndpoint: 'anthropic', + }, + endpoint: 'Anthropic (via LiteLLM)', + endpointType: 'custom', + }; + + const result = getOpenAIConfig(apiKey, options, endpoint); + + expect(result).toEqual({ + llmConfig: { + apiKey: 'sk-other', + model: 'gpt-4-turbo', + stream: true, + temperature: 0.8, + maxTokens: 8192, + modelKwargs: { + metadata: { + user_id: 'userGPT', + }, + }, + }, + configOptions: { + baseURL: 'http://litellm:4000/v1', + }, + tools: [], + }); + }); + + it('should handle dropParams correctly in Anthropic path', () => { + const apiKey = 'sk-drop'; + const endpoint = 'Anthropic (via LiteLLM)'; + const options = { + modelOptions: { + model: 'claude-3-opus-20240229', + user: 'userDrop', + temperature: 0.5, + maxOutputTokens: 2048, + topP: 0.9, + topK: 40, + }, + 
reverseProxyUrl: 'http://proxy.litellm/v1', + dropParams: ['temperature', 'topK', 'metadata'], + customParams: { + defaultParamsEndpoint: 'anthropic', + }, + endpoint: 'Anthropic (via LiteLLM)', + endpointType: 'custom', + }; + + const result = getOpenAIConfig(apiKey, options, endpoint); + + expect(result).toEqual({ + llmConfig: { + apiKey: 'sk-drop', + model: 'claude-3-opus-20240229', + stream: true, + topP: 0.9, + maxTokens: 2048, + // temperature is dropped + // modelKwargs.topK is dropped + // modelKwargs.metadata is dropped completely + }, + configOptions: { + baseURL: 'http://proxy.litellm/v1', + defaultHeaders: { + 'anthropic-beta': 'prompt-caching-2024-07-31', + }, + }, + tools: [], + }); + }); + + it('should handle empty user string', () => { + const apiKey = 'sk-edge'; + const endpoint = 'Anthropic (via LiteLLM)'; + const options = { + modelOptions: { + model: 'claude-2.1', + user: '', + temperature: 0, + }, + reverseProxyUrl: 'http://litellm/v1', + customParams: { + defaultParamsEndpoint: 'anthropic', + }, + endpoint: 'Anthropic (via LiteLLM)', + endpointType: 'custom', + }; + + const result = getOpenAIConfig(apiKey, options, endpoint); + + expect(result).toEqual({ + llmConfig: { + apiKey: 'sk-edge', + model: 'claude-2.1', + stream: true, + temperature: 0, + maxTokens: 8192, + modelKwargs: { + metadata: { + user_id: '', + }, + }, + }, + configOptions: { + baseURL: 'http://litellm/v1', + }, + tools: [], + }); + }); + + it('should handle web_search tool', () => { + const apiKey = 'sk-search'; + const endpoint = 'Anthropic (via LiteLLM)'; + const options = { + modelOptions: { + model: 'claude-3-opus-20240229', + user: 'searchUser', + web_search: true, + }, + reverseProxyUrl: 'http://litellm/v1', + customParams: { + defaultParamsEndpoint: 'anthropic', + }, + endpoint: 'Anthropic (via LiteLLM)', + endpointType: 'custom', + }; + + const result = getOpenAIConfig(apiKey, options, endpoint); + + expect(result).toEqual({ + llmConfig: { + apiKey: 'sk-search', + model: 'claude-3-opus-20240229', + stream: true, + maxTokens: 8192, + modelKwargs: { + metadata: { + user_id: 'searchUser', + }, + }, + }, + configOptions: { + baseURL: 'http://litellm/v1', + defaultHeaders: { + 'anthropic-beta': 'prompt-caching-2024-07-31', + }, + }, + tools: [ + { + type: 'web_search_20250305', + name: 'web_search', + }, + ], + }); + }); + + it('should properly transform Anthropic config with invocationKwargs', () => { + const apiKey = 'sk-test'; + const endpoint = 'Anthropic (via LiteLLM)'; + const options = { + modelOptions: { + model: 'claude-3.5-haiku-20241022', + user: 'testUser', + topP: 0.9, + topK: 40, + }, + reverseProxyUrl: 'http://litellm/v1', + customParams: { + defaultParamsEndpoint: 'anthropic', + }, + endpoint: 'Anthropic (via LiteLLM)', + endpointType: 'custom', + }; + + const result = getOpenAIConfig(apiKey, options, endpoint); + + expect(result).toEqual({ + llmConfig: { + apiKey: 'sk-test', + model: 'claude-3.5-haiku-20241022', + stream: true, + topP: 0.9, + maxTokens: 8192, + modelKwargs: { + metadata: { + user_id: 'testUser', + }, + topK: 40, + }, + }, + configOptions: { + baseURL: 'http://litellm/v1', + defaultHeaders: { + 'anthropic-beta': 'prompt-caching-2024-07-31', + }, + }, + tools: [], + }); + }); + + it('should handle addParams with Anthropic defaults', () => { + const apiKey = 'sk-add'; + const endpoint = 'Anthropic (via LiteLLM)'; + const options = { + modelOptions: { + model: 'claude-3-opus-20240229', + user: 'addUser', + temperature: 0.7, + }, + reverseProxyUrl: 'http://litellm/v1', 
+ addParams: { + customParam1: 'value1', + customParam2: 42, + frequencyPenalty: 0.5, // Known OpenAI param + }, + customParams: { + defaultParamsEndpoint: 'anthropic', + }, + endpoint: 'Anthropic (via LiteLLM)', + endpointType: 'custom', + }; + + const result = getOpenAIConfig(apiKey, options, endpoint); + + expect(result).toEqual({ + llmConfig: { + apiKey: 'sk-add', + model: 'claude-3-opus-20240229', + stream: true, + temperature: 0.7, + frequencyPenalty: 0.5, // Known param added to main config + maxTokens: 8192, + modelKwargs: { + metadata: { + user_id: 'addUser', + }, + customParam1: 'value1', // Unknown params added to modelKwargs + customParam2: 42, + }, + }, + configOptions: { + baseURL: 'http://litellm/v1', + defaultHeaders: { + 'anthropic-beta': 'prompt-caching-2024-07-31', + }, + }, + tools: [], + }); + }); + + it('should handle both addParams and dropParams together', () => { + const apiKey = 'sk-both'; + const endpoint = 'Anthropic (via LiteLLM)'; + const options = { + modelOptions: { + model: 'claude-3.5-sonnet-20240620', + user: 'bothUser', + temperature: 0.6, + topP: 0.9, + topK: 40, + }, + reverseProxyUrl: 'http://litellm/v1', + addParams: { + customParam: 'customValue', + maxRetries: 3, // Known OpenAI param + }, + dropParams: ['temperature', 'topK'], // Drop one known and one unknown param + customParams: { + defaultParamsEndpoint: 'anthropic', + }, + endpoint: 'Anthropic (via LiteLLM)', + endpointType: 'custom', + }; + + const result = getOpenAIConfig(apiKey, options, endpoint); + + expect(result).toEqual({ + llmConfig: { + apiKey: 'sk-both', + model: 'claude-3.5-sonnet-20240620', + stream: true, + topP: 0.9, + maxRetries: 3, + maxTokens: 8192, + modelKwargs: { + metadata: { + user_id: 'bothUser', + }, + customParam: 'customValue', + // topK is dropped + }, + }, + configOptions: { + baseURL: 'http://litellm/v1', + defaultHeaders: { + 'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31', + }, + }, + tools: [], + }); + }); + }); +}); diff --git a/packages/api/src/endpoints/openai/config.backward-compat.spec.ts b/packages/api/src/endpoints/openai/config.backward-compat.spec.ts new file mode 100644 index 000000000..0411fe43d --- /dev/null +++ b/packages/api/src/endpoints/openai/config.backward-compat.spec.ts @@ -0,0 +1,431 @@ +import { + Verbosity, + EModelEndpoint, + ReasoningEffort, + ReasoningSummary, +} from 'librechat-data-provider'; +import { getOpenAIConfig } from './config'; + +describe('getOpenAIConfig - Backward Compatibility', () => { + describe('OpenAI endpoint', () => { + it('should handle GPT-5 model with reasoning and web search', () => { + const apiKey = 'sk-proj-somekey'; + const endpoint = undefined; + const options = { + modelOptions: { + model: 'gpt-5-nano', + verbosity: Verbosity.high, + reasoning_effort: ReasoningEffort.high, + reasoning_summary: ReasoningSummary.detailed, + useResponsesApi: true, + web_search: true, + user: 'some-user', + }, + proxy: '', + reverseProxyUrl: null, + endpoint: EModelEndpoint.openAI, + }; + + const result = getOpenAIConfig(apiKey, options, endpoint); + + expect(result).toEqual({ + llmConfig: { + streaming: true, + model: 'gpt-5-nano', + useResponsesApi: true, + user: 'some-user', + apiKey: 'sk-proj-somekey', + reasoning: { + effort: ReasoningEffort.high, + summary: ReasoningSummary.detailed, + }, + modelKwargs: { + text: { + verbosity: Verbosity.high, + }, + }, + }, + configOptions: {}, + tools: [ + { + type: 'web_search_preview', + }, + ], + }); + }); + }); + + describe('OpenRouter endpoint', () 
=> { + it('should handle OpenRouter configuration with dropParams and custom headers', () => { + const apiKey = 'sk-xxxx'; + const endpoint = 'OpenRouter'; + const options = { + modelOptions: { + model: 'qwen/qwen3-max', + user: 'some-user', + }, + reverseProxyUrl: 'https://gateway.ai.cloudflare.com/v1/account-id/gateway-id/openrouter', + headers: { + 'x-librechat-thread-id': '{{LIBRECHAT_BODY_CONVERSATIONID}}', + 'x-test-key': '{{TESTING_USER_VAR}}', + }, + proxy: '', + dropParams: ['user'], + }; + + const result = getOpenAIConfig(apiKey, options, endpoint); + + expect(result).toEqual({ + llmConfig: { + streaming: true, + model: 'qwen/qwen3-max', + include_reasoning: true, + apiKey: 'sk-xxxx', + }, + configOptions: { + baseURL: 'https://gateway.ai.cloudflare.com/v1/account-id/gateway-id/openrouter', + defaultHeaders: { + 'HTTP-Referer': 'https://librechat.ai', + 'X-Title': 'LibreChat', + 'x-librechat-thread-id': '{{LIBRECHAT_BODY_CONVERSATIONID}}', + 'x-test-key': '{{TESTING_USER_VAR}}', + }, + }, + tools: [], + provider: 'openrouter', + }); + }); + }); + + describe('Azure OpenAI endpoint', () => { + it('should handle basic Azure OpenAI configuration', () => { + const apiKey = 'some_key'; + const endpoint = undefined; + const options = { + modelOptions: { + model: 'gpt-4o', + user: 'some_user_id', + }, + reverseProxyUrl: null, + endpoint: 'azureOpenAI', + azure: { + azureOpenAIApiKey: 'some_azure_key', + azureOpenAIApiInstanceName: 'some_instance_name', + azureOpenAIApiDeploymentName: 'gpt-4o', + azureOpenAIApiVersion: '2024-02-15-preview', + }, + }; + + const result = getOpenAIConfig(apiKey, options, endpoint); + + expect(result).toEqual({ + llmConfig: { + streaming: true, + model: 'gpt-4o', + user: 'some_user_id', + azureOpenAIApiKey: 'some_azure_key', + azureOpenAIApiInstanceName: 'some_instance_name', + azureOpenAIApiDeploymentName: 'gpt-4o', + azureOpenAIApiVersion: '2024-02-15-preview', + }, + configOptions: {}, + tools: [], + }); + }); + + it('should handle Azure OpenAI with Responses API and reasoning', () => { + const apiKey = 'some_azure_key'; + const endpoint = undefined; + const options = { + modelOptions: { + model: 'gpt-5', + reasoning_effort: ReasoningEffort.high, + reasoning_summary: ReasoningSummary.detailed, + verbosity: Verbosity.high, + useResponsesApi: true, + user: 'some_user_id', + }, + endpoint: 'azureOpenAI', + azure: { + azureOpenAIApiKey: 'some_azure_key', + azureOpenAIApiInstanceName: 'some_instance_name', + azureOpenAIApiDeploymentName: 'gpt-5', + azureOpenAIApiVersion: '2024-12-01-preview', + }, + }; + + const result = getOpenAIConfig(apiKey, options, endpoint); + + expect(result).toEqual({ + llmConfig: { + streaming: true, + model: 'gpt-5', + useResponsesApi: true, + user: 'some_user_id', + apiKey: 'some_azure_key', + reasoning: { + effort: ReasoningEffort.high, + summary: ReasoningSummary.detailed, + }, + modelKwargs: { + text: { + verbosity: Verbosity.high, + }, + }, + }, + configOptions: { + baseURL: 'https://some_instance_name.openai.azure.com/openai/v1', + defaultHeaders: { + 'api-key': 'some_azure_key', + }, + defaultQuery: { + 'api-version': 'preview', + }, + }, + tools: [], + }); + }); + + it('should handle Azure serverless configuration with dropParams', () => { + const apiKey = 'some_azure_key'; + const endpoint = undefined; + const options = { + modelOptions: { + model: 'jais-30b-chat', + user: 'some_user_id', + }, + reverseProxyUrl: 'https://some_endpoint_name.services.ai.azure.com/models', + endpoint: 'azureOpenAI', + headers: { + 'api-key': 
'some_azure_key', + }, + dropParams: ['stream_options', 'user'], + azure: false as const, + defaultQuery: { + 'api-version': '2024-05-01-preview', + }, + }; + + const result = getOpenAIConfig(apiKey, options, endpoint); + + expect(result).toEqual({ + llmConfig: { + streaming: true, + model: 'jais-30b-chat', + apiKey: 'some_azure_key', + }, + configOptions: { + baseURL: 'https://some_endpoint_name.services.ai.azure.com/models', + defaultHeaders: { + 'api-key': 'some_azure_key', + }, + defaultQuery: { + 'api-version': '2024-05-01-preview', + }, + }, + tools: [], + }); + }); + + it('should handle Azure serverless with user-provided key configuration', () => { + const apiKey = 'some_azure_key'; + const endpoint = undefined; + const options = { + modelOptions: { + model: 'grok-3', + user: 'some_user_id', + }, + reverseProxyUrl: 'https://some_endpoint_name.services.ai.azure.com/models', + endpoint: 'azureOpenAI', + headers: { + 'api-key': 'some_azure_key', + }, + dropParams: ['stream_options', 'user'], + azure: false as const, + defaultQuery: { + 'api-version': '2024-05-01-preview', + }, + }; + + const result = getOpenAIConfig(apiKey, options, endpoint); + + expect(result).toEqual({ + llmConfig: { + streaming: true, + model: 'grok-3', + apiKey: 'some_azure_key', + }, + configOptions: { + baseURL: 'https://some_endpoint_name.services.ai.azure.com/models', + defaultHeaders: { + 'api-key': 'some_azure_key', + }, + defaultQuery: { + 'api-version': '2024-05-01-preview', + }, + }, + tools: [], + }); + }); + + it('should handle Azure serverless with Mistral model configuration', () => { + const apiKey = 'some_azure_key'; + const endpoint = undefined; + const options = { + modelOptions: { + model: 'Mistral-Large-2411', + user: 'some_user_id', + }, + reverseProxyUrl: 'https://some_endpoint_name.services.ai.azure.com/models', + endpoint: 'azureOpenAI', + headers: { + 'api-key': 'some_azure_key', + }, + dropParams: ['stream_options', 'user'], + azure: false as const, + defaultQuery: { + 'api-version': '2024-05-01-preview', + }, + }; + + const result = getOpenAIConfig(apiKey, options, endpoint); + + expect(result).toEqual({ + llmConfig: { + streaming: true, + model: 'Mistral-Large-2411', + apiKey: 'some_azure_key', + }, + configOptions: { + baseURL: 'https://some_endpoint_name.services.ai.azure.com/models', + defaultHeaders: { + 'api-key': 'some_azure_key', + }, + defaultQuery: { + 'api-version': '2024-05-01-preview', + }, + }, + tools: [], + }); + }); + + it('should handle Azure serverless with DeepSeek model without dropParams', () => { + const apiKey = 'some_azure_key'; + const endpoint = undefined; + const options = { + modelOptions: { + model: 'DeepSeek-R1', + user: 'some_user_id', + }, + reverseProxyUrl: 'https://some_endpoint_name.models.ai.azure.com/v1/', + endpoint: 'azureOpenAI', + headers: { + 'api-key': 'some_azure_key', + }, + azure: false as const, + defaultQuery: { + 'api-version': '2024-08-01-preview', + }, + }; + + const result = getOpenAIConfig(apiKey, options, endpoint); + + expect(result).toEqual({ + llmConfig: { + streaming: true, + model: 'DeepSeek-R1', + user: 'some_user_id', + apiKey: 'some_azure_key', + }, + configOptions: { + baseURL: 'https://some_endpoint_name.models.ai.azure.com/v1/', + defaultHeaders: { + 'api-key': 'some_azure_key', + }, + defaultQuery: { + 'api-version': '2024-08-01-preview', + }, + }, + tools: [], + }); + }); + }); + + describe('Custom endpoints', () => { + it('should handle Groq custom endpoint configuration', () => { + const apiKey = 'gsk_somekey'; + const 
endpoint = 'groq'; + const options = { + modelOptions: { + model: 'qwen/qwen3-32b', + user: 'some-user', + }, + reverseProxyUrl: 'https://api.groq.com/openai/v1/', + proxy: '', + headers: {}, + endpoint: 'groq', + endpointType: 'custom', + }; + + const result = getOpenAIConfig(apiKey, options, endpoint); + + expect(result).toEqual({ + llmConfig: { + streaming: true, + model: 'qwen/qwen3-32b', + user: 'some-user', + apiKey: 'gsk_somekey', + }, + configOptions: { + baseURL: 'https://api.groq.com/openai/v1/', + defaultHeaders: {}, + }, + tools: [], + }); + }); + + it('should handle Cloudflare Workers AI with custom headers and addParams', () => { + const apiKey = 'someKey'; + const endpoint = 'Cloudflare Workers AI'; + const options = { + modelOptions: { + model: '@cf/deepseek-ai/deepseek-r1-distill-qwen-32b', + user: 'some-user', + }, + reverseProxyUrl: + 'https://gateway.ai.cloudflare.com/v1/${CF_ACCOUNT_ID}/${CF_GATEWAY_ID}/workers-ai/v1', + proxy: '', + headers: { + 'x-librechat-thread-id': '{{LIBRECHAT_BODY_CONVERSATIONID}}', + 'x-test-key': '{{TESTING_USER_VAR}}', + }, + addParams: { + disableStreaming: true, + }, + endpoint: 'Cloudflare Workers AI', + endpointType: 'custom', + }; + + const result = getOpenAIConfig(apiKey, options, endpoint); + + expect(result).toEqual({ + llmConfig: { + streaming: true, + model: '@cf/deepseek-ai/deepseek-r1-distill-qwen-32b', + user: 'some-user', + disableStreaming: true, + apiKey: 'someKey', + }, + configOptions: { + baseURL: + 'https://gateway.ai.cloudflare.com/v1/${CF_ACCOUNT_ID}/${CF_GATEWAY_ID}/workers-ai/v1', + defaultHeaders: { + 'x-librechat-thread-id': '{{LIBRECHAT_BODY_CONVERSATIONID}}', + 'x-test-key': '{{TESTING_USER_VAR}}', + }, + }, + tools: [], + }); + }); + }); +}); diff --git a/packages/api/src/endpoints/openai/llm.spec.ts b/packages/api/src/endpoints/openai/config.spec.ts similarity index 99% rename from packages/api/src/endpoints/openai/llm.spec.ts rename to packages/api/src/endpoints/openai/config.spec.ts index 4fd88e2e9..ac4e825a0 100644 --- a/packages/api/src/endpoints/openai/llm.spec.ts +++ b/packages/api/src/endpoints/openai/config.spec.ts @@ -1,7 +1,8 @@ import { Verbosity, ReasoningEffort, ReasoningSummary } from 'librechat-data-provider'; import type { RequestInit } from 'undici'; import type { OpenAIParameters, AzureOptions } from '~/types'; -import { getOpenAIConfig, knownOpenAIParams } from './llm'; +import { getOpenAIConfig } from './config'; +import { knownOpenAIParams } from './llm'; describe('getOpenAIConfig', () => { const mockApiKey = 'test-api-key'; diff --git a/packages/api/src/endpoints/openai/config.ts b/packages/api/src/endpoints/openai/config.ts new file mode 100644 index 000000000..d7412e0e1 --- /dev/null +++ b/packages/api/src/endpoints/openai/config.ts @@ -0,0 +1,150 @@ +import { ProxyAgent } from 'undici'; +import { Providers } from '@librechat/agents'; +import { KnownEndpoints, EModelEndpoint } from 'librechat-data-provider'; +import type * as t from '~/types'; +import { getLLMConfig as getAnthropicLLMConfig } from '~/endpoints/anthropic/llm'; +import { transformToOpenAIConfig } from './transform'; +import { constructAzureURL } from '~/utils/azure'; +import { createFetch } from '~/utils/generators'; +import { getOpenAILLMConfig } from './llm'; + +type Fetch = (input: string | URL | Request, init?: RequestInit) => Promise; + +/** + * Generates configuration options for creating a language model (LLM) instance. + * @param apiKey - The API key for authentication. 
+ * @param options - Additional options for configuring the LLM. + * @param endpoint - The endpoint name + * @returns Configuration options for creating an LLM instance. + */ +export function getOpenAIConfig( + apiKey: string, + options: t.OpenAIConfigOptions = {}, + endpoint?: string | null, +): t.OpenAIConfigResult { + const { + proxy, + addParams, + dropParams, + defaultQuery, + directEndpoint, + streaming = true, + modelOptions = {}, + reverseProxyUrl: baseURL, + } = options; + + let llmConfig: t.OAIClientOptions; + let tools: t.LLMConfigResult['tools']; + const isAnthropic = options.customParams?.defaultParamsEndpoint === EModelEndpoint.anthropic; + + const useOpenRouter = + !isAnthropic && + ((baseURL && baseURL.includes(KnownEndpoints.openrouter)) || + (endpoint != null && endpoint.toLowerCase().includes(KnownEndpoints.openrouter))); + + let azure = options.azure; + let headers = options.headers; + if (isAnthropic) { + const anthropicResult = getAnthropicLLMConfig(apiKey, { + modelOptions, + proxy: options.proxy, + }); + const transformed = transformToOpenAIConfig({ + addParams, + dropParams, + llmConfig: anthropicResult.llmConfig, + fromEndpoint: EModelEndpoint.anthropic, + }); + llmConfig = transformed.llmConfig; + tools = anthropicResult.tools; + if (transformed.configOptions?.defaultHeaders) { + headers = Object.assign(headers ?? {}, transformed.configOptions?.defaultHeaders); + } + } else { + const openaiResult = getOpenAILLMConfig({ + azure, + apiKey, + baseURL, + streaming, + addParams, + dropParams, + modelOptions, + useOpenRouter, + }); + llmConfig = openaiResult.llmConfig; + azure = openaiResult.azure; + tools = openaiResult.tools; + } + + const configOptions: t.OpenAIConfiguration = {}; + if (baseURL) { + configOptions.baseURL = baseURL; + } + if (useOpenRouter) { + configOptions.defaultHeaders = Object.assign( + { + 'HTTP-Referer': 'https://librechat.ai', + 'X-Title': 'LibreChat', + }, + headers, + ); + } else if (headers) { + configOptions.defaultHeaders = headers; + } + + if (defaultQuery) { + configOptions.defaultQuery = defaultQuery; + } + + if (proxy) { + const proxyAgent = new ProxyAgent(proxy); + configOptions.fetchOptions = { + dispatcher: proxyAgent, + }; + } + + if (azure && !isAnthropic) { + const constructAzureResponsesApi = () => { + if (!llmConfig.useResponsesApi || !azure) { + return; + } + + configOptions.baseURL = constructAzureURL({ + baseURL: configOptions.baseURL || 'https://${INSTANCE_NAME}.openai.azure.com/openai/v1', + azureOptions: azure, + }); + + configOptions.defaultHeaders = { + ...configOptions.defaultHeaders, + 'api-key': apiKey, + }; + configOptions.defaultQuery = { + ...configOptions.defaultQuery, + 'api-version': configOptions.defaultQuery?.['api-version'] ?? 
'preview', + }; + }; + + constructAzureResponsesApi(); + } + + if (process.env.OPENAI_ORGANIZATION && !isAnthropic) { + configOptions.organization = process.env.OPENAI_ORGANIZATION; + } + + if (directEndpoint === true && configOptions?.baseURL != null) { + configOptions.fetch = createFetch({ + directEndpoint: directEndpoint, + reverseProxyUrl: configOptions?.baseURL, + }) as unknown as Fetch; + } + + const result: t.OpenAIConfigResult = { + llmConfig, + configOptions, + tools, + }; + if (useOpenRouter) { + result.provider = Providers.OPENROUTER; + } + return result; +} diff --git a/packages/api/src/endpoints/openai/index.ts b/packages/api/src/endpoints/openai/index.ts index 46ec10686..0cbc3a276 100644 --- a/packages/api/src/endpoints/openai/index.ts +++ b/packages/api/src/endpoints/openai/index.ts @@ -1,2 +1,3 @@ export * from './llm'; +export * from './config'; export * from './initialize'; diff --git a/packages/api/src/endpoints/openai/initialize.ts b/packages/api/src/endpoints/openai/initialize.ts index 425aa3d55..b313c28bf 100644 --- a/packages/api/src/endpoints/openai/initialize.ts +++ b/packages/api/src/endpoints/openai/initialize.ts @@ -9,7 +9,7 @@ import { createHandleLLMNewToken } from '~/utils/generators'; import { getAzureCredentials } from '~/utils/azure'; import { isUserProvided } from '~/utils/common'; import { resolveHeaders } from '~/utils/env'; -import { getOpenAIConfig } from './llm'; +import { getOpenAIConfig } from './config'; /** * Initializes OpenAI options for agent usage. This function always returns configuration @@ -115,7 +115,7 @@ export const initializeOpenAI = async ({ } else if (isAzureOpenAI) { clientOptions.azure = userProvidesKey && userValues?.apiKey ? JSON.parse(userValues.apiKey) : getAzureCredentials(); - apiKey = clientOptions.azure?.azureOpenAIApiKey; + apiKey = clientOptions.azure ? clientOptions.azure.azureOpenAIApiKey : undefined; } if (userProvidesKey && !apiKey) { diff --git a/packages/api/src/endpoints/openai/llm.ts b/packages/api/src/endpoints/openai/llm.ts index cce8d622a..d508dcd1e 100644 --- a/packages/api/src/endpoints/openai/llm.ts +++ b/packages/api/src/endpoints/openai/llm.ts @@ -1,16 +1,11 @@ -import { ProxyAgent } from 'undici'; -import { Providers } from '@librechat/agents'; -import { KnownEndpoints, removeNullishValues } from 'librechat-data-provider'; +import { removeNullishValues } from 'librechat-data-provider'; import type { BindToolsInput } from '@langchain/core/language_models/chat_models'; import type { AzureOpenAIInput } from '@langchain/openai'; import type { OpenAI } from 'openai'; import type * as t from '~/types'; import { sanitizeModelName, constructAzureURL } from '~/utils/azure'; -import { createFetch } from '~/utils/generators'; import { isEnabled } from '~/utils/common'; -type Fetch = (input: string | URL | Request, init?: RequestInit) => Promise; - export const knownOpenAIParams = new Set([ // Constructor/Instance Parameters 'model', @@ -80,47 +75,44 @@ function hasReasoningParams({ ); } -/** - * Generates configuration options for creating a language model (LLM) instance. - * @param apiKey - The API key for authentication. - * @param options - Additional options for configuring the LLM. - * @param endpoint - The endpoint name - * @returns Configuration options for creating an LLM instance. 
- */ -export function getOpenAIConfig( - apiKey: string, - options: t.OpenAIConfigOptions = {}, - endpoint?: string | null, -): t.LLMConfigResult { - const { - modelOptions: _modelOptions = {}, - reverseProxyUrl, - directEndpoint, - defaultQuery, - headers, - proxy, - azure, - streaming = true, - addParams, - dropParams, - } = options; +export function getOpenAILLMConfig({ + azure, + apiKey, + baseURL, + streaming, + addParams, + dropParams, + useOpenRouter, + modelOptions: _modelOptions, +}: { + apiKey: string; + streaming: boolean; + baseURL?: string | null; + modelOptions: Partial; + addParams?: Record; + dropParams?: string[]; + useOpenRouter?: boolean; + azure?: false | t.AzureOptions; +}): Pick & { + azure?: t.AzureOptions; +} { const { reasoning_effort, reasoning_summary, verbosity, + web_search, frequency_penalty, presence_penalty, ...modelOptions } = _modelOptions; - const llmConfig: Partial & - Partial & - Partial = Object.assign( + + const llmConfig = Object.assign( { streaming, model: modelOptions.model ?? '', }, modelOptions, - ); + ) as Partial & Partial & Partial; if (frequency_penalty != null) { llmConfig.frequencyPenalty = frequency_penalty; @@ -148,104 +140,8 @@ export function getOpenAIConfig( } } - let useOpenRouter = false; - const configOptions: t.OpenAIConfiguration = {}; - - if ( - (reverseProxyUrl && reverseProxyUrl.includes(KnownEndpoints.openrouter)) || - (endpoint && endpoint.toLowerCase().includes(KnownEndpoints.openrouter)) - ) { - useOpenRouter = true; + if (useOpenRouter) { llmConfig.include_reasoning = true; - configOptions.baseURL = reverseProxyUrl; - configOptions.defaultHeaders = Object.assign( - { - 'HTTP-Referer': 'https://librechat.ai', - 'X-Title': 'LibreChat', - }, - headers, - ); - } else if (reverseProxyUrl) { - configOptions.baseURL = reverseProxyUrl; - if (headers) { - configOptions.defaultHeaders = headers; - } - } - - if (defaultQuery) { - configOptions.defaultQuery = defaultQuery; - } - - if (proxy) { - const proxyAgent = new ProxyAgent(proxy); - configOptions.fetchOptions = { - dispatcher: proxyAgent, - }; - } - - if (azure) { - const useModelName = isEnabled(process.env.AZURE_USE_MODEL_AS_DEPLOYMENT_NAME); - const updatedAzure = { ...azure }; - updatedAzure.azureOpenAIApiDeploymentName = useModelName - ? 
sanitizeModelName(llmConfig.model || '') - : azure.azureOpenAIApiDeploymentName; - - if (process.env.AZURE_OPENAI_DEFAULT_MODEL) { - llmConfig.model = process.env.AZURE_OPENAI_DEFAULT_MODEL; - } - - const constructBaseURL = () => { - if (!configOptions.baseURL) { - return; - } - const azureURL = constructAzureURL({ - baseURL: configOptions.baseURL, - azureOptions: updatedAzure, - }); - updatedAzure.azureOpenAIBasePath = azureURL.split( - `/${updatedAzure.azureOpenAIApiDeploymentName}`, - )[0]; - }; - - constructBaseURL(); - Object.assign(llmConfig, updatedAzure); - - const constructAzureResponsesApi = () => { - if (!llmConfig.useResponsesApi) { - return; - } - - configOptions.baseURL = constructAzureURL({ - baseURL: configOptions.baseURL || 'https://${INSTANCE_NAME}.openai.azure.com/openai/v1', - azureOptions: llmConfig, - }); - - delete llmConfig.azureOpenAIApiDeploymentName; - delete llmConfig.azureOpenAIApiInstanceName; - delete llmConfig.azureOpenAIApiVersion; - delete llmConfig.azureOpenAIBasePath; - delete llmConfig.azureOpenAIApiKey; - llmConfig.apiKey = apiKey; - - configOptions.defaultHeaders = { - ...configOptions.defaultHeaders, - 'api-key': apiKey, - }; - configOptions.defaultQuery = { - ...configOptions.defaultQuery, - 'api-version': configOptions.defaultQuery?.['api-version'] ?? 'preview', - }; - }; - - constructAzureResponsesApi(); - - llmConfig.model = updatedAzure.azureOpenAIApiDeploymentName; - } else { - llmConfig.apiKey = apiKey; - } - - if (process.env.OPENAI_ORGANIZATION && azure) { - configOptions.organization = process.env.OPENAI_ORGANIZATION; } if ( @@ -270,7 +166,7 @@ export function getOpenAIConfig( const tools: BindToolsInput[] = []; - if (modelOptions.web_search) { + if (web_search) { llmConfig.useResponsesApi = true; tools.push({ type: 'web_search_preview' }); } @@ -278,7 +174,7 @@ export function getOpenAIConfig( /** * Note: OpenAI Web Search models do not support any known parameters besides `max_tokens` */ - if (modelOptions.model && /gpt-4o.*search/.test(modelOptions.model)) { + if (modelOptions.model && /gpt-4o.*search/.test(modelOptions.model as string)) { const searchExcludeParams = [ 'frequency_penalty', 'presence_penalty', @@ -301,13 +197,13 @@ export function getOpenAIConfig( combinedDropParams.forEach((param) => { if (param in llmConfig) { - delete llmConfig[param as keyof t.ClientOptions]; + delete llmConfig[param as keyof t.OAIClientOptions]; } }); } else if (dropParams && Array.isArray(dropParams)) { dropParams.forEach((param) => { if (param in llmConfig) { - delete llmConfig[param as keyof t.ClientOptions]; + delete llmConfig[param as keyof t.OAIClientOptions]; } }); } @@ -329,20 +225,52 @@ export function getOpenAIConfig( llmConfig.modelKwargs = modelKwargs; } - if (directEndpoint === true && configOptions?.baseURL != null) { - configOptions.fetch = createFetch({ - directEndpoint: directEndpoint, - reverseProxyUrl: configOptions?.baseURL, - }) as unknown as Fetch; + if (!azure) { + llmConfig.apiKey = apiKey; + return { llmConfig, tools }; } - const result: t.LLMConfigResult = { - llmConfig, - configOptions, - tools, - }; - if (useOpenRouter) { - result.provider = Providers.OPENROUTER; + const useModelName = isEnabled(process.env.AZURE_USE_MODEL_AS_DEPLOYMENT_NAME); + const updatedAzure = { ...azure }; + updatedAzure.azureOpenAIApiDeploymentName = useModelName + ? 
sanitizeModelName(llmConfig.model || '') + : azure.azureOpenAIApiDeploymentName; + + if (process.env.AZURE_OPENAI_DEFAULT_MODEL) { + llmConfig.model = process.env.AZURE_OPENAI_DEFAULT_MODEL; } - return result; + + const constructAzureOpenAIBasePath = () => { + if (!baseURL) { + return; + } + const azureURL = constructAzureURL({ + baseURL, + azureOptions: updatedAzure, + }); + updatedAzure.azureOpenAIBasePath = azureURL.split( + `/${updatedAzure.azureOpenAIApiDeploymentName}`, + )[0]; + }; + + constructAzureOpenAIBasePath(); + Object.assign(llmConfig, updatedAzure); + + const constructAzureResponsesApi = () => { + if (!llmConfig.useResponsesApi) { + return; + } + + delete llmConfig.azureOpenAIApiDeploymentName; + delete llmConfig.azureOpenAIApiInstanceName; + delete llmConfig.azureOpenAIApiVersion; + delete llmConfig.azureOpenAIBasePath; + delete llmConfig.azureOpenAIApiKey; + llmConfig.apiKey = apiKey; + }; + + constructAzureResponsesApi(); + + llmConfig.model = updatedAzure.azureOpenAIApiDeploymentName; + return { llmConfig, tools, azure: updatedAzure }; } diff --git a/packages/api/src/endpoints/openai/transform.ts b/packages/api/src/endpoints/openai/transform.ts new file mode 100644 index 000000000..92917440c --- /dev/null +++ b/packages/api/src/endpoints/openai/transform.ts @@ -0,0 +1,95 @@ +import { EModelEndpoint } from 'librechat-data-provider'; +import type { ClientOptions } from '@librechat/agents'; +import type * as t from '~/types'; +import { knownOpenAIParams } from './llm'; + +const anthropicExcludeParams = new Set(['anthropicApiUrl']); + +/** + * Transforms a Non-OpenAI LLM config to an OpenAI-conformant config. + * Non-OpenAI parameters are moved to modelKwargs. + * Also extracts configuration options that belong in configOptions. + * Handles addParams and dropParams for parameter customization. + */ +export function transformToOpenAIConfig({ + addParams, + dropParams, + llmConfig, + fromEndpoint, +}: { + addParams?: Record; + dropParams?: string[]; + llmConfig: ClientOptions; + fromEndpoint: string; +}): { + llmConfig: t.OAIClientOptions; + configOptions: Partial; +} { + const openAIConfig: Partial = {}; + let configOptions: Partial = {}; + let modelKwargs: Record = {}; + let hasModelKwargs = false; + + const isAnthropic = fromEndpoint === EModelEndpoint.anthropic; + const excludeParams = isAnthropic ? 
anthropicExcludeParams : new Set(); + + for (const [key, value] of Object.entries(llmConfig)) { + if (value === undefined || value === null) { + continue; + } + + if (excludeParams.has(key)) { + continue; + } + + if (isAnthropic && key === 'clientOptions') { + configOptions = Object.assign({}, configOptions, value as Partial); + continue; + } else if (isAnthropic && key === 'invocationKwargs') { + modelKwargs = Object.assign({}, modelKwargs, value as Record); + hasModelKwargs = true; + continue; + } + + if (knownOpenAIParams.has(key)) { + (openAIConfig as Record)[key] = value; + } else { + modelKwargs[key] = value; + hasModelKwargs = true; + } + } + + if (addParams && typeof addParams === 'object') { + for (const [key, value] of Object.entries(addParams)) { + if (knownOpenAIParams.has(key)) { + (openAIConfig as Record)[key] = value; + } else { + modelKwargs[key] = value; + hasModelKwargs = true; + } + } + } + + if (hasModelKwargs) { + openAIConfig.modelKwargs = modelKwargs; + } + + if (dropParams && Array.isArray(dropParams)) { + dropParams.forEach((param) => { + if (param in openAIConfig) { + delete openAIConfig[param as keyof t.OAIClientOptions]; + } + if (openAIConfig.modelKwargs && param in openAIConfig.modelKwargs) { + delete openAIConfig.modelKwargs[param]; + if (Object.keys(openAIConfig.modelKwargs).length === 0) { + delete openAIConfig.modelKwargs; + } + } + }); + } + + return { + llmConfig: openAIConfig as t.OAIClientOptions, + configOptions, + }; +} diff --git a/packages/api/src/types/anthropic.ts b/packages/api/src/types/anthropic.ts new file mode 100644 index 000000000..c4c507ee4 --- /dev/null +++ b/packages/api/src/types/anthropic.ts @@ -0,0 +1,69 @@ +import { z } from 'zod'; +import { Dispatcher } from 'undici'; +import { anthropicSchema } from 'librechat-data-provider'; +import type { AnthropicClientOptions } from '@librechat/agents'; +import type { LLMConfigResult } from './openai'; + +export type AnthropicParameters = z.infer; + +export interface ThinkingConfigDisabled { + type: 'disabled'; +} + +export interface ThinkingConfigEnabled { + /** + * Determines how many tokens Claude can use for its internal reasoning process. + * Larger budgets can enable more thorough analysis for complex problems, improving + * response quality. + * + * Must be ≥1024 and less than `max_tokens`. + * + * See + * [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking) + * for details. + */ + budget_tokens: number; + + type: 'enabled'; +} + +/** + * Configuration for enabling Claude's extended thinking. + * + * When enabled, responses include `thinking` content blocks showing Claude's + * thinking process before the final answer. Requires a minimum budget of 1,024 + * tokens and counts towards your `max_tokens` limit. + * + * See + * [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking) + * for details. 
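+ *
+ * @example
+ * // Minimal illustrative values; the budget figure is arbitrary here, but per the note above it
+ * // must be ≥1024 and below `max_tokens`.
+ * const enabled: ThinkingConfigParam = { type: 'enabled', budget_tokens: 2048 };
+ * const disabled: ThinkingConfigParam = { type: 'disabled' };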
+ */ +export type ThinkingConfigParam = ThinkingConfigEnabled | ThinkingConfigDisabled; + +export type AnthropicModelOptions = Partial> & { + thinking?: AnthropicParameters['thinking'] | null; + user?: string; +}; + +/** + * Configuration options for the getLLMConfig function + */ +export interface AnthropicConfigOptions { + modelOptions?: AnthropicModelOptions; + /** Proxy server URL */ + proxy?: string | null; + /** URL for a reverse proxy, if used */ + reverseProxyUrl?: string | null; +} + +/** + * Return type for getLLMConfig function + */ +export type AnthropicLLMConfigResult = LLMConfigResult< + AnthropicClientOptions & { + clientOptions?: { + fetchOptions?: { dispatcher: Dispatcher }; + }; + stream?: boolean; + } +>; diff --git a/packages/api/src/types/index.ts b/packages/api/src/types/index.ts index f499ec4a9..5603c09a5 100644 --- a/packages/api/src/types/index.ts +++ b/packages/api/src/types/index.ts @@ -13,3 +13,4 @@ export * from './prompts'; export * from './run'; export * from './tools'; export * from './zod'; +export * from './anthropic'; diff --git a/packages/api/src/types/openai.ts b/packages/api/src/types/openai.ts index 387944c0d..338931493 100644 --- a/packages/api/src/types/openai.ts +++ b/packages/api/src/types/openai.ts @@ -1,6 +1,6 @@ import { z } from 'zod'; import { openAISchema, EModelEndpoint } from 'librechat-data-provider'; -import type { TEndpointOption, TAzureConfig, TEndpoint } from 'librechat-data-provider'; +import type { TEndpointOption, TAzureConfig, TEndpoint, TConfig } from 'librechat-data-provider'; import type { BindToolsInput } from '@langchain/core/language_models/chat_models'; import type { OpenAIClientOptions, Providers } from '@librechat/agents'; import type { AzureOptions } from './azure'; @@ -8,11 +8,13 @@ import type { AppConfig } from './config'; export type OpenAIParameters = z.infer; +export type OpenAIModelOptions = Partial; + /** * Configuration options for the getLLMConfig function */ export interface OpenAIConfigOptions { - modelOptions?: Partial; + modelOptions?: OpenAIModelOptions; directEndpoint?: boolean; reverseProxyUrl?: string | null; defaultQuery?: Record; @@ -22,24 +24,28 @@ export interface OpenAIConfigOptions { streaming?: boolean; addParams?: Record; dropParams?: string[]; + customParams?: Partial; } export type OpenAIConfiguration = OpenAIClientOptions['configuration']; -export type ClientOptions = OpenAIClientOptions & { +export type OAIClientOptions = OpenAIClientOptions & { include_reasoning?: boolean; }; /** * Return type for getLLMConfig function */ -export interface LLMConfigResult { - llmConfig: ClientOptions; - configOptions: OpenAIConfiguration; - tools?: BindToolsInput[]; +export interface LLMConfigResult { + llmConfig: T; provider?: Providers; + tools?: BindToolsInput[]; } +export type OpenAIConfigResult = LLMConfigResult & { + configOptions?: OpenAIConfiguration; +}; + /** * Interface for user values retrieved from the database */ diff --git a/packages/api/src/utils/index.ts b/packages/api/src/utils/index.ts index f20550eae..4a5337fe3 100644 --- a/packages/api/src/utils/index.ts +++ b/packages/api/src/utils/index.ts @@ -15,3 +15,4 @@ export * from './text'; export { default as Tokenizer } from './tokenizer'; export * from './yaml'; export * from './http'; +export * from './tokens'; diff --git a/api/utils/tokens.js b/packages/api/src/utils/tokens.ts similarity index 73% rename from api/utils/tokens.js rename to packages/api/src/utils/tokens.ts index c94c0ccdf..48958c21e 100644 --- a/api/utils/tokens.js +++ 
b/packages/api/src/utils/tokens.ts @@ -1,5 +1,23 @@ -const z = require('zod'); -const { EModelEndpoint } = require('librechat-data-provider'); +import z from 'zod'; +import { EModelEndpoint } from 'librechat-data-provider'; + +/** Configuration object mapping model keys to their respective prompt, completion rates, and context limit + * + * Note: the [key: string]: unknown is not in the original JSDoc typedef in /api/typedefs.js, but I've included it since + * getModelMaxOutputTokens calls getModelTokenValue with a key of 'output', which was not in the original JSDoc typedef, + * but would be referenced in a TokenConfig in the if(matchedPattern) portion of getModelTokenValue. + * So in order to preserve functionality for that case and any others which might reference an additional key I'm unaware of, + * I've included it here until the interface can be typed more tightly. + */ +export interface TokenConfig { + prompt: number; + completion: number; + context: number; + [key: string]: unknown; +} + +/** An endpoint's config object mapping model keys to their respective prompt, completion rates, and context limit */ +export type EndpointTokenConfig = Record; const openAIModels = { 'o4-mini': 200000, @@ -242,7 +260,7 @@ const aggregateModels = { 'gpt-oss-120b': 131000, }; -const maxTokensMap = { +export const maxTokensMap = { [EModelEndpoint.azureOpenAI]: openAIModels, [EModelEndpoint.openAI]: aggregateModels, [EModelEndpoint.agents]: aggregateModels, @@ -252,7 +270,7 @@ const maxTokensMap = { [EModelEndpoint.bedrock]: bedrockModels, }; -const modelMaxOutputs = { +export const modelMaxOutputs = { o1: 32268, // -500 from max: 32,768 'o1-mini': 65136, // -500 from max: 65,536 'o1-preview': 32268, // -500 from max: 32,768 @@ -261,7 +279,7 @@ const modelMaxOutputs = { 'gpt-5-nano': 128000, 'gpt-oss-20b': 131000, 'gpt-oss-120b': 131000, - system_default: 1024, + system_default: 32000, }; /** Outputs from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names */ @@ -277,7 +295,7 @@ const anthropicMaxOutputs = { 'claude-3-7-sonnet': 128000, }; -const maxOutputTokensMap = { +export const maxOutputTokensMap = { [EModelEndpoint.anthropic]: anthropicMaxOutputs, [EModelEndpoint.azureOpenAI]: modelMaxOutputs, [EModelEndpoint.openAI]: modelMaxOutputs, @@ -287,10 +305,13 @@ const maxOutputTokensMap = { /** * Finds the first matching pattern in the tokens map. * @param {string} modelName - * @param {Record} tokensMap + * @param {Record | EndpointTokenConfig} tokensMap * @returns {string|null} */ -function findMatchingPattern(modelName, tokensMap) { +export function findMatchingPattern( + modelName: string, + tokensMap: Record | EndpointTokenConfig, +): string | null { const keys = Object.keys(tokensMap); for (let i = keys.length - 1; i >= 0; i--) { const modelKey = keys[i]; @@ -305,57 +326,79 @@ function findMatchingPattern(modelName, tokensMap) { /** * Retrieves a token value for a given model name from a tokens map. * - * @param {string} modelName - The name of the model to look up. - * @param {EndpointTokenConfig | Record} tokensMap - The map of model names to token values. - * @param {string} [key='context'] - The key to look up in the tokens map. - * @returns {number|undefined} The token value for the given model or undefined if no match is found. + * @param modelName - The name of the model to look up. + * @param tokensMap - The map of model names to token values. + * @param [key='context'] - The key to look up in the tokens map. 
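+ *   ('context' reads the context-window entry; getModelMaxOutputTokens below passes 'output'.)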
+ * @returns The token value for the given model or undefined if no match is found. */ -function getModelTokenValue(modelName, tokensMap, key = 'context') { +export function getModelTokenValue( + modelName: string, + tokensMap?: EndpointTokenConfig | Record, + key = 'context' as keyof TokenConfig, +): number | undefined { if (typeof modelName !== 'string' || !tokensMap) { return undefined; } - if (tokensMap[modelName]?.context) { - return tokensMap[modelName].context; + const value = tokensMap[modelName]; + if (typeof value === 'number') { + return value; } - if (tokensMap[modelName]) { - return tokensMap[modelName]; + if (value?.context) { + return value.context; } const matchedPattern = findMatchingPattern(modelName, tokensMap); if (matchedPattern) { const result = tokensMap[matchedPattern]; - return result?.[key] ?? result ?? tokensMap.system_default; + if (typeof result === 'number') { + return result; + } + + const tokenValue = result?.[key]; + if (typeof tokenValue === 'number') { + return tokenValue; + } + return tokensMap.system_default as number | undefined; } - return tokensMap.system_default; + return tokensMap.system_default as number | undefined; } /** * Retrieves the maximum tokens for a given model name. * - * @param {string} modelName - The name of the model to look up. - * @param {string} endpoint - The endpoint (default is 'openAI'). - * @param {EndpointTokenConfig} [endpointTokenConfig] - Token Config for current endpoint to use for max tokens lookup - * @returns {number|undefined} The maximum tokens for the given model or undefined if no match is found. + * @param modelName - The name of the model to look up. + * @param endpoint - The endpoint (default is 'openAI'). + * @param [endpointTokenConfig] - Token Config for current endpoint to use for max tokens lookup + * @returns The maximum tokens for the given model or undefined if no match is found. */ -function getModelMaxTokens(modelName, endpoint = EModelEndpoint.openAI, endpointTokenConfig) { - const tokensMap = endpointTokenConfig ?? maxTokensMap[endpoint]; +export function getModelMaxTokens( + modelName: string, + endpoint = EModelEndpoint.openAI, + endpointTokenConfig?: EndpointTokenConfig, +): number | undefined { + const tokensMap = endpointTokenConfig ?? maxTokensMap[endpoint as keyof typeof maxTokensMap]; return getModelTokenValue(modelName, tokensMap); } /** * Retrieves the maximum output tokens for a given model name. * - * @param {string} modelName - The name of the model to look up. - * @param {string} endpoint - The endpoint (default is 'openAI'). - * @param {EndpointTokenConfig} [endpointTokenConfig] - Token Config for current endpoint to use for max tokens lookup - * @returns {number|undefined} The maximum output tokens for the given model or undefined if no match is found. + * @param modelName - The name of the model to look up. + * @param endpoint - The endpoint (default is 'openAI'). + * @param [endpointTokenConfig] - Token Config for current endpoint to use for max tokens lookup + * @returns The maximum output tokens for the given model or undefined if no match is found. */ -function getModelMaxOutputTokens(modelName, endpoint = EModelEndpoint.openAI, endpointTokenConfig) { - const tokensMap = endpointTokenConfig ?? maxOutputTokensMap[endpoint]; +export function getModelMaxOutputTokens( + modelName: string, + endpoint = EModelEndpoint.openAI, + endpointTokenConfig?: EndpointTokenConfig, +): number | undefined { + const tokensMap = + endpointTokenConfig ?? 
maxOutputTokensMap[endpoint as keyof typeof maxOutputTokensMap]; return getModelTokenValue(modelName, tokensMap, 'output'); } @@ -363,21 +406,24 @@ function getModelMaxOutputTokens(modelName, endpoint = EModelEndpoint.openAI, en * Retrieves the model name key for a given model name input. If the exact model name isn't found, * it searches for partial matches within the model name, checking keys in reverse order. * - * @param {string} modelName - The name of the model to look up. - * @param {string} endpoint - The endpoint (default is 'openAI'). - * @returns {string|undefined} The model name key for the given model; returns input if no match is found and is string. + * @param modelName - The name of the model to look up. + * @param endpoint - The endpoint (default is 'openAI'). + * @returns The model name key for the given model; returns input if no match is found and is string. * * @example * matchModelName('gpt-4-32k-0613'); // Returns 'gpt-4-32k-0613' * matchModelName('gpt-4-32k-unknown'); // Returns 'gpt-4-32k' * matchModelName('unknown-model'); // Returns undefined */ -function matchModelName(modelName, endpoint = EModelEndpoint.openAI) { +export function matchModelName( + modelName: string, + endpoint = EModelEndpoint.openAI, +): string | undefined { if (typeof modelName !== 'string') { return undefined; } - const tokensMap = maxTokensMap[endpoint]; + const tokensMap: Record = maxTokensMap[endpoint as keyof typeof maxTokensMap]; if (!tokensMap) { return modelName; } @@ -390,7 +436,7 @@ function matchModelName(modelName, endpoint = EModelEndpoint.openAI) { return matchedPattern || modelName; } -const modelSchema = z.object({ +export const modelSchema = z.object({ id: z.string(), pricing: z.object({ prompt: z.string(), @@ -399,7 +445,7 @@ const modelSchema = z.object({ context_length: z.number(), }); -const inputSchema = z.object({ +export const inputSchema = z.object({ data: z.array(modelSchema), }); @@ -408,7 +454,7 @@ const inputSchema = z.object({ * @param {{ data: Array> }} input The input object containing base URL and data fetched from the API. * @returns {EndpointTokenConfig} The processed model data. 
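 *
 * @example
 * // Illustrative call only; the id, pricing strings, and context_length mirror the modelSchema above,
 * // and real values come from the upstream /models response.
 * const tokenConfig = processModelData({
 *   data: [{ id: 'gpt-4o-mini', pricing: { prompt: '0.00000015', completion: '0.0000006' }, context_length: 128000 }],
 * });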
*/ -function processModelData(input) { +export function processModelData(input: z.infer): EndpointTokenConfig { const validationResult = inputSchema.safeParse(input); if (!validationResult.success) { throw new Error('Invalid input data'); @@ -416,7 +462,7 @@ function processModelData(input) { const { data } = validationResult.data; /** @type {EndpointTokenConfig} */ - const tokenConfig = {}; + const tokenConfig: EndpointTokenConfig = {}; for (const model of data) { const modelKey = model.id; @@ -439,7 +485,7 @@ function processModelData(input) { return tokenConfig; } -const tiktokenModels = new Set([ +export const tiktokenModels = new Set([ 'text-davinci-003', 'text-davinci-002', 'text-davinci-001', @@ -477,17 +523,3 @@ const tiktokenModels = new Set([ 'gpt-3.5-turbo', 'gpt-3.5-turbo-0301', ]); - -module.exports = { - inputSchema, - modelSchema, - maxTokensMap, - tiktokenModels, - maxOutputTokensMap, - matchModelName, - processModelData, - getModelMaxTokens, - getModelTokenValue, - findMatchingPattern, - getModelMaxOutputTokens, -}; diff --git a/packages/data-provider/src/schemas.ts b/packages/data-provider/src/schemas.ts index 0e9548b1d..dc4baebc0 100644 --- a/packages/data-provider/src/schemas.ts +++ b/packages/data-provider/src/schemas.ts @@ -619,14 +619,14 @@ export const tConversationSchema = z.object({ userLabel: z.string().optional(), model: z.string().nullable().optional(), promptPrefix: z.string().nullable().optional(), - temperature: z.number().optional(), + temperature: z.number().nullable().optional(), topP: z.number().optional(), topK: z.number().optional(), top_p: z.number().optional(), frequency_penalty: z.number().optional(), presence_penalty: z.number().optional(), parentMessageId: z.string().optional(), - maxOutputTokens: coerceNumber.optional(), + maxOutputTokens: coerceNumber.nullable().optional(), maxContextTokens: coerceNumber.optional(), max_tokens: coerceNumber.optional(), /* Anthropic */ @@ -634,6 +634,7 @@ export const tConversationSchema = z.object({ system: z.string().optional(), thinking: z.boolean().optional(), thinkingBudget: coerceNumber.optional(), + stream: z.boolean().optional(), /* artifacts */ artifacts: z.string().optional(), /* google */ @@ -1152,6 +1153,8 @@ export const anthropicBaseSchema = tConversationSchema.pick({ maxContextTokens: true, web_search: true, fileTokenLimit: true, + stop: true, + stream: true, }); export const anthropicSchema = anthropicBaseSchema