🎚️ feat: Anthropic Parameter Set Support via Custom Endpoints (#9415)

* refactor: modularize openai llm config logic into new getOpenAILLMConfig function (#9412)
* ✈️ refactor: Migrate Anthropic's getLLMConfig to TypeScript (#9413)
* refactor: move tokens.js over to packages/api and update imports
* refactor: port tokens.js to typescript
* refactor: move helpers.js over to packages/api and update imports
* refactor: port helpers.js to typescript
* refactor: move anthropic/llm.js over to packages/api and update imports
* refactor: port anthropic/llm.js to typescript with supporting types in types/anthropic.ts and updated tests in llm.spec.js
* refactor: move llm.spec.js over to packages/api and update import
* refactor: port llm.spec.js over to typescript
* 📝 Add Prompt Parameter Support for Anthropic Custom Endpoints (#9414)
  feat: add anthropic llm config support for openai-like (custom) endpoints
* fix: missed compiler / type issues from addition of getAnthropicLLMConfig
* refactor: update tokens.ts to export constants and functions, enhance type definitions, and adjust default values
* WIP: first pass, decouple `llmConfig` from `configOptions`
* chore: update import path for OpenAI configuration from 'llm' to 'config'
* refactor: enhance type definitions for ThinkingConfig and update modelOptions in AnthropicConfigOptions
* refactor: cleanup type, introduce openai transform from alt provider
* chore: integrate removeNullishValues in Google llmConfig and update OpenAI exports
* chore: bump version of @librechat/api to 1.3.5 in package.json and package-lock.json
* refactor: update customParams type in OpenAIConfigOptions to use TConfig['customParams']
* refactor: enhance transformToOpenAIConfig to include fromEndpoint and improve config extraction
* refactor: conform userId field for anthropic/openai, cleanup anthropic typing
* ci: add backward compatibility tests for getOpenAIConfig with various endpoints and configurations
* ci: replace userId with user in clientOptions for getLLMConfig
* test: add Azure OpenAI endpoint tests for various configurations in getOpenAIConfig
* refactor: defaultHeaders retrieval for prompt caching for anthropic-based custom endpoint (litellm)
* test: add unit tests for getOpenAIConfig with various Anthropic model configurations
* test: enhance Anthropic compatibility tests with addParams and dropParams handling
* chore: update @librechat/agents dependency to version 2.4.78 in package.json and package-lock.json
* chore: update @librechat/agents dependency to version 2.4.79 in package.json and package-lock.json

---------

Co-authored-by: Danny Avila <danny@librechat.ai>
2026-01-31 14:55:19 +01:00 · 2025-09-08 11:35:29 -07:00 · 2025-09-08 11:35:29 -07:00 · c6ecf0095b
commit c6ecf0095b
parent 7de6f6e44c
40 changed files with 1736 additions and 432 deletions
--- a/api/app/clients/AnthropicClient.js
+++ b/api/app/clients/AnthropicClient.js
@ -10,7 +10,17 @@ const {
  validateVisionModel,
 } = require('librechat-data-provider');
 const { SplitStreamHandler: _Handler } = require('@librechat/agents');
-const { Tokenizer, createFetch, createStreamEventHandlers } = require('@librechat/api');
+const {
+  Tokenizer,
+  createFetch,
+  matchModelName,
+  getClaudeHeaders,
+  getModelMaxTokens,
+  configureReasoning,
+  checkPromptCacheSupport,
+  getModelMaxOutputTokens,
+  createStreamEventHandlers,
+} = require('@librechat/api');
 const {
  truncateText,
  formatMessage,
@ -19,12 +29,6 @@ const {
  parseParamFromPrompt,
  createContextHandlers,
 } = require('./prompts');
-const {
-  getClaudeHeaders,
-  configureReasoning,
-  checkPromptCacheSupport,
-} = require('~/server/services/Endpoints/anthropic/helpers');
-const { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } = require('~/utils');
 const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
 const { encodeAndFormat } = require('~/server/services/Files/images/encode');
 const { sleep } = require('~/server/utils');
--- a/api/app/clients/GoogleClient.js
+++ b/api/app/clients/GoogleClient.js
@ -1,4 +1,5 @@
 const { google } = require('googleapis');
+const { getModelMaxTokens } = require('@librechat/api');
 const { concat } = require('@langchain/core/utils/stream');
 const { ChatVertexAI } = require('@langchain/google-vertexai');
 const { Tokenizer, getSafetySettings } = require('@librechat/api');
@ -21,7 +22,6 @@ const {
 } = require('librechat-data-provider');
 const { encodeAndFormat } = require('~/server/services/Files/images');
 const { spendTokens } = require('~/models/spendTokens');
-const { getModelMaxTokens } = require('~/utils');
 const { sleep } = require('~/server/utils');
 const { logger } = require('~/config');
 const {
--- a/api/app/clients/OpenAIClient.js
+++ b/api/app/clients/OpenAIClient.js
@ -7,7 +7,9 @@ const {
  createFetch,
  resolveHeaders,
  constructAzureURL,
+  getModelMaxTokens,
  genAzureChatCompletion,
+  getModelMaxOutputTokens,
  createStreamEventHandlers,
 } = require('@librechat/api');
 const {
@ -31,13 +33,13 @@ const {
  titleInstruction,
  createContextHandlers,
 } = require('./prompts');
-const { extractBaseURL, getModelMaxTokens, getModelMaxOutputTokens } = require('~/utils');
 const { encodeAndFormat } = require('~/server/services/Files/images/encode');
 const { addSpaceIfNeeded, sleep } = require('~/server/utils');
 const { spendTokens } = require('~/models/spendTokens');
 const { handleOpenAIErrors } = require('./tools/util');
 const { summaryBuffer } = require('./memory');
 const { runTitleChain } = require('./chains');
+const { extractBaseURL } = require('~/utils');
 const { tokenSplit } = require('./document');
 const BaseClient = require('./BaseClient');
 const { createLLM } = require('./llm');
--- a/api/app/clients/specs/FakeClient.js
+++ b/api/app/clients/specs/FakeClient.js
@ -1,5 +1,5 @@
+const { getModelMaxTokens } = require('@librechat/api');
 const BaseClient = require('../BaseClient');
-const { getModelMaxTokens } = require('../../../utils');

 class FakeClient extends BaseClient {
  constructor(apiKey, options = {}) {
--- a/api/models/tx.js
+++ b/api/models/tx.js
@ -1,4 +1,4 @@
-const { matchModelName } = require('../utils/tokens');
+const { matchModelName } = require('@librechat/api');
 const defaultRate = 6;

 /**
--- a/api/package.json
+++ b/api/package.json
@ -49,7 +49,7 @@
    "@langchain/google-vertexai": "^0.2.13",
    "@langchain/openai": "^0.5.18",
    "@langchain/textsplitters": "^0.1.0",
-    "@librechat/agents": "^2.4.77",
+    "@librechat/agents": "^2.4.79",
    "@librechat/api": "*",
    "@librechat/data-schemas": "*",
    "@microsoft/microsoft-graph-client": "^3.0.7",
--- a/api/server/controllers/agents/client.js
+++ b/api/server/controllers/agents/client.js
@ -872,11 +872,10 @@ class AgentClient extends BaseClient {
        if (agent.useLegacyContent === true) {
          messages = formatContentStrings(messages);
        }
-        if (
-          agent.model_parameters?.clientOptions?.defaultHeaders?.['anthropic-beta']?.includes(
-            'prompt-caching',
-          )
-        ) {
+        const defaultHeaders =
+          agent.model_parameters?.clientOptions?.defaultHeaders ??
+          agent.model_parameters?.configuration?.defaultHeaders;
+        if (defaultHeaders?.['anthropic-beta']?.includes('prompt-caching')) {
          messages = addCacheControl(messages);
        }

--- a/api/server/controllers/assistants/chatV1.js
+++ b/api/server/controllers/assistants/chatV1.js
@ -1,7 +1,7 @@
 const { v4 } = require('uuid');
 const { sleep } = require('@librechat/agents');
 const { logger } = require('@librechat/data-schemas');
-const { sendEvent, getBalanceConfig } = require('@librechat/api');
+const { sendEvent, getBalanceConfig, getModelMaxTokens } = require('@librechat/api');
 const {
  Time,
  Constants,
@ -34,7 +34,6 @@ const { checkBalance } = require('~/models/balanceMethods');
 const { getConvo } = require('~/models/Conversation');
 const getLogStores = require('~/cache/getLogStores');
 const { countTokens } = require('~/server/utils');
-const { getModelMaxTokens } = require('~/utils');
 const { getOpenAIClient } = require('./helpers');

 /**
--- a/api/server/controllers/assistants/chatV2.js
+++ b/api/server/controllers/assistants/chatV2.js
@ -1,7 +1,7 @@
 const { v4 } = require('uuid');
 const { sleep } = require('@librechat/agents');
 const { logger } = require('@librechat/data-schemas');
-const { sendEvent, getBalanceConfig } = require('@librechat/api');
+const { sendEvent, getBalanceConfig, getModelMaxTokens } = require('@librechat/api');
 const {
  Time,
  Constants,
@ -31,7 +31,6 @@ const { checkBalance } = require('~/models/balanceMethods');
 const { getConvo } = require('~/models/Conversation');
 const getLogStores = require('~/cache/getLogStores');
 const { countTokens } = require('~/server/utils');
-const { getModelMaxTokens } = require('~/utils');
 const { getOpenAIClient } = require('./helpers');

 /**
--- a/api/server/services/Endpoints/agents/agent.js
+++ b/api/server/services/Endpoints/agents/agent.js
@ -1,6 +1,7 @@
 const { Providers } = require('@librechat/agents');
 const {
  primeResources,
+  getModelMaxTokens,
  extractLibreChatParams,
  optionalChainWithEmptyCheck,
 } = require('@librechat/api');
@ -17,7 +18,6 @@ const { getProviderConfig } = require('~/server/services/Endpoints');
 const { processFiles } = require('~/server/services/Files/process');
 const { getFiles, getToolFilesByIds } = require('~/models/File');
 const { getConvoFiles } = require('~/models/Conversation');
-const { getModelMaxTokens } = require('~/utils');

 /**
 * @param {object} params
--- a/api/server/services/Endpoints/anthropic/helpers.js
+++ b/api/server/services/Endpoints/anthropic/helpers.js
@ -1,118 +0,0 @@
-const { EModelEndpoint, anthropicSettings } = require('librechat-data-provider');
-const { matchModelName } = require('~/utils');
-const { logger } = require('~/config');
-
-/**
- * @param {string} modelName
- * @returns {boolean}
- */
-function checkPromptCacheSupport(modelName) {
-  const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic);
-  if (
-    modelMatch.includes('claude-3-5-sonnet-latest') ||
-    modelMatch.includes('claude-3.5-sonnet-latest')
-  ) {
-    return false;
-  }
-
-  return (
-    /claude-3[-.]7/.test(modelMatch) ||
-    /claude-3[-.]5-(?:sonnet|haiku)/.test(modelMatch) ||
-    /claude-3-(?:sonnet|haiku|opus)?/.test(modelMatch) ||
-    /claude-(?:sonnet|opus|haiku)-[4-9]/.test(modelMatch) ||
-    /claude-[4-9]-(?:sonnet|opus|haiku)?/.test(modelMatch) ||
-    /claude-4(?:-(?:sonnet|opus|haiku))?/.test(modelMatch)
-  );
-}
-
-/**
- * Gets the appropriate headers for Claude models with cache control
- * @param {string} model The model name
- * @param {boolean} supportsCacheControl Whether the model supports cache control
- * @returns {AnthropicClientOptions['extendedOptions']['defaultHeaders']|undefined} The headers object or undefined if not applicable
- */
-function getClaudeHeaders(model, supportsCacheControl) {
-  if (!supportsCacheControl) {
-    return undefined;
-  }
-
-  if (/claude-3[-.]5-sonnet/.test(model)) {
-    return {
-      'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
-    };
-  } else if (/claude-3[-.]7/.test(model)) {
-    return {
-      'anthropic-beta':
-        'token-efficient-tools-2025-02-19,output-128k-2025-02-19,prompt-caching-2024-07-31',
-    };
-  } else if (/claude-sonnet-4/.test(model)) {
-    return {
-      'anthropic-beta': 'prompt-caching-2024-07-31,context-1m-2025-08-07',
-    };
-  } else if (
-    /claude-(?:sonnet|opus|haiku)-[4-9]/.test(model) ||
-    /claude-[4-9]-(?:sonnet|opus|haiku)?/.test(model) ||
-    /claude-4(?:-(?:sonnet|opus|haiku))?/.test(model)
-  ) {
-    return {
-      'anthropic-beta': 'prompt-caching-2024-07-31',
-    };
-  } else {
-    return {
-      'anthropic-beta': 'prompt-caching-2024-07-31',
-    };
-  }
-}
-
-/**
- * Configures reasoning-related options for Claude models
- * @param {AnthropicClientOptions & { max_tokens?: number }} anthropicInput The request options object
- * @param {Object} extendedOptions Additional client configuration options
- * @param {boolean} extendedOptions.thinking Whether thinking is enabled in client config
- * @param {number|null} extendedOptions.thinkingBudget The token budget for thinking
- * @returns {Object} Updated request options
- */
-function configureReasoning(anthropicInput, extendedOptions = {}) {
-  const updatedOptions = { ...anthropicInput };
-  const currentMaxTokens = updatedOptions.max_tokens ?? updatedOptions.maxTokens;
-  if (
-    extendedOptions.thinking &&
-    updatedOptions?.model &&
-    (/claude-3[-.]7/.test(updatedOptions.model) ||
-      /claude-(?:sonnet|opus|haiku)-[4-9]/.test(updatedOptions.model))
-  ) {
-    updatedOptions.thinking = {
-      type: 'enabled',
-    };
-  }
-
-  if (updatedOptions.thinking != null && extendedOptions.thinkingBudget != null) {
-    updatedOptions.thinking = {
-      ...updatedOptions.thinking,
-      budget_tokens: extendedOptions.thinkingBudget,
-    };
-  }
-
-  if (
-    updatedOptions.thinking != null &&
-    (currentMaxTokens == null || updatedOptions.thinking.budget_tokens > currentMaxTokens)
-  ) {
-    const maxTokens = anthropicSettings.maxOutputTokens.reset(updatedOptions.model);
-    updatedOptions.max_tokens = currentMaxTokens ?? maxTokens;
-
-    logger.warn(
-      updatedOptions.max_tokens === maxTokens
-        ? '[AnthropicClient] max_tokens is not defined while thinking is enabled. Setting max_tokens to model default.'
-        : `[AnthropicClient] thinking budget_tokens (${updatedOptions.thinking.budget_tokens}) exceeds max_tokens (${updatedOptions.max_tokens}). Adjusting budget_tokens.`,
-    );
-
-    updatedOptions.thinking.budget_tokens = Math.min(
-      updatedOptions.thinking.budget_tokens,
-      Math.floor(updatedOptions.max_tokens * 0.9),
-    );
-  }
-
-  return updatedOptions;
-}
-
-module.exports = { checkPromptCacheSupport, getClaudeHeaders, configureReasoning };
--- a/api/server/services/Endpoints/anthropic/initialize.js
+++ b/api/server/services/Endpoints/anthropic/initialize.js
@ -1,6 +1,6 @@
+const { getLLMConfig } = require('@librechat/api');
 const { EModelEndpoint } = require('librechat-data-provider');
 const { getUserKey, checkUserKeyExpiry } = require('~/server/services/UserService');
-const { getLLMConfig } = require('~/server/services/Endpoints/anthropic/llm');
 const AnthropicClient = require('~/app/clients/AnthropicClient');

 const initializeClient = async ({ req, res, endpointOption, overrideModel, optionsOnly }) => {
@ -40,7 +40,6 @@ const initializeClient = async ({ req, res, endpointOption, overrideModel, optio
    clientOptions = Object.assign(
      {
        proxy: PROXY ?? null,
-        userId: req.user.id,
        reverseProxyUrl: ANTHROPIC_REVERSE_PROXY ?? null,
        modelOptions: endpointOption?.model_parameters ?? {},
      },
@ -49,6 +48,7 @@ const initializeClient = async ({ req, res, endpointOption, overrideModel, optio
    if (overrideModel) {
      clientOptions.modelOptions.model = overrideModel;
    }
+    clientOptions.modelOptions.user = req.user.id;
    return getLLMConfig(anthropicApiKey, clientOptions);
  }

--- a/api/server/services/Endpoints/anthropic/llm.js
+++ b/api/server/services/Endpoints/anthropic/llm.js
@ -1,103 +0,0 @@
-const { ProxyAgent } = require('undici');
-const { anthropicSettings, removeNullishValues } = require('librechat-data-provider');
-const { checkPromptCacheSupport, getClaudeHeaders, configureReasoning } = require('./helpers');
-
-/**
- * Generates configuration options for creating an Anthropic language model (LLM) instance.
- *
- * @param {string} apiKey - The API key for authentication with Anthropic.
- * @param {Object} [options={}] - Additional options for configuring the LLM.
- * @param {Object} [options.modelOptions] - Model-specific options.
- * @param {string} [options.modelOptions.model] - The name of the model to use.
- * @param {number} [options.modelOptions.maxOutputTokens] - The maximum number of tokens to generate.
- * @param {number} [options.modelOptions.temperature] - Controls randomness in output generation.
- * @param {number} [options.modelOptions.topP] - Controls diversity of output generation.
- * @param {number} [options.modelOptions.topK] - Controls the number of top tokens to consider.
- * @param {string[]} [options.modelOptions.stop] - Sequences where the API will stop generating further tokens.
- * @param {boolean} [options.modelOptions.stream] - Whether to stream the response.
- * @param {string} options.userId - The user ID for tracking and personalization.
- * @param {string} [options.proxy] - Proxy server URL.
- * @param {string} [options.reverseProxyUrl] - URL for a reverse proxy, if used.
- *
- * @returns {Object} Configuration options for creating an Anthropic LLM instance, with null and undefined values removed.
- */
-function getLLMConfig(apiKey, options = {}) {
-  const systemOptions = {
-    thinking: options.modelOptions.thinking ?? anthropicSettings.thinking.default,
-    promptCache: options.modelOptions.promptCache ?? anthropicSettings.promptCache.default,
-    thinkingBudget: options.modelOptions.thinkingBudget ?? anthropicSettings.thinkingBudget.default,
-  };
-  for (let key in systemOptions) {
-    delete options.modelOptions[key];
-  }
-  const defaultOptions = {
-    model: anthropicSettings.model.default,
-    maxOutputTokens: anthropicSettings.maxOutputTokens.default,
-    stream: true,
-  };
-
-  const mergedOptions = Object.assign(defaultOptions, options.modelOptions);
-
-  /** @type {AnthropicClientOptions} */
-  let requestOptions = {
-    apiKey,
-    model: mergedOptions.model,
-    stream: mergedOptions.stream,
-    temperature: mergedOptions.temperature,
-    stopSequences: mergedOptions.stop,
-    maxTokens:
-      mergedOptions.maxOutputTokens || anthropicSettings.maxOutputTokens.reset(mergedOptions.model),
-    clientOptions: {},
-    invocationKwargs: {
-      metadata: {
-        user_id: options.userId,
-      },
-    },
-  };
-
-  requestOptions = configureReasoning(requestOptions, systemOptions);
-
-  if (!/claude-3[-.]7/.test(mergedOptions.model)) {
-    requestOptions.topP = mergedOptions.topP;
-    requestOptions.topK = mergedOptions.topK;
-  } else if (requestOptions.thinking == null) {
-    requestOptions.topP = mergedOptions.topP;
-    requestOptions.topK = mergedOptions.topK;
-  }
-
-  const supportsCacheControl =
-    systemOptions.promptCache === true && checkPromptCacheSupport(requestOptions.model);
-  const headers = getClaudeHeaders(requestOptions.model, supportsCacheControl);
-  if (headers) {
-    requestOptions.clientOptions.defaultHeaders = headers;
-  }
-
-  if (options.proxy) {
-    const proxyAgent = new ProxyAgent(options.proxy);
-    requestOptions.clientOptions.fetchOptions = {
-      dispatcher: proxyAgent,
-    };
-  }
-
-  if (options.reverseProxyUrl) {
-    requestOptions.clientOptions.baseURL = options.reverseProxyUrl;
-    requestOptions.anthropicApiUrl = options.reverseProxyUrl;
-  }
-
-  const tools = [];
-
-  if (mergedOptions.web_search) {
-    tools.push({
-      type: 'web_search_20250305',
-      name: 'web_search',
-    });
-  }
-
-  return {
-    tools,
-    /** @type {AnthropicClientOptions} */
-    llmConfig: removeNullishValues(requestOptions),
-  };
-}
-
-module.exports = { getLLMConfig };
--- a/api/server/services/Endpoints/anthropic/llm.spec.js
+++ b/api/server/services/Endpoints/anthropic/llm.spec.js
--- a/api/server/services/Endpoints/bedrock/initialize.js
+++ b/api/server/services/Endpoints/bedrock/initialize.js
@ -1,3 +1,4 @@
+const { getModelMaxTokens } = require('@librechat/api');
 const { createContentAggregator } = require('@librechat/agents');
 const {
  EModelEndpoint,
@ -7,7 +8,6 @@ const {
 const { getDefaultHandlers } = require('~/server/controllers/agents/callbacks');
 const getOptions = require('~/server/services/Endpoints/bedrock/options');
 const AgentClient = require('~/server/controllers/agents/client');
-const { getModelMaxTokens } = require('~/utils');

 const initializeClient = async ({ req, res, endpointOption }) => {
  if (!endpointOption) {
--- a/api/server/services/ModelService.js
+++ b/api/server/services/ModelService.js
@ -1,13 +1,13 @@
 const axios = require('axios');
 const { Providers } = require('@librechat/agents');
-const { logAxiosError } = require('@librechat/api');
 const { logger } = require('@librechat/data-schemas');
 const { HttpsProxyAgent } = require('https-proxy-agent');
+const { logAxiosError, inputSchema, processModelData } = require('@librechat/api');
 const { EModelEndpoint, defaultModels, CacheKeys } = require('librechat-data-provider');
-const { inputSchema, extractBaseURL, processModelData } = require('~/utils');
 const { OllamaClient } = require('~/app/clients/OllamaClient');
 const { isUserProvided } = require('~/server/utils');
 const getLogStores = require('~/cache/getLogStores');
+const { extractBaseURL } = require('~/utils');

 /**
 * Splits a string by commas and trims each resulting value.
--- a/api/server/services/ModelService.spec.js
+++ b/api/server/services/ModelService.spec.js
@ -11,8 +11,8 @@ const {
  getAnthropicModels,
 } = require('./ModelService');

-jest.mock('~/utils', () => {
-  const originalUtils = jest.requireActual('~/utils');
+jest.mock('@librechat/api', () => {
+  const originalUtils = jest.requireActual('@librechat/api');
  return {
    ...originalUtils,
    processModelData: jest.fn((...args) => {
@ -108,7 +108,7 @@ describe('fetchModels with createTokenConfig true', () => {

  beforeEach(() => {
    // Clears the mock's history before each test
-    const _utils = require('~/utils');
+    const _utils = require('@librechat/api');
    axios.get.mockResolvedValue({ data });
  });

@ -120,7 +120,7 @@ describe('fetchModels with createTokenConfig true', () => {
      createTokenConfig: true,
    });

-    const { processModelData } = require('~/utils');
+    const { processModelData } = require('@librechat/api');
    expect(processModelData).toHaveBeenCalled();
    expect(processModelData).toHaveBeenCalledWith(data);
  });
--- a/api/utils/deriveBaseURL.spec.js
+++ b/api/utils/deriveBaseURL.spec.js
@ -1,7 +1,7 @@
 const axios = require('axios');
 const deriveBaseURL = require('./deriveBaseURL');
-jest.mock('~/utils', () => {
-  const originalUtils = jest.requireActual('~/utils');
+jest.mock('@librechat/api', () => {
+  const originalUtils = jest.requireActual('@librechat/api');
  return {
    ...originalUtils,
    processModelData: jest.fn((...args) => {
--- a/api/utils/index.js
+++ b/api/utils/index.js
@ -1,4 +1,3 @@
-const tokenHelpers = require('./tokens');
 const deriveBaseURL = require('./deriveBaseURL');
 const extractBaseURL = require('./extractBaseURL');
 const findMessageContent = require('./findMessageContent');
@ -6,6 +5,5 @@ const findMessageContent = require('./findMessageContent');
 module.exports = {
  deriveBaseURL,
  extractBaseURL,
-  ...tokenHelpers,
  findMessageContent,
 };
--- a/api/utils/tokens.js
+++ b/api/utils/tokens.js
@ -1,493 +0,0 @@
-const z = require('zod');
-const { EModelEndpoint } = require('librechat-data-provider');
-
-const openAIModels = {
-  'o4-mini': 200000,
-  'o3-mini': 195000, // -5000 from max
-  o3: 200000,
-  o1: 195000, // -5000 from max
-  'o1-mini': 127500, // -500 from max
-  'o1-preview': 127500, // -500 from max
-  'gpt-4': 8187, // -5 from max
-  'gpt-4-0613': 8187, // -5 from max
-  'gpt-4-32k': 32758, // -10 from max
-  'gpt-4-32k-0314': 32758, // -10 from max
-  'gpt-4-32k-0613': 32758, // -10 from max
-  'gpt-4-1106': 127500, // -500 from max
-  'gpt-4-0125': 127500, // -500 from max
-  'gpt-4.5': 127500, // -500 from max
-  'gpt-4.1': 1047576,
-  'gpt-4.1-mini': 1047576,
-  'gpt-4.1-nano': 1047576,
-  'gpt-5': 400000,
-  'gpt-5-mini': 400000,
-  'gpt-5-nano': 400000,
-  'gpt-4o': 127500, // -500 from max
-  'gpt-4o-mini': 127500, // -500 from max
-  'gpt-4o-2024-05-13': 127500, // -500 from max
-  'gpt-4o-2024-08-06': 127500, // -500 from max
-  'gpt-4-turbo': 127500, // -500 from max
-  'gpt-4-vision': 127500, // -500 from max
-  'gpt-3.5-turbo': 16375, // -10 from max
-  'gpt-3.5-turbo-0613': 4092, // -5 from max
-  'gpt-3.5-turbo-0301': 4092, // -5 from max
-  'gpt-3.5-turbo-16k': 16375, // -10 from max
-  'gpt-3.5-turbo-16k-0613': 16375, // -10 from max
-  'gpt-3.5-turbo-1106': 16375, // -10 from max
-  'gpt-3.5-turbo-0125': 16375, // -10 from max
-};
-
-const mistralModels = {
-  'mistral-': 31990, // -10 from max
-  'mistral-7b': 31990, // -10 from max
-  'mistral-small': 31990, // -10 from max
-  'mixtral-8x7b': 31990, // -10 from max
-  'mistral-large': 131000,
-  'mistral-large-2402': 127500,
-  'mistral-large-2407': 127500,
-  'pixtral-large': 131000,
-  'mistral-saba': 32000,
-  codestral: 256000,
-  'ministral-8b': 131000,
-  'ministral-3b': 131000,
-};
-
-const cohereModels = {
-  'command-light': 4086, // -10 from max
-  'command-light-nightly': 8182, // -10 from max
-  command: 4086, // -10 from max
-  'command-nightly': 8182, // -10 from max
-  'command-r': 127500, // -500 from max
-  'command-r-plus': 127500, // -500 from max
-};
-
-const googleModels = {
-  /* Max I/O is combined so we subtract the amount from max response tokens for actual total */
-  gemma: 8196,
-  'gemma-2': 32768,
-  'gemma-3': 32768,
-  'gemma-3-27b': 131072,
-  gemini: 30720, // -2048 from max
-  'gemini-pro-vision': 12288,
-  'gemini-exp': 2000000,
-  'gemini-2.5': 1000000, // 1M input tokens, 64k output tokens
-  'gemini-2.5-pro': 1000000,
-  'gemini-2.5-flash': 1000000,
-  'gemini-2.0': 2000000,
-  'gemini-2.0-flash': 1000000,
-  'gemini-2.0-flash-lite': 1000000,
-  'gemini-1.5': 1000000,
-  'gemini-1.5-flash': 1000000,
-  'gemini-1.5-flash-8b': 1000000,
-  'text-bison-32k': 32758, // -10 from max
-  'chat-bison-32k': 32758, // -10 from max
-  'code-bison-32k': 32758, // -10 from max
-  'codechat-bison-32k': 32758,
-  /* Codey, -5 from max: 6144 */
-  'code-': 6139,
-  'codechat-': 6139,
-  /* PaLM2, -5 from max: 8192 */
-  'text-': 8187,
-  'chat-': 8187,
-};
-
-const anthropicModels = {
-  'claude-': 100000,
-  'claude-instant': 100000,
-  'claude-2': 100000,
-  'claude-2.1': 200000,
-  'claude-3': 200000,
-  'claude-3-haiku': 200000,
-  'claude-3-sonnet': 200000,
-  'claude-3-opus': 200000,
-  'claude-3.5-haiku': 200000,
-  'claude-3-5-haiku': 200000,
-  'claude-3-5-sonnet': 200000,
-  'claude-3.5-sonnet': 200000,
-  'claude-3-7-sonnet': 200000,
-  'claude-3.7-sonnet': 200000,
-  'claude-3-5-sonnet-latest': 200000,
-  'claude-3.5-sonnet-latest': 200000,
-  'claude-sonnet-4': 1000000,
-  'claude-opus-4': 200000,
-  'claude-4': 200000,
-};
-
-const deepseekModels = {
-  'deepseek-reasoner': 63000, // -1000 from max (API)
-  deepseek: 63000, // -1000 from max (API)
-  'deepseek.r1': 127500,
-};
-
-const metaModels = {
-  // Basic patterns
-  llama3: 8000,
-  llama2: 4000,
-  'llama-3': 8000,
-  'llama-2': 4000,
-
-  // llama3.x pattern
-  'llama3.1': 127500,
-  'llama3.2': 127500,
-  'llama3.3': 127500,
-
-  // llama3-x pattern
-  'llama3-1': 127500,
-  'llama3-2': 127500,
-  'llama3-3': 127500,
-
-  // llama-3.x pattern
-  'llama-3.1': 127500,
-  'llama-3.2': 127500,
-  'llama-3.3': 127500,
-
-  // llama3.x:Nb pattern
-  'llama3.1:405b': 127500,
-  'llama3.1:70b': 127500,
-  'llama3.1:8b': 127500,
-  'llama3.2:1b': 127500,
-  'llama3.2:3b': 127500,
-  'llama3.2:11b': 127500,
-  'llama3.2:90b': 127500,
-  'llama3.3:70b': 127500,
-
-  // llama3-x-Nb pattern
-  'llama3-1-405b': 127500,
-  'llama3-1-70b': 127500,
-  'llama3-1-8b': 127500,
-  'llama3-2-1b': 127500,
-  'llama3-2-3b': 127500,
-  'llama3-2-11b': 127500,
-  'llama3-2-90b': 127500,
-  'llama3-3-70b': 127500,
-
-  // llama-3.x-Nb pattern
-  'llama-3.1-405b': 127500,
-  'llama-3.1-70b': 127500,
-  'llama-3.1-8b': 127500,
-  'llama-3.2-1b': 127500,
-  'llama-3.2-3b': 127500,
-  'llama-3.2-11b': 127500,
-  'llama-3.2-90b': 127500,
-  'llama-3.3-70b': 127500,
-
-  // Original llama2/3 patterns
-  'llama3-70b': 8000,
-  'llama3-8b': 8000,
-  'llama2-70b': 4000,
-  'llama2-13b': 4000,
-  'llama3:70b': 8000,
-  'llama3:8b': 8000,
-  'llama2:70b': 4000,
-};
-
-const ollamaModels = {
-  'qwen2.5': 32000,
-};
-
-const ai21Models = {
-  'ai21.j2-mid-v1': 8182, // -10 from max
-  'ai21.j2-ultra-v1': 8182, // -10 from max
-  'ai21.jamba-instruct-v1:0': 255500, // -500 from max
-};
-
-const amazonModels = {
-  'amazon.titan-text-lite-v1': 4000,
-  'amazon.titan-text-express-v1': 8000,
-  'amazon.titan-text-premier-v1:0': 31500, // -500 from max
-  // https://aws.amazon.com/ai/generative-ai/nova/
-  'amazon.nova-micro-v1:0': 127000, // -1000 from max,
-  'amazon.nova-lite-v1:0': 295000, // -5000 from max,
-  'amazon.nova-pro-v1:0': 295000, // -5000 from max,
-  'amazon.nova-premier-v1:0': 995000, // -5000 from max,
-};
-
-const bedrockModels = {
-  ...anthropicModels,
-  ...mistralModels,
-  ...cohereModels,
-  ...ollamaModels,
-  ...deepseekModels,
-  ...metaModels,
-  ...ai21Models,
-  ...amazonModels,
-};
-
-const xAIModels = {
-  grok: 131072,
-  'grok-beta': 131072,
-  'grok-vision-beta': 8192,
-  'grok-2': 131072,
-  'grok-2-latest': 131072,
-  'grok-2-1212': 131072,
-  'grok-2-vision': 32768,
-  'grok-2-vision-latest': 32768,
-  'grok-2-vision-1212': 32768,
-  'grok-3': 131072,
-  'grok-3-fast': 131072,
-  'grok-3-mini': 131072,
-  'grok-3-mini-fast': 131072,
-  'grok-4': 256000, // 256K context
-};
-
-const aggregateModels = {
-  ...openAIModels,
-  ...googleModels,
-  ...bedrockModels,
-  ...xAIModels,
-  // misc.
-  kimi: 131000,
-  // GPT-OSS
-  'gpt-oss-20b': 131000,
-  'gpt-oss-120b': 131000,
-};
-
-const maxTokensMap = {
-  [EModelEndpoint.azureOpenAI]: openAIModels,
-  [EModelEndpoint.openAI]: aggregateModels,
-  [EModelEndpoint.agents]: aggregateModels,
-  [EModelEndpoint.custom]: aggregateModels,
-  [EModelEndpoint.google]: googleModels,
-  [EModelEndpoint.anthropic]: anthropicModels,
-  [EModelEndpoint.bedrock]: bedrockModels,
-};
-
-const modelMaxOutputs = {
-  o1: 32268, // -500 from max: 32,768
-  'o1-mini': 65136, // -500 from max: 65,536
-  'o1-preview': 32268, // -500 from max: 32,768
-  'gpt-5': 128000,
-  'gpt-5-mini': 128000,
-  'gpt-5-nano': 128000,
-  'gpt-oss-20b': 131000,
-  'gpt-oss-120b': 131000,
-  system_default: 1024,
-};
-
-/** Outputs from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names */
-const anthropicMaxOutputs = {
-  'claude-3-haiku': 4096,
-  'claude-3-sonnet': 4096,
-  'claude-3-opus': 4096,
-  'claude-opus-4': 32000,
-  'claude-sonnet-4': 64000,
-  'claude-3.5-sonnet': 8192,
-  'claude-3-5-sonnet': 8192,
-  'claude-3.7-sonnet': 128000,
-  'claude-3-7-sonnet': 128000,
-};
-
-const maxOutputTokensMap = {
-  [EModelEndpoint.anthropic]: anthropicMaxOutputs,
-  [EModelEndpoint.azureOpenAI]: modelMaxOutputs,
-  [EModelEndpoint.openAI]: modelMaxOutputs,
-  [EModelEndpoint.custom]: modelMaxOutputs,
-};
-
-/**
- * Finds the first matching pattern in the tokens map.
- * @param {string} modelName
- * @param {Record<string, number>} tokensMap
- * @returns {string|null}
- */
-function findMatchingPattern(modelName, tokensMap) {
-  const keys = Object.keys(tokensMap);
-  for (let i = keys.length - 1; i >= 0; i--) {
-    const modelKey = keys[i];
-    if (modelName.includes(modelKey)) {
-      return modelKey;
-    }
-  }
-
-  return null;
-}
-
-/**
- * Retrieves a token value for a given model name from a tokens map.
- *
- * @param {string} modelName - The name of the model to look up.
- * @param {EndpointTokenConfig | Record<string, number>} tokensMap - The map of model names to token values.
- * @param {string} [key='context'] - The key to look up in the tokens map.
- * @returns {number|undefined} The token value for the given model or undefined if no match is found.
- */
-function getModelTokenValue(modelName, tokensMap, key = 'context') {
-  if (typeof modelName !== 'string' || !tokensMap) {
-    return undefined;
-  }
-
-  if (tokensMap[modelName]?.context) {
-    return tokensMap[modelName].context;
-  }
-
-  if (tokensMap[modelName]) {
-    return tokensMap[modelName];
-  }
-
-  const matchedPattern = findMatchingPattern(modelName, tokensMap);
-
-  if (matchedPattern) {
-    const result = tokensMap[matchedPattern];
-    return result?.[key] ?? result ?? tokensMap.system_default;
-  }
-
-  return tokensMap.system_default;
-}
-
-/**
- * Retrieves the maximum tokens for a given model name.
- *
- * @param {string} modelName - The name of the model to look up.
- * @param {string} endpoint - The endpoint (default is 'openAI').
- * @param {EndpointTokenConfig} [endpointTokenConfig] - Token Config for current endpoint to use for max tokens lookup
- * @returns {number|undefined} The maximum tokens for the given model or undefined if no match is found.
- */
-function getModelMaxTokens(modelName, endpoint = EModelEndpoint.openAI, endpointTokenConfig) {
-  const tokensMap = endpointTokenConfig ?? maxTokensMap[endpoint];
-  return getModelTokenValue(modelName, tokensMap);
-}
-
-/**
- * Retrieves the maximum output tokens for a given model name.
- *
- * @param {string} modelName - The name of the model to look up.
- * @param {string} endpoint - The endpoint (default is 'openAI').
- * @param {EndpointTokenConfig} [endpointTokenConfig] - Token Config for current endpoint to use for max tokens lookup
- * @returns {number|undefined} The maximum output tokens for the given model or undefined if no match is found.
- */
-function getModelMaxOutputTokens(modelName, endpoint = EModelEndpoint.openAI, endpointTokenConfig) {
-  const tokensMap = endpointTokenConfig ?? maxOutputTokensMap[endpoint];
-  return getModelTokenValue(modelName, tokensMap, 'output');
-}
-
-/**
- * Retrieves the model name key for a given model name input. If the exact model name isn't found,
- * it searches for partial matches within the model name, checking keys in reverse order.
- *
- * @param {string} modelName - The name of the model to look up.
- * @param {string} endpoint - The endpoint (default is 'openAI').
- * @returns {string|undefined} The model name key for the given model; returns input if no match is found and is string.
- *
- * @example
- * matchModelName('gpt-4-32k-0613'); // Returns 'gpt-4-32k-0613'
- * matchModelName('gpt-4-32k-unknown'); // Returns 'gpt-4-32k'
- * matchModelName('unknown-model'); // Returns undefined
- */
-function matchModelName(modelName, endpoint = EModelEndpoint.openAI) {
-  if (typeof modelName !== 'string') {
-    return undefined;
-  }
-
-  const tokensMap = maxTokensMap[endpoint];
-  if (!tokensMap) {
-    return modelName;
-  }
-
-  if (tokensMap[modelName]) {
-    return modelName;
-  }
-
-  const matchedPattern = findMatchingPattern(modelName, tokensMap);
-  return matchedPattern || modelName;
-}
-
-const modelSchema = z.object({
-  id: z.string(),
-  pricing: z.object({
-    prompt: z.string(),
-    completion: z.string(),
-  }),
-  context_length: z.number(),
-});
-
-const inputSchema = z.object({
-  data: z.array(modelSchema),
-});
-
-/**
- * Processes a list of model data from an API and organizes it into structured data based on URL and specifics of rates and context.
- * @param {{ data: Array<z.infer<typeof modelSchema>> }} input The input object containing base URL and data fetched from the API.
- * @returns {EndpointTokenConfig} The processed model data.
- */
-function processModelData(input) {
-  const validationResult = inputSchema.safeParse(input);
-  if (!validationResult.success) {
-    throw new Error('Invalid input data');
-  }
-  const { data } = validationResult.data;
-
-  /** @type {EndpointTokenConfig} */
-  const tokenConfig = {};
-
-  for (const model of data) {
-    const modelKey = model.id;
-    if (modelKey === 'openrouter/auto') {
-      model.pricing = {
-        prompt: '0.00001',
-        completion: '0.00003',
-      };
-    }
-    const prompt = parseFloat(model.pricing.prompt) * 1000000;
-    const completion = parseFloat(model.pricing.completion) * 1000000;
-
-    tokenConfig[modelKey] = {
-      prompt,
-      completion,
-      context: model.context_length,
-    };
-  }
-
-  return tokenConfig;
-}
-
-const tiktokenModels = new Set([
-  'text-davinci-003',
-  'text-davinci-002',
-  'text-davinci-001',
-  'text-curie-001',
-  'text-babbage-001',
-  'text-ada-001',
-  'davinci',
-  'curie',
-  'babbage',
-  'ada',
-  'code-davinci-002',
-  'code-davinci-001',
-  'code-cushman-002',
-  'code-cushman-001',
-  'davinci-codex',
-  'cushman-codex',
-  'text-davinci-edit-001',
-  'code-davinci-edit-001',
-  'text-embedding-ada-002',
-  'text-similarity-davinci-001',
-  'text-similarity-curie-001',
-  'text-similarity-babbage-001',
-  'text-similarity-ada-001',
-  'text-search-davinci-doc-001',
-  'text-search-curie-doc-001',
-  'text-search-babbage-doc-001',
-  'text-search-ada-doc-001',
-  'code-search-babbage-code-001',
-  'code-search-ada-code-001',
-  'gpt2',
-  'gpt-4',
-  'gpt-4-0314',
-  'gpt-4-32k',
-  'gpt-4-32k-0314',
-  'gpt-3.5-turbo',
-  'gpt-3.5-turbo-0301',
-]);
-
-module.exports = {
-  inputSchema,
-  modelSchema,
-  maxTokensMap,
-  tiktokenModels,
-  maxOutputTokensMap,
-  matchModelName,
-  processModelData,
-  getModelMaxTokens,
-  getModelTokenValue,
-  findMatchingPattern,
-  getModelMaxOutputTokens,
-};
--- a/api/utils/tokens.spec.js
+++ b/api/utils/tokens.spec.js
@@ -1,12 +1,12 @@
 const { EModelEndpoint } = require('librechat-data-provider');
 const {
+  maxTokensMap,
+  matchModelName,
+  processModelData,
+  getModelMaxTokens,
   maxOutputTokensMap,
   findMatchingPattern,
-  getModelMaxTokens,
-  processModelData,
-  matchModelName,
-  maxTokensMap,
-} = require('./tokens');
+} = require('@librechat/api');

 describe('getModelMaxTokens', () => {
   test('should return correct tokens for exact match', () => {
@@ -394,7 +394,7 @@ describe('getModelMaxTokens', () => {
   });

   test('should return correct max output tokens for GPT-5 models', () => {
-    const { getModelMaxOutputTokens } = require('./tokens');
+    const { getModelMaxOutputTokens } = require('@librechat/api');
     ['gpt-5', 'gpt-5-mini', 'gpt-5-nano'].forEach((model) => {
       expect(getModelMaxOutputTokens(model)).toBe(maxOutputTokensMap[EModelEndpoint.openAI][model]);
       expect(getModelMaxOutputTokens(model, EModelEndpoint.openAI)).toBe(
@@ -407,7 +407,7 @@ describe('getModelMaxTokens', () => {
   });

   test('should return correct max output tokens for GPT-OSS models', () => {
-    const { getModelMaxOutputTokens } = require('./tokens');
+    const { getModelMaxOutputTokens } = require('@librechat/api');
     ['gpt-oss-20b', 'gpt-oss-120b'].forEach((model) => {
       expect(getModelMaxOutputTokens(model)).toBe(maxOutputTokensMap[EModelEndpoint.openAI][model]);
       expect(getModelMaxOutputTokens(model, EModelEndpoint.openAI)).toBe(