From 9ff227a5b239cab030e0d762aeef2fd17a4ee46b Mon Sep 17 00:00:00 2001 From: Marco Beretta <81851188+berry-13@users.noreply.github.com> Date: Sun, 8 Feb 2026 17:36:44 +0100 Subject: [PATCH] refactor: remove type re-exports from @librechat/api tokens Update all imports of TokenConfig and EndpointTokenConfig to import directly from librechat-data-provider instead of re-exporting through packages/api/src/types/tokens.ts. Remove the now-unnecessary re-export file and its barrel export. --- api/utils/tokens.spec.js | 14 +- client/src/hooks/useTokenUsage.ts | 4 +- packages/api/src/agents/initialize.ts | 8 +- packages/api/src/agents/usage.ts | 2 +- .../api/src/endpoints/anthropic/helpers.ts | 2 +- .../api/src/endpoints/custom/initialize.ts | 4 +- packages/api/src/types/endpoints.ts | 3 +- packages/api/src/types/index.ts | 1 - packages/api/src/types/tokens.ts | 17 - packages/api/src/utils/tokens.ts | 555 +---------------- packages/data-provider/src/index.ts | 2 + packages/data-provider/src/tokens.ts | 564 ++++++++++++++++++ 12 files changed, 584 insertions(+), 592 deletions(-) delete mode 100644 packages/api/src/types/tokens.ts create mode 100644 packages/data-provider/src/tokens.ts diff --git a/api/utils/tokens.spec.js b/api/utils/tokens.spec.js index 0cfdc30227..b3ab87cd07 100644 --- a/api/utils/tokens.spec.js +++ b/api/utils/tokens.spec.js @@ -1,13 +1,13 @@ /** Note: No hard-coded values should be used in this file. */ -const { EModelEndpoint } = require('librechat-data-provider'); const { + EModelEndpoint, maxTokensMap, matchModelName, - processModelData, getModelMaxTokens, maxOutputTokensMap, findMatchingPattern, -} = require('@librechat/api'); +} = require('librechat-data-provider'); +const { processModelData } = require('@librechat/api'); describe('getModelMaxTokens', () => { test('should return correct tokens for exact match', () => { @@ -485,7 +485,7 @@ describe('getModelMaxTokens', () => { }); test('should return correct max output tokens for GPT-5 models', () => { - const { getModelMaxOutputTokens } = require('@librechat/api'); + const { getModelMaxOutputTokens } = require('librechat-data-provider'); ['gpt-5', 'gpt-5-mini', 'gpt-5-nano', 'gpt-5-pro'].forEach((model) => { expect(getModelMaxOutputTokens(model)).toBe(maxOutputTokensMap[EModelEndpoint.openAI][model]); expect(getModelMaxOutputTokens(model, EModelEndpoint.openAI)).toBe( @@ -498,7 +498,7 @@ describe('getModelMaxTokens', () => { }); test('should return correct max output tokens for GPT-OSS models', () => { - const { getModelMaxOutputTokens } = require('@librechat/api'); + const { getModelMaxOutputTokens } = require('librechat-data-provider'); ['gpt-oss-20b', 'gpt-oss-120b'].forEach((model) => { expect(getModelMaxOutputTokens(model)).toBe(maxOutputTokensMap[EModelEndpoint.openAI][model]); expect(getModelMaxOutputTokens(model, EModelEndpoint.openAI)).toBe( @@ -745,7 +745,7 @@ describe('Meta Models Tests', () => { }); describe('DeepSeek Max Output Tokens', () => { - const { getModelMaxOutputTokens } = require('@librechat/api'); + const { getModelMaxOutputTokens } = require('librechat-data-provider'); test('should return correct max output tokens for deepseek-chat', () => { const expected = maxOutputTokensMap[EModelEndpoint.openAI]['deepseek-chat']; @@ -1123,7 +1123,7 @@ describe('Claude Model Tests', () => { }); it('should return correct max output tokens for Claude Opus 4.6 (128K)', () => { - const { getModelMaxOutputTokens } = require('@librechat/api'); + const { getModelMaxOutputTokens } = require('librechat-data-provider'); 
expect(getModelMaxOutputTokens('claude-opus-4-6', EModelEndpoint.anthropic)).toBe( maxOutputTokensMap[EModelEndpoint.anthropic]['claude-opus-4-6'], ); diff --git a/client/src/hooks/useTokenUsage.ts b/client/src/hooks/useTokenUsage.ts index 06b306bc49..838f44e0c3 100644 --- a/client/src/hooks/useTokenUsage.ts +++ b/client/src/hooks/useTokenUsage.ts @@ -1,10 +1,10 @@ import { useEffect, useMemo } from 'react'; import { useParams } from 'react-router-dom'; import { useSetAtom, useAtomValue } from 'jotai'; -import { getModelMaxTokens, isAgentsEndpoint } from 'librechat-data-provider'; +import { isAgentsEndpoint, getModelMaxTokens } from 'librechat-data-provider'; import type { TMessage } from 'librechat-data-provider'; -import { tokenUsageAtom, type TokenUsage } from '~/store/tokenUsage'; import { useGetMessagesByConvoId, useGetAgentByIdQuery } from '~/data-provider'; +import { tokenUsageAtom, type TokenUsage } from '~/store/tokenUsage'; import { useChatContext } from '~/Providers'; /** diff --git a/packages/api/src/agents/initialize.ts b/packages/api/src/agents/initialize.ts index 008aa4c0ba..e1576e5c0e 100644 --- a/packages/api/src/agents/initialize.ts +++ b/packages/api/src/agents/initialize.ts @@ -8,6 +8,7 @@ import { isAgentsEndpoint, replaceSpecialVars, providerEndpointMap, + getModelMaxTokens, } from 'librechat-data-provider'; import type { AgentToolResources, @@ -21,12 +22,7 @@ import type { GenericTool, LCToolRegistry, ToolMap, LCTool } from '@librechat/ag import type { Response as ServerResponse } from 'express'; import type { IMongoFile } from '@librechat/data-schemas'; import type { InitializeResultBase, ServerRequest, EndpointDbMethods } from '~/types'; -import { - optionalChainWithEmptyCheck, - extractLibreChatParams, - getModelMaxTokens, - getThreadData, -} from '~/utils'; +import { optionalChainWithEmptyCheck, extractLibreChatParams, getThreadData } from '~/utils'; import { filterFilesByEndpointConfig } from '~/files'; import { generateArtifactsPrompt } from '~/prompts'; import { getProviderConfig } from '~/endpoints'; diff --git a/packages/api/src/agents/usage.ts b/packages/api/src/agents/usage.ts index 545be9195d..87d235906d 100644 --- a/packages/api/src/agents/usage.ts +++ b/packages/api/src/agents/usage.ts @@ -1,7 +1,7 @@ import { logger } from '@librechat/data-schemas'; import type { TCustomConfig, TTransactionsConfig } from 'librechat-data-provider'; import type { UsageMetadata } from '../stream/interfaces/IJobStore'; -import type { EndpointTokenConfig } from '../types/tokens'; +import type { EndpointTokenConfig } from 'librechat-data-provider'; interface TokenUsage { promptTokens?: number; diff --git a/packages/api/src/endpoints/anthropic/helpers.ts b/packages/api/src/endpoints/anthropic/helpers.ts index d9b1c1ccfe..573c5e19e5 100644 --- a/packages/api/src/endpoints/anthropic/helpers.ts +++ b/packages/api/src/endpoints/anthropic/helpers.ts @@ -6,8 +6,8 @@ import { anthropicSettings, supportsContext1m, supportsAdaptiveThinking, + matchModelName, } from 'librechat-data-provider'; -import { matchModelName } from '~/utils/tokens'; /** * @param {string} modelName diff --git a/packages/api/src/endpoints/custom/initialize.ts b/packages/api/src/endpoints/custom/initialize.ts index 7930b1c12f..d93303cb61 100644 --- a/packages/api/src/endpoints/custom/initialize.ts +++ b/packages/api/src/endpoints/custom/initialize.ts @@ -5,9 +5,9 @@ import { FetchTokenConfig, extractEnvVariable, } from 'librechat-data-provider'; -import type { TEndpoint } from 'librechat-data-provider'; +import 
type { TEndpoint, EndpointTokenConfig } from 'librechat-data-provider'; import type { AppConfig } from '@librechat/data-schemas'; -import type { BaseInitializeParams, InitializeResultBase, EndpointTokenConfig } from '~/types'; +import type { BaseInitializeParams, InitializeResultBase } from '~/types'; import { getOpenAIConfig } from '~/endpoints/openai/config'; import { getCustomEndpointConfig } from '~/app/config'; import { fetchModels } from '~/endpoints/models'; diff --git a/packages/api/src/types/endpoints.ts b/packages/api/src/types/endpoints.ts index 1b9872c924..e1952f2c5a 100644 --- a/packages/api/src/types/endpoints.ts +++ b/packages/api/src/types/endpoints.ts @@ -1,6 +1,7 @@ import type { ClientOptions, OpenAIClientOptions } from '@librechat/agents'; import type { TConfig } from 'librechat-data-provider'; -import type { EndpointTokenConfig, ServerRequest } from '~/types'; +import type { EndpointTokenConfig } from 'librechat-data-provider'; +import type { ServerRequest } from '~/types'; export type TCustomEndpointsConfig = Partial<{ [key: string]: Omit }>; diff --git a/packages/api/src/types/index.ts b/packages/api/src/types/index.ts index 31adc3b9bb..788bf29415 100644 --- a/packages/api/src/types/index.ts +++ b/packages/api/src/types/index.ts @@ -12,5 +12,4 @@ export * from './mistral'; export type * from './openai'; export * from './prompts'; export * from './run'; -export * from './tokens'; export * from './stream'; diff --git a/packages/api/src/types/tokens.ts b/packages/api/src/types/tokens.ts deleted file mode 100644 index f6e03d2e8d..0000000000 --- a/packages/api/src/types/tokens.ts +++ /dev/null @@ -1,17 +0,0 @@ -/** Configuration object mapping model keys to their respective prompt, completion rates, and context limit - * - * Note: the [key: string]: unknown is not in the original JSDoc typedef in /api/typedefs.js, but I've included it since - * getModelMaxOutputTokens calls getModelTokenValue with a key of 'output', which was not in the original JSDoc typedef, - * but would be referenced in a TokenConfig in the if(matchedPattern) portion of getModelTokenValue. - * So in order to preserve functionality for that case and any others which might reference an additional key I'm unaware of, - * I've included it here until the interface can be typed more tightly. - */ -export interface TokenConfig { - prompt: number; - completion: number; - context: number; - [key: string]: unknown; -} - -/** An endpoint's config object mapping model keys to their respective prompt, completion rates, and context limit */ -export type EndpointTokenConfig = Record; diff --git a/packages/api/src/utils/tokens.ts b/packages/api/src/utils/tokens.ts index 49f1640a7a..e7414e86c3 100644 --- a/packages/api/src/utils/tokens.ts +++ b/packages/api/src/utils/tokens.ts @@ -1,558 +1,5 @@ import z from 'zod'; -import { EModelEndpoint } from 'librechat-data-provider'; -import type { EndpointTokenConfig, TokenConfig } from '~/types'; - -/** - * Model Token Configuration Maps - * - * IMPORTANT: Key Ordering for Pattern Matching - * ============================================ - * The `findMatchingPattern` function iterates through object keys in REVERSE order - * (last-defined keys are checked first) and uses `modelName.includes(key)` for matching. - * - * This means: - * 1. BASE PATTERNS must be defined FIRST (e.g., "kimi", "moonshot") - * 2. 
SPECIFIC PATTERNS must be defined AFTER their base patterns (e.g., "kimi-k2", "kimi-k2.5") - * - * Example ordering for Kimi models: - * kimi: 262144, // Base pattern - checked last - * 'kimi-k2': 262144, // More specific - checked before "kimi" - * 'kimi-k2.5': 262144, // Most specific - checked first - * - * Why this matters: - * - Model name "kimi-k2.5" contains both "kimi" and "kimi-k2" as substrings - * - If "kimi" were checked first, it would incorrectly match "kimi-k2.5" - * - By defining specific patterns AFTER base patterns, they're checked first in reverse iteration - * - * When adding new model families: - * 1. Define the base/generic pattern first - * 2. Define increasingly specific patterns after - * 3. Ensure no pattern is a substring of another that should match differently - */ - -const openAIModels = { - 'o4-mini': 200000, - 'o3-mini': 195000, // -5000 from max - o3: 200000, - o1: 195000, // -5000 from max - 'o1-mini': 127500, // -500 from max - 'o1-preview': 127500, // -500 from max - 'gpt-4': 8187, // -5 from max - 'gpt-4-0613': 8187, // -5 from max - 'gpt-4-32k': 32758, // -10 from max - 'gpt-4-32k-0314': 32758, // -10 from max - 'gpt-4-32k-0613': 32758, // -10 from max - 'gpt-4-1106': 127500, // -500 from max - 'gpt-4-0125': 127500, // -500 from max - 'gpt-4.5': 127500, // -500 from max - 'gpt-4.1': 1047576, - 'gpt-4.1-mini': 1047576, - 'gpt-4.1-nano': 1047576, - 'gpt-5': 400000, - 'gpt-5.1': 400000, - 'gpt-5.2': 400000, - 'gpt-5-mini': 400000, - 'gpt-5-nano': 400000, - 'gpt-5-pro': 400000, - 'gpt-4o': 127500, // -500 from max - 'gpt-4o-mini': 127500, // -500 from max - 'gpt-4o-2024-05-13': 127500, // -500 from max - 'gpt-4-turbo': 127500, // -500 from max - 'gpt-4-vision': 127500, // -500 from max - 'gpt-3.5-turbo': 16375, // -10 from max - 'gpt-3.5-turbo-0613': 4092, // -5 from max - 'gpt-3.5-turbo-0301': 4092, // -5 from max - 'gpt-3.5-turbo-16k': 16375, // -10 from max - 'gpt-3.5-turbo-16k-0613': 16375, // -10 from max - 'gpt-3.5-turbo-1106': 16375, // -10 from max - 'gpt-3.5-turbo-0125': 16375, // -10 from max -}; - -const mistralModels = { - 'mistral-': 31990, // -10 from max - 'mistral-7b': 31990, // -10 from max - 'mistral-small': 31990, // -10 from max - 'mixtral-8x7b': 31990, // -10 from max - 'mixtral-8x22b': 65536, - 'mistral-large': 131000, - 'mistral-large-2402': 127500, - 'mistral-large-2407': 127500, - 'mistral-nemo': 131000, - 'pixtral-large': 131000, - 'mistral-saba': 32000, - codestral: 256000, - 'ministral-8b': 131000, - 'ministral-3b': 131000, -}; - -const cohereModels = { - 'command-light': 4086, // -10 from max - 'command-light-nightly': 8182, // -10 from max - command: 4086, // -10 from max - 'command-nightly': 8182, // -10 from max - 'command-text': 4086, // -10 from max - 'command-r': 127500, // -500 from max - 'command-r-plus': 127500, // -500 from max -}; - -const googleModels = { - /* Max I/O is combined so we subtract the amount from max response tokens for actual total */ - gemma: 8196, - 'gemma-2': 32768, - 'gemma-3': 32768, - 'gemma-3-27b': 131072, - gemini: 30720, // -2048 from max - 'gemini-pro-vision': 12288, - 'gemini-exp': 2000000, - 'gemini-3': 1000000, // 1M input tokens, 64k output tokens - 'gemini-3-pro-image': 1000000, - 'gemini-2.5': 1000000, // 1M input tokens, 64k output tokens - 'gemini-2.5-pro': 1000000, - 'gemini-2.5-flash': 1000000, - 'gemini-2.5-flash-image': 1000000, - 'gemini-2.5-flash-lite': 1000000, - 'gemini-2.0': 2000000, - 'gemini-2.0-flash': 1000000, - 'gemini-2.0-flash-lite': 1000000, - 'gemini-1.5': 1000000, 
- 'gemini-1.5-flash': 1000000, - 'gemini-1.5-flash-8b': 1000000, - 'text-bison-32k': 32758, // -10 from max - 'chat-bison-32k': 32758, // -10 from max - 'code-bison-32k': 32758, // -10 from max - 'codechat-bison-32k': 32758, - /* Codey, -5 from max: 6144 */ - 'code-': 6139, - 'codechat-': 6139, - /* PaLM2, -5 from max: 8192 */ - 'text-': 8187, - 'chat-': 8187, -}; - -const anthropicModels = { - 'claude-': 100000, - 'claude-instant': 100000, - 'claude-2': 100000, - 'claude-2.1': 200000, - 'claude-3': 200000, - 'claude-3-haiku': 200000, - 'claude-3-sonnet': 200000, - 'claude-3-opus': 200000, - 'claude-3.5-haiku': 200000, - 'claude-3-5-haiku': 200000, - 'claude-3-5-sonnet': 200000, - 'claude-3.5-sonnet': 200000, - 'claude-3-7-sonnet': 200000, - 'claude-3.7-sonnet': 200000, - 'claude-3-5-sonnet-latest': 200000, - 'claude-3.5-sonnet-latest': 200000, - 'claude-haiku-4-5': 200000, - 'claude-sonnet-4': 1000000, - 'claude-4': 200000, - 'claude-opus-4': 200000, - 'claude-opus-4-5': 200000, - 'claude-opus-4-6': 1000000, -}; - -const deepseekModels = { - deepseek: 128000, - 'deepseek-chat': 128000, - 'deepseek-reasoner': 128000, - 'deepseek-r1': 128000, - 'deepseek-v3': 128000, - 'deepseek.r1': 128000, -}; - -const moonshotModels = { - // Base patterns (check last due to reverse iteration) - kimi: 262144, - moonshot: 131072, - // kimi-k2 series (specific patterns) - 'kimi-latest': 128000, - 'kimi-k2': 262144, - 'kimi-k2.5': 262144, - 'kimi-k2-turbo': 262144, - 'kimi-k2-turbo-preview': 262144, - 'kimi-k2-0905': 262144, - 'kimi-k2-0905-preview': 262144, - 'kimi-k2-0711': 131072, - 'kimi-k2-0711-preview': 131072, - 'kimi-k2-thinking': 262144, - 'kimi-k2-thinking-turbo': 262144, - // moonshot-v1 series (specific patterns) - 'moonshot-v1': 131072, - 'moonshot-v1-auto': 131072, - 'moonshot-v1-8k': 8192, - 'moonshot-v1-8k-vision': 8192, - 'moonshot-v1-8k-vision-preview': 8192, - 'moonshot-v1-32k': 32768, - 'moonshot-v1-32k-vision': 32768, - 'moonshot-v1-32k-vision-preview': 32768, - 'moonshot-v1-128k': 131072, - 'moonshot-v1-128k-vision': 131072, - 'moonshot-v1-128k-vision-preview': 131072, - // Bedrock moonshot models - 'moonshot.kimi': 262144, - 'moonshot.kimi-k2': 262144, - 'moonshot.kimi-k2.5': 262144, - 'moonshot.kimi-k2-thinking': 262144, - 'moonshot.kimi-k2-0711': 131072, -}; - -const metaModels = { - // Basic patterns - llama3: 8000, - llama2: 4000, - 'llama-3': 8000, - 'llama-2': 4000, - - // llama3.x pattern - 'llama3.1': 127500, - 'llama3.2': 127500, - 'llama3.3': 127500, - - // llama3-x pattern - 'llama3-1': 127500, - 'llama3-2': 127500, - 'llama3-3': 127500, - - // llama-3.x pattern - 'llama-3.1': 127500, - 'llama-3.2': 127500, - 'llama-3.3': 127500, - - // llama3.x:Nb pattern - 'llama3.1:405b': 127500, - 'llama3.1:70b': 127500, - 'llama3.1:8b': 127500, - 'llama3.2:1b': 127500, - 'llama3.2:3b': 127500, - 'llama3.2:11b': 127500, - 'llama3.2:90b': 127500, - 'llama3.3:70b': 127500, - - // llama3-x-Nb pattern - 'llama3-1-405b': 127500, - 'llama3-1-70b': 127500, - 'llama3-1-8b': 127500, - 'llama3-2-1b': 127500, - 'llama3-2-3b': 127500, - 'llama3-2-11b': 127500, - 'llama3-2-90b': 127500, - 'llama3-3-70b': 127500, - - // llama-3.x-Nb pattern - 'llama-3.1-405b': 127500, - 'llama-3.1-70b': 127500, - 'llama-3.1-8b': 127500, - 'llama-3.2-1b': 127500, - 'llama-3.2-3b': 127500, - 'llama-3.2-11b': 127500, - 'llama-3.2-90b': 127500, - 'llama-3.3-70b': 127500, - - // Original llama2/3 patterns - 'llama3-70b': 8000, - 'llama3-8b': 8000, - 'llama2-70b': 4000, - 'llama2-13b': 4000, - 'llama3:70b': 8000, - 
'llama3:8b': 8000, - 'llama2:70b': 4000, -}; - -const qwenModels = { - qwen: 32000, - 'qwen2.5': 32000, - 'qwen-turbo': 1000000, - 'qwen-plus': 131000, - 'qwen-max': 32000, - 'qwq-32b': 32000, - // Qwen3 models - qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context) - 'qwen3-8b': 128000, - 'qwen3-14b': 40960, - 'qwen3-30b-a3b': 40960, - 'qwen3-32b': 40960, - 'qwen3-235b-a22b': 40960, - // Qwen3 VL (Vision-Language) models - 'qwen3-vl-8b-thinking': 256000, - 'qwen3-vl-8b-instruct': 262144, - 'qwen3-vl-30b-a3b': 262144, - 'qwen3-vl-235b-a22b': 131072, - // Qwen3 specialized models - 'qwen3-max': 256000, - 'qwen3-coder': 262144, - 'qwen3-coder-30b-a3b': 262144, - 'qwen3-coder-plus': 128000, - 'qwen3-coder-flash': 128000, - 'qwen3-next-80b-a3b': 262144, -}; - -const ai21Models = { - 'j2-mid': 8182, // -10 from max - 'j2-ultra': 8182, // -10 from max - 'jamba-instruct': 255500, // -500 from max -}; - -const amazonModels = { - // Amazon Titan models - 'titan-text-lite': 4000, - 'titan-text-express': 8000, - 'titan-text-premier': 31500, // -500 from max - // Amazon Nova models - // https://aws.amazon.com/ai/generative-ai/nova/ - 'nova-micro': 127000, // -1000 from max - 'nova-lite': 295000, // -5000 from max - 'nova-pro': 295000, // -5000 from max - 'nova-premier': 995000, // -5000 from max -}; - -const bedrockModels = { - ...anthropicModels, - ...mistralModels, - ...cohereModels, - ...deepseekModels, - ...moonshotModels, - ...metaModels, - ...ai21Models, - ...amazonModels, -}; - -const xAIModels = { - grok: 131072, - 'grok-beta': 131072, - 'grok-vision-beta': 8192, - 'grok-2': 131072, - 'grok-2-latest': 131072, - 'grok-2-1212': 131072, - 'grok-2-vision': 32768, - 'grok-2-vision-latest': 32768, - 'grok-2-vision-1212': 32768, - 'grok-3': 131072, - 'grok-3-fast': 131072, - 'grok-3-mini': 131072, - 'grok-3-mini-fast': 131072, - 'grok-4': 256000, // 256K context - 'grok-4-fast': 2000000, // 2M context - 'grok-4-1-fast': 2000000, // 2M context (covers reasoning & non-reasoning variants) - 'grok-code-fast': 256000, // 256K context -}; - -const aggregateModels = { - ...openAIModels, - ...googleModels, - ...bedrockModels, - ...xAIModels, - ...qwenModels, - // GPT-OSS - 'gpt-oss': 131000, - 'gpt-oss:20b': 131000, - 'gpt-oss-20b': 131000, - 'gpt-oss:120b': 131000, - 'gpt-oss-120b': 131000, - // GLM models (Zhipu AI) - glm4: 128000, - 'glm-4': 128000, - 'glm-4-32b': 128000, - 'glm-4.5': 131000, - 'glm-4.5-air': 131000, - 'glm-4.5v': 66000, - 'glm-4.6': 200000, -}; - -export const maxTokensMap = { - [EModelEndpoint.azureOpenAI]: openAIModels, - [EModelEndpoint.openAI]: aggregateModels, - [EModelEndpoint.agents]: aggregateModels, - [EModelEndpoint.custom]: aggregateModels, - [EModelEndpoint.google]: googleModels, - [EModelEndpoint.anthropic]: anthropicModels, - [EModelEndpoint.bedrock]: bedrockModels, -}; - -export const modelMaxOutputs = { - o1: 32268, // -500 from max: 32,768 - 'o1-mini': 65136, // -500 from max: 65,536 - 'o1-preview': 32268, // -500 from max: 32,768 - 'gpt-5': 128000, - 'gpt-5.1': 128000, - 'gpt-5.2': 128000, - 'gpt-5-mini': 128000, - 'gpt-5-nano': 128000, - 'gpt-5-pro': 128000, - 'gpt-oss-20b': 131000, - 'gpt-oss-120b': 131000, - system_default: 32000, -}; - -/** Outputs from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names */ -const anthropicMaxOutputs = { - 'claude-3-haiku': 4096, - 'claude-3-sonnet': 4096, - 'claude-3-opus': 4096, - 'claude-haiku-4-5': 64000, - 'claude-sonnet-4': 64000, - 'claude-opus-4': 32000, - 'claude-opus-4-5': 64000, - 
'claude-opus-4-6': 128000, - 'claude-3.5-sonnet': 8192, - 'claude-3-5-sonnet': 8192, - 'claude-3.7-sonnet': 128000, - 'claude-3-7-sonnet': 128000, -}; - -/** Outputs from https://api-docs.deepseek.com/quick_start/pricing */ -const deepseekMaxOutputs = { - deepseek: 8000, // deepseek-chat default: 4K, max: 8K - 'deepseek-chat': 8000, - 'deepseek-reasoner': 64000, // default: 32K, max: 64K - 'deepseek-r1': 64000, - 'deepseek-v3': 8000, - 'deepseek.r1': 64000, -}; - -export const maxOutputTokensMap = { - [EModelEndpoint.anthropic]: anthropicMaxOutputs, - [EModelEndpoint.azureOpenAI]: modelMaxOutputs, - [EModelEndpoint.openAI]: { ...modelMaxOutputs, ...deepseekMaxOutputs }, - [EModelEndpoint.custom]: { ...modelMaxOutputs, ...deepseekMaxOutputs }, -}; - -/** - * Finds the first matching pattern in the tokens map. - * @param {string} modelName - * @param {Record | EndpointTokenConfig} tokensMap - * @returns {string|null} - */ -export function findMatchingPattern( - modelName: string, - tokensMap: Record | EndpointTokenConfig, -): string | null { - const keys = Object.keys(tokensMap); - const lowerModelName = modelName.toLowerCase(); - for (let i = keys.length - 1; i >= 0; i--) { - const modelKey = keys[i]; - if (lowerModelName.includes(modelKey)) { - return modelKey; - } - } - - return null; -} - -/** - * Retrieves a token value for a given model name from a tokens map. - * - * @param modelName - The name of the model to look up. - * @param tokensMap - The map of model names to token values. - * @param [key='context'] - The key to look up in the tokens map. - * @returns The token value for the given model or undefined if no match is found. - */ -export function getModelTokenValue( - modelName: string, - tokensMap?: EndpointTokenConfig | Record, - key = 'context' as keyof TokenConfig, -): number | undefined { - if (typeof modelName !== 'string' || !tokensMap) { - return undefined; - } - - const value = tokensMap[modelName]; - if (typeof value === 'number') { - return value; - } - - if (value?.context) { - return value.context; - } - - const matchedPattern = findMatchingPattern(modelName, tokensMap); - - if (matchedPattern) { - const result = tokensMap[matchedPattern]; - if (typeof result === 'number') { - return result; - } - - const tokenValue = result?.[key]; - if (typeof tokenValue === 'number') { - return tokenValue; - } - return tokensMap.system_default as number | undefined; - } - - return tokensMap.system_default as number | undefined; -} - -/** - * Retrieves the maximum tokens for a given model name. - * - * @param modelName - The name of the model to look up. - * @param endpoint - The endpoint (default is 'openAI'). - * @param [endpointTokenConfig] - Token Config for current endpoint to use for max tokens lookup - * @returns The maximum tokens for the given model or undefined if no match is found. - */ -export function getModelMaxTokens( - modelName: string, - endpoint = EModelEndpoint.openAI, - endpointTokenConfig?: EndpointTokenConfig, -): number | undefined { - const tokensMap = endpointTokenConfig ?? maxTokensMap[endpoint as keyof typeof maxTokensMap]; - return getModelTokenValue(modelName, tokensMap); -} - -/** - * Retrieves the maximum output tokens for a given model name. - * - * @param modelName - The name of the model to look up. - * @param endpoint - The endpoint (default is 'openAI'). - * @param [endpointTokenConfig] - Token Config for current endpoint to use for max tokens lookup - * @returns The maximum output tokens for the given model or undefined if no match is found. 
- */ -export function getModelMaxOutputTokens( - modelName: string, - endpoint = EModelEndpoint.openAI, - endpointTokenConfig?: EndpointTokenConfig, -): number | undefined { - const tokensMap = - endpointTokenConfig ?? maxOutputTokensMap[endpoint as keyof typeof maxOutputTokensMap]; - return getModelTokenValue(modelName, tokensMap, 'output'); -} - -/** - * Retrieves the model name key for a given model name input. If the exact model name isn't found, - * it searches for partial matches within the model name, checking keys in reverse order. - * - * @param modelName - The name of the model to look up. - * @param endpoint - The endpoint (default is 'openAI'). - * @returns The model name key for the given model; returns input if no match is found and is string. - * - * @example - * matchModelName('gpt-4-32k-0613'); // Returns 'gpt-4-32k-0613' - * matchModelName('gpt-4-32k-unknown'); // Returns 'gpt-4-32k' - * matchModelName('unknown-model'); // Returns undefined - */ -export function matchModelName( - modelName: string, - endpoint = EModelEndpoint.openAI, -): string | undefined { - if (typeof modelName !== 'string') { - return undefined; - } - - const tokensMap: Record = maxTokensMap[endpoint as keyof typeof maxTokensMap]; - if (!tokensMap) { - return modelName; - } - - if (tokensMap[modelName]) { - return modelName; - } - - const matchedPattern = findMatchingPattern(modelName, tokensMap); - return matchedPattern || modelName; -} +import type { EndpointTokenConfig } from 'librechat-data-provider'; export const modelSchema = z.object({ id: z.string(), diff --git a/packages/data-provider/src/index.ts b/packages/data-provider/src/index.ts index c57ca82845..279e8da550 100644 --- a/packages/data-provider/src/index.ts +++ b/packages/data-provider/src/index.ts @@ -40,6 +40,8 @@ export { dataService }; import * as dataService from './data-service'; /* general helpers */ export * from './utils'; +/* tokens */ +export * from './tokens'; export * from './actions'; export { default as createPayload } from './createPayload'; // /* react query hooks */ diff --git a/packages/data-provider/src/tokens.ts b/packages/data-provider/src/tokens.ts new file mode 100644 index 0000000000..b3eeaf3482 --- /dev/null +++ b/packages/data-provider/src/tokens.ts @@ -0,0 +1,564 @@ +import { EModelEndpoint } from './schemas'; + +/** Configuration object mapping model keys to their respective prompt, completion rates, and context limit */ +export interface TokenConfig { + prompt: number; + completion: number; + context: number; + [key: string]: unknown; +} + +/** An endpoint's config object mapping model keys to their respective prompt, completion rates, and context limit */ +export type EndpointTokenConfig = Record; + +/** + * Model Token Configuration Maps + * + * IMPORTANT: Key Ordering for Pattern Matching + * ============================================ + * The `findMatchingPattern` function iterates through object keys in REVERSE order + * (last-defined keys are checked first) and uses `modelName.includes(key)` for matching. + * + * This means: + * 1. BASE PATTERNS must be defined FIRST (e.g., "kimi", "moonshot") + * 2. 
SPECIFIC PATTERNS must be defined AFTER their base patterns (e.g., "kimi-k2", "kimi-k2.5") + * + * Example ordering for Kimi models: + * kimi: 262144, // Base pattern - checked last + * 'kimi-k2': 262144, // More specific - checked before "kimi" + * 'kimi-k2.5': 262144, // Most specific - checked first + * + * Why this matters: + * - Model name "kimi-k2.5" contains both "kimi" and "kimi-k2" as substrings + * - If "kimi" were checked first, it would incorrectly match "kimi-k2.5" + * - By defining specific patterns AFTER base patterns, they're checked first in reverse iteration + * + * When adding new model families: + * 1. Define the base/generic pattern first + * 2. Define increasingly specific patterns after + * 3. Ensure no pattern is a substring of another that should match differently + */ + +const openAIModels = { + 'o4-mini': 200000, + 'o3-mini': 195000, // -5000 from max + o3: 200000, + o1: 195000, // -5000 from max + 'o1-mini': 127500, // -500 from max + 'o1-preview': 127500, // -500 from max + 'gpt-4': 8187, // -5 from max + 'gpt-4-0613': 8187, // -5 from max + 'gpt-4-32k': 32758, // -10 from max + 'gpt-4-32k-0314': 32758, // -10 from max + 'gpt-4-32k-0613': 32758, // -10 from max + 'gpt-4-1106': 127500, // -500 from max + 'gpt-4-0125': 127500, // -500 from max + 'gpt-4.5': 127500, // -500 from max + 'gpt-4.1': 1047576, + 'gpt-4.1-mini': 1047576, + 'gpt-4.1-nano': 1047576, + 'gpt-5': 400000, + 'gpt-5.1': 400000, + 'gpt-5.2': 400000, + 'gpt-5-mini': 400000, + 'gpt-5-nano': 400000, + 'gpt-5-pro': 400000, + 'gpt-4o': 127500, // -500 from max + 'gpt-4o-mini': 127500, // -500 from max + 'gpt-4o-2024-05-13': 127500, // -500 from max + 'gpt-4-turbo': 127500, // -500 from max + 'gpt-4-vision': 127500, // -500 from max + 'gpt-3.5-turbo': 16375, // -10 from max + 'gpt-3.5-turbo-0613': 4092, // -5 from max + 'gpt-3.5-turbo-0301': 4092, // -5 from max + 'gpt-3.5-turbo-16k': 16375, // -10 from max + 'gpt-3.5-turbo-16k-0613': 16375, // -10 from max + 'gpt-3.5-turbo-1106': 16375, // -10 from max + 'gpt-3.5-turbo-0125': 16375, // -10 from max +}; + +const mistralModels = { + 'mistral-': 31990, // -10 from max + 'mistral-7b': 31990, // -10 from max + 'mistral-small': 31990, // -10 from max + 'mixtral-8x7b': 31990, // -10 from max + 'mixtral-8x22b': 65536, + 'mistral-large': 131000, + 'mistral-large-2402': 127500, + 'mistral-large-2407': 127500, + 'mistral-nemo': 131000, + 'pixtral-large': 131000, + 'mistral-saba': 32000, + codestral: 256000, + 'ministral-8b': 131000, + 'ministral-3b': 131000, +}; + +const cohereModels = { + 'command-light': 4086, // -10 from max + 'command-light-nightly': 8182, // -10 from max + command: 4086, // -10 from max + 'command-nightly': 8182, // -10 from max + 'command-text': 4086, // -10 from max + 'command-r': 127500, // -500 from max + 'command-r-plus': 127500, // -500 from max +}; + +const googleModels = { + /* Max I/O is combined so we subtract the amount from max response tokens for actual total */ + gemma: 8196, + 'gemma-2': 32768, + 'gemma-3': 32768, + 'gemma-3-27b': 131072, + gemini: 30720, // -2048 from max + 'gemini-pro-vision': 12288, + 'gemini-exp': 2000000, + 'gemini-3': 1000000, // 1M input tokens, 64k output tokens + 'gemini-3-pro-image': 1000000, + 'gemini-2.5': 1000000, // 1M input tokens, 64k output tokens + 'gemini-2.5-pro': 1000000, + 'gemini-2.5-flash': 1000000, + 'gemini-2.5-flash-image': 1000000, + 'gemini-2.5-flash-lite': 1000000, + 'gemini-2.0': 2000000, + 'gemini-2.0-flash': 1000000, + 'gemini-2.0-flash-lite': 1000000, + 'gemini-1.5': 1000000, 
+ 'gemini-1.5-flash': 1000000, + 'gemini-1.5-flash-8b': 1000000, + 'text-bison-32k': 32758, // -10 from max + 'chat-bison-32k': 32758, // -10 from max + 'code-bison-32k': 32758, // -10 from max + 'codechat-bison-32k': 32758, + /* Codey, -5 from max: 6144 */ + 'code-': 6139, + 'codechat-': 6139, + /* PaLM2, -5 from max: 8192 */ + 'text-': 8187, + 'chat-': 8187, +}; + +const anthropicModels = { + 'claude-': 100000, + 'claude-instant': 100000, + 'claude-2': 100000, + 'claude-2.1': 200000, + 'claude-3': 200000, + 'claude-3-haiku': 200000, + 'claude-3-sonnet': 200000, + 'claude-3-opus': 200000, + 'claude-3.5-haiku': 200000, + 'claude-3-5-haiku': 200000, + 'claude-3-5-sonnet': 200000, + 'claude-3.5-sonnet': 200000, + 'claude-3-7-sonnet': 200000, + 'claude-3.7-sonnet': 200000, + 'claude-3-5-sonnet-latest': 200000, + 'claude-3.5-sonnet-latest': 200000, + 'claude-haiku-4-5': 200000, + 'claude-sonnet-4': 1000000, + 'claude-4': 200000, + 'claude-opus-4': 200000, + 'claude-opus-4-5': 200000, + 'claude-opus-4-6': 1000000, +}; + +const deepseekModels = { + deepseek: 128000, + 'deepseek-chat': 128000, + 'deepseek-reasoner': 128000, + 'deepseek-r1': 128000, + 'deepseek-v3': 128000, + 'deepseek.r1': 128000, +}; + +const moonshotModels = { + // Base patterns (check last due to reverse iteration) + kimi: 262144, + moonshot: 131072, + // kimi-k2 series (specific patterns) + 'kimi-latest': 128000, + 'kimi-k2': 262144, + 'kimi-k2.5': 262144, + 'kimi-k2-turbo': 262144, + 'kimi-k2-turbo-preview': 262144, + 'kimi-k2-0905': 262144, + 'kimi-k2-0905-preview': 262144, + 'kimi-k2-0711': 131072, + 'kimi-k2-0711-preview': 131072, + 'kimi-k2-thinking': 262144, + 'kimi-k2-thinking-turbo': 262144, + // moonshot-v1 series (specific patterns) + 'moonshot-v1': 131072, + 'moonshot-v1-auto': 131072, + 'moonshot-v1-8k': 8192, + 'moonshot-v1-8k-vision': 8192, + 'moonshot-v1-8k-vision-preview': 8192, + 'moonshot-v1-32k': 32768, + 'moonshot-v1-32k-vision': 32768, + 'moonshot-v1-32k-vision-preview': 32768, + 'moonshot-v1-128k': 131072, + 'moonshot-v1-128k-vision': 131072, + 'moonshot-v1-128k-vision-preview': 131072, + // Bedrock moonshot models + 'moonshot.kimi': 262144, + 'moonshot.kimi-k2': 262144, + 'moonshot.kimi-k2.5': 262144, + 'moonshot.kimi-k2-thinking': 262144, + 'moonshot.kimi-k2-0711': 131072, +}; + +const metaModels = { + // Basic patterns + llama3: 8000, + llama2: 4000, + 'llama-3': 8000, + 'llama-2': 4000, + + // llama3.x pattern + 'llama3.1': 127500, + 'llama3.2': 127500, + 'llama3.3': 127500, + + // llama3-x pattern + 'llama3-1': 127500, + 'llama3-2': 127500, + 'llama3-3': 127500, + + // llama-3.x pattern + 'llama-3.1': 127500, + 'llama-3.2': 127500, + 'llama-3.3': 127500, + + // llama3.x:Nb pattern + 'llama3.1:405b': 127500, + 'llama3.1:70b': 127500, + 'llama3.1:8b': 127500, + 'llama3.2:1b': 127500, + 'llama3.2:3b': 127500, + 'llama3.2:11b': 127500, + 'llama3.2:90b': 127500, + 'llama3.3:70b': 127500, + + // llama3-x-Nb pattern + 'llama3-1-405b': 127500, + 'llama3-1-70b': 127500, + 'llama3-1-8b': 127500, + 'llama3-2-1b': 127500, + 'llama3-2-3b': 127500, + 'llama3-2-11b': 127500, + 'llama3-2-90b': 127500, + 'llama3-3-70b': 127500, + + // llama-3.x-Nb pattern + 'llama-3.1-405b': 127500, + 'llama-3.1-70b': 127500, + 'llama-3.1-8b': 127500, + 'llama-3.2-1b': 127500, + 'llama-3.2-3b': 127500, + 'llama-3.2-11b': 127500, + 'llama-3.2-90b': 127500, + 'llama-3.3-70b': 127500, + + // Original llama2/3 patterns + 'llama3-70b': 8000, + 'llama3-8b': 8000, + 'llama2-70b': 4000, + 'llama2-13b': 4000, + 'llama3:70b': 8000, + 
'llama3:8b': 8000, + 'llama2:70b': 4000, +}; + +const qwenModels = { + qwen: 32000, + 'qwen2.5': 32000, + 'qwen-turbo': 1000000, + 'qwen-plus': 131000, + 'qwen-max': 32000, + 'qwq-32b': 32000, + // Qwen3 models + qwen3: 40960, // Qwen3 base pattern (using qwen3-4b context) + 'qwen3-8b': 128000, + 'qwen3-14b': 40960, + 'qwen3-30b-a3b': 40960, + 'qwen3-32b': 40960, + 'qwen3-235b-a22b': 40960, + // Qwen3 VL (Vision-Language) models + 'qwen3-vl-8b-thinking': 256000, + 'qwen3-vl-8b-instruct': 262144, + 'qwen3-vl-30b-a3b': 262144, + 'qwen3-vl-235b-a22b': 131072, + // Qwen3 specialized models + 'qwen3-max': 256000, + 'qwen3-coder': 262144, + 'qwen3-coder-30b-a3b': 262144, + 'qwen3-coder-plus': 128000, + 'qwen3-coder-flash': 128000, + 'qwen3-next-80b-a3b': 262144, +}; + +const ai21Models = { + 'j2-mid': 8182, // -10 from max + 'j2-ultra': 8182, // -10 from max + 'jamba-instruct': 255500, // -500 from max +}; + +const amazonModels = { + // Amazon Titan models + 'titan-text-lite': 4000, + 'titan-text-express': 8000, + 'titan-text-premier': 31500, // -500 from max + // Amazon Nova models + // https://aws.amazon.com/ai/generative-ai/nova/ + 'nova-micro': 127000, // -1000 from max + 'nova-lite': 295000, // -5000 from max + 'nova-pro': 295000, // -5000 from max + 'nova-premier': 995000, // -5000 from max +}; + +const bedrockModels = { + ...anthropicModels, + ...mistralModels, + ...cohereModels, + ...deepseekModels, + ...moonshotModels, + ...metaModels, + ...ai21Models, + ...amazonModels, +}; + +const xAIModels = { + grok: 131072, + 'grok-beta': 131072, + 'grok-vision-beta': 8192, + 'grok-2': 131072, + 'grok-2-latest': 131072, + 'grok-2-1212': 131072, + 'grok-2-vision': 32768, + 'grok-2-vision-latest': 32768, + 'grok-2-vision-1212': 32768, + 'grok-3': 131072, + 'grok-3-fast': 131072, + 'grok-3-mini': 131072, + 'grok-3-mini-fast': 131072, + 'grok-4': 256000, // 256K context + 'grok-4-fast': 2000000, // 2M context + 'grok-4-1-fast': 2000000, // 2M context (covers reasoning & non-reasoning variants) + 'grok-code-fast': 256000, // 256K context +}; + +const aggregateModels = { + ...openAIModels, + ...googleModels, + ...bedrockModels, + ...xAIModels, + ...qwenModels, + // GPT-OSS + 'gpt-oss': 131000, + 'gpt-oss:20b': 131000, + 'gpt-oss-20b': 131000, + 'gpt-oss:120b': 131000, + 'gpt-oss-120b': 131000, + // GLM models (Zhipu AI) + glm4: 128000, + 'glm-4': 128000, + 'glm-4-32b': 128000, + 'glm-4.5': 131000, + 'glm-4.5-air': 131000, + 'glm-4.5v': 66000, + 'glm-4.6': 200000, +}; + +export const maxTokensMap = { + [EModelEndpoint.azureOpenAI]: openAIModels, + [EModelEndpoint.openAI]: aggregateModels, + [EModelEndpoint.agents]: aggregateModels, + [EModelEndpoint.custom]: aggregateModels, + [EModelEndpoint.google]: googleModels, + [EModelEndpoint.anthropic]: anthropicModels, + [EModelEndpoint.bedrock]: bedrockModels, +}; + +export const modelMaxOutputs = { + o1: 32268, // -500 from max: 32,768 + 'o1-mini': 65136, // -500 from max: 65,536 + 'o1-preview': 32268, // -500 from max: 32,768 + 'gpt-5': 128000, + 'gpt-5.1': 128000, + 'gpt-5.2': 128000, + 'gpt-5-mini': 128000, + 'gpt-5-nano': 128000, + 'gpt-5-pro': 128000, + 'gpt-oss-20b': 131000, + 'gpt-oss-120b': 131000, + system_default: 32000, +}; + +/** Outputs from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names */ +const anthropicMaxOutputs = { + 'claude-3-haiku': 4096, + 'claude-3-sonnet': 4096, + 'claude-3-opus': 4096, + 'claude-haiku-4-5': 64000, + 'claude-sonnet-4': 64000, + 'claude-opus-4': 32000, + 'claude-opus-4-5': 64000, + 
'claude-opus-4-6': 128000, + 'claude-3.5-sonnet': 8192, + 'claude-3-5-sonnet': 8192, + 'claude-3.7-sonnet': 128000, + 'claude-3-7-sonnet': 128000, +}; + +/** Outputs from https://api-docs.deepseek.com/quick_start/pricing */ +const deepseekMaxOutputs = { + deepseek: 8000, // deepseek-chat default: 4K, max: 8K + 'deepseek-chat': 8000, + 'deepseek-reasoner': 64000, // default: 32K, max: 64K + 'deepseek-r1': 64000, + 'deepseek-v3': 8000, + 'deepseek.r1': 64000, +}; + +export const maxOutputTokensMap = { + [EModelEndpoint.anthropic]: anthropicMaxOutputs, + [EModelEndpoint.azureOpenAI]: modelMaxOutputs, + [EModelEndpoint.openAI]: { ...modelMaxOutputs, ...deepseekMaxOutputs }, + [EModelEndpoint.custom]: { ...modelMaxOutputs, ...deepseekMaxOutputs }, +}; + +/** + * Finds the first matching pattern in the tokens map. + * @param {string} modelName + * @param {Record | EndpointTokenConfig} tokensMap + * @returns {string|null} + */ +export function findMatchingPattern( + modelName: string, + tokensMap: Record | EndpointTokenConfig, +): string | null { + const keys = Object.keys(tokensMap); + const lowerModelName = modelName.toLowerCase(); + for (let i = keys.length - 1; i >= 0; i--) { + const modelKey = keys[i]; + if (lowerModelName.includes(modelKey)) { + return modelKey; + } + } + + return null; +} + +/** + * Retrieves a token value for a given model name from a tokens map. + * + * @param modelName - The name of the model to look up. + * @param tokensMap - The map of model names to token values. + * @param [key='context'] - The key to look up in the tokens map. + * @returns The token value for the given model or undefined if no match is found. + */ +export function getModelTokenValue( + modelName: string, + tokensMap?: EndpointTokenConfig | Record, + key = 'context' as keyof TokenConfig, +): number | undefined { + if (typeof modelName !== 'string' || !tokensMap) { + return undefined; + } + + const value = tokensMap[modelName]; + if (typeof value === 'number') { + return value; + } + + if (value?.context) { + return value.context; + } + + const matchedPattern = findMatchingPattern(modelName, tokensMap); + + if (matchedPattern) { + const result = tokensMap[matchedPattern]; + if (typeof result === 'number') { + return result; + } + + const tokenValue = result?.[key]; + if (typeof tokenValue === 'number') { + return tokenValue; + } + return tokensMap.system_default as number | undefined; + } + + return tokensMap.system_default as number | undefined; +} + +/** + * Retrieves the maximum tokens for a given model name. + * + * @param modelName - The name of the model to look up. + * @param endpoint - The endpoint (default is 'openAI'). + * @param [endpointTokenConfig] - Token Config for current endpoint to use for max tokens lookup + * @returns The maximum tokens for the given model or undefined if no match is found. + */ +export function getModelMaxTokens( + modelName: string, + endpoint = EModelEndpoint.openAI, + endpointTokenConfig?: EndpointTokenConfig, +): number | undefined { + const tokensMap = endpointTokenConfig ?? maxTokensMap[endpoint as keyof typeof maxTokensMap]; + return getModelTokenValue(modelName, tokensMap); +} + +/** + * Retrieves the maximum output tokens for a given model name. + * + * @param modelName - The name of the model to look up. + * @param endpoint - The endpoint (default is 'openAI'). + * @param [endpointTokenConfig] - Token Config for current endpoint to use for max tokens lookup + * @returns The maximum output tokens for the given model or undefined if no match is found. 
+ */
+export function getModelMaxOutputTokens(
+  modelName: string,
+  endpoint = EModelEndpoint.openAI,
+  endpointTokenConfig?: EndpointTokenConfig,
+): number | undefined {
+  const tokensMap =
+    endpointTokenConfig ?? maxOutputTokensMap[endpoint as keyof typeof maxOutputTokensMap];
+  return getModelTokenValue(modelName, tokensMap, 'output');
+}
+
+/**
+ * Retrieves the model name key for a given model name input. If the exact model name isn't found,
+ * it searches for partial matches within the model name, checking keys in reverse order.
+ *
+ * @param modelName - The name of the model to look up.
+ * @param endpoint - The endpoint (default is 'openAI').
+ * @returns The model name key for the given model; returns the input if no match is found and the input is a string.
+ *
+ * @example
+ * matchModelName('gpt-4-32k-0613'); // Returns 'gpt-4-32k-0613'
+ * matchModelName('gpt-4-32k-unknown'); // Returns 'gpt-4-32k'
+ * matchModelName('unknown-model'); // Returns 'unknown-model' (input returned when no pattern matches)
+ */
+export function matchModelName(
+  modelName: string,
+  endpoint = EModelEndpoint.openAI,
+): string | undefined {
+  if (typeof modelName !== 'string') {
+    return undefined;
+  }
+
+  const tokensMap: Record<string, number> = maxTokensMap[endpoint as keyof typeof maxTokensMap];
+  if (!tokensMap) {
+    return modelName;
+  }
+
+  if (tokensMap[modelName]) {
+    return modelName;
+  }
+
+  const matchedPattern = findMatchingPattern(modelName, tokensMap);
+  return matchedPattern || modelName;
+}
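
Reviewer note: a minimal sketch of how downstream code consumes these exports from their new home in librechat-data-provider after this change. The 'my-model' entry and its prompt/completion rates are hypothetical, chosen only to illustrate the EndpointTokenConfig shape; the other values come from the maps moved above.

import {
  EModelEndpoint,
  getModelMaxTokens,
  matchModelName,
} from 'librechat-data-provider';
import type { EndpointTokenConfig } from 'librechat-data-provider';

// Hypothetical per-endpoint override; each value follows the TokenConfig
// shape (prompt/completion rates plus a context limit).
const overrides: EndpointTokenConfig = {
  'my-model': { prompt: 1, completion: 2, context: 32000 },
};

// Exact key in the built-in openAI map: returns 127500.
getModelMaxTokens('gpt-4o', EModelEndpoint.openAI);

// When an endpointTokenConfig is supplied, it replaces the built-in map:
// returns 32000 via the TokenConfig's `context` field.
getModelMaxTokens('my-model', EModelEndpoint.custom, overrides);

// Reverse-order pattern matching resolves unknown variants to the most
// specific defined key: 'gpt-4o' is checked before 'gpt-4', so this
// returns 'gpt-4o'.
matchModelName('gpt-4o-unknown-variant');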