Mirror of https://github.com/danny-avila/LibreChat.git (synced 2025-09-21 21:50:49 +02:00)
🎚️ feat: Anthropic Parameter Set Support via Custom Endpoints (#9415)
* refactor: modularize openai llm config logic into new getOpenAILLMConfig function (#9412)
* ✈️ refactor: Migrate Anthropic's getLLMConfig to TypeScript (#9413)
* refactor: move tokens.js over to packages/api and update imports
* refactor: port tokens.js to typescript
* refactor: move helpers.js over to packages/api and update imports
* refactor: port helpers.js to typescript
* refactor: move anthropic/llm.js over to packages/api and update imports
* refactor: port anthropic/llm.js to typescript with supporting types in types/anthropic.ts and updated tests in llm.spec.js
* refactor: move llm.spec.js over to packages/api and update import
* refactor: port llm.spec.js over to typescript
* 📝 Add Prompt Parameter Support for Anthropic Custom Endpoints (#9414)
  feat: add anthropic llm config support for openai-like (custom) endpoints
* fix: missed compiler / type issues from addition of getAnthropicLLMConfig
* refactor: update tokens.ts to export constants and functions, enhance type definitions, and adjust default values
* WIP: first pass, decouple `llmConfig` from `configOptions`
* chore: update import path for OpenAI configuration from 'llm' to 'config'
* refactor: enhance type definitions for ThinkingConfig and update modelOptions in AnthropicConfigOptions
* refactor: cleanup type, introduce openai transform from alt provider
* chore: integrate removeNullishValues in Google llmConfig and update OpenAI exports
* chore: bump version of @librechat/api to 1.3.5 in package.json and package-lock.json
* refactor: update customParams type in OpenAIConfigOptions to use TConfig['customParams']
* refactor: enhance transformToOpenAIConfig to include fromEndpoint and improve config extraction
* refactor: conform userId field for anthropic/openai, cleanup anthropic typing
* ci: add backward compatibility tests for getOpenAIConfig with various endpoints and configurations
* ci: replace userId with user in clientOptions for getLLMConfig
* test: add Azure OpenAI endpoint tests for various configurations in getOpenAIConfig
* refactor: defaultHeaders retrieval for prompt caching for anthropic-based custom endpoint (litellm)
* test: add unit tests for getOpenAIConfig with various Anthropic model configurations
* test: enhance Anthropic compatibility tests with addParams and dropParams handling
* chore: update @librechat/agents dependency to version 2.4.78 in package.json and package-lock.json
* chore: update @librechat/agents dependency to version 2.4.79 in package.json and package-lock.json
---------
Co-authored-by: Danny Avila <danny@librechat.ai>
Parent: 7de6f6e44c
Commit: c6ecf0095b
40 changed files with 1736 additions and 432 deletions
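Editor's note: in practice, this change lets a custom (OpenAI-compatible) endpoint opt into Anthropic's parameter handling via customParams.defaultParamsEndpoint. A minimal sketch of the new call path, mirroring the LiteLLM test fixtures later in this diff (the endpoint name, key, and proxy URL are illustrative, and the import path assumes the package root re-exports getOpenAIConfig):

import { getOpenAIConfig } from '@librechat/api';

// Illustrative values; any OpenAI-compatible proxy (e.g., LiteLLM) applies.
const { llmConfig, configOptions, tools } = getOpenAIConfig(
  'sk-xxxx',
  {
    modelOptions: { model: 'claude-sonnet-4', user: 'some_user_id' },
    reverseProxyUrl: 'http://host.docker.internal:4000/v1',
    customParams: { defaultParamsEndpoint: 'anthropic' },
    endpointType: 'custom',
  },
  'Anthropic (via LiteLLM)',
);
// llmConfig arrives in OpenAI-conformant shape (maxTokens, modelKwargs.thinking),
// and configOptions.defaultHeaders carries the matching 'anthropic-beta' header.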
@@ -1,6 +1,6 @@
 {
   "name": "@librechat/api",
-  "version": "1.3.4",
+  "version": "1.3.5",
   "type": "commonjs",
   "description": "MCP services for LibreChat",
   "main": "dist/index.js",
@@ -73,7 +73,7 @@
   },
   "peerDependencies": {
     "@langchain/core": "^0.3.62",
-    "@librechat/agents": "^2.4.77",
+    "@librechat/agents": "^2.4.79",
     "@librechat/data-schemas": "*",
     "@modelcontextprotocol/sdk": "^1.17.1",
     "axios": "^1.8.2",
packages/api/src/endpoints/anthropic/helpers.ts (new file, 132 lines)
@@ -0,0 +1,132 @@
import { logger } from '@librechat/data-schemas';
import { AnthropicClientOptions } from '@librechat/agents';
import { EModelEndpoint, anthropicSettings } from 'librechat-data-provider';
import { matchModelName } from '~/utils/tokens';

/**
 * @param {string} modelName
 * @returns {boolean}
 */
function checkPromptCacheSupport(modelName: string): boolean {
  const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic) ?? '';
  if (
    modelMatch.includes('claude-3-5-sonnet-latest') ||
    modelMatch.includes('claude-3.5-sonnet-latest')
  ) {
    return false;
  }

  return (
    /claude-3[-.]7/.test(modelMatch) ||
    /claude-3[-.]5-(?:sonnet|haiku)/.test(modelMatch) ||
    /claude-3-(?:sonnet|haiku|opus)?/.test(modelMatch) ||
    /claude-(?:sonnet|opus|haiku)-[4-9]/.test(modelMatch) ||
    /claude-[4-9]-(?:sonnet|opus|haiku)?/.test(modelMatch) ||
    /claude-4(?:-(?:sonnet|opus|haiku))?/.test(modelMatch)
  );
}

/**
 * Gets the appropriate headers for Claude models with cache control
 * @param {string} model The model name
 * @param {boolean} supportsCacheControl Whether the model supports cache control
 * @returns {AnthropicClientOptions['extendedOptions']['defaultHeaders']|undefined} The headers object or undefined if not applicable
 */
function getClaudeHeaders(
  model: string,
  supportsCacheControl: boolean,
): Record<string, string> | undefined {
  if (!supportsCacheControl) {
    return undefined;
  }

  if (/claude-3[-.]5-sonnet/.test(model)) {
    return {
      'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
    };
  } else if (/claude-3[-.]7/.test(model)) {
    return {
      'anthropic-beta':
        'token-efficient-tools-2025-02-19,output-128k-2025-02-19,prompt-caching-2024-07-31',
    };
  } else if (/claude-sonnet-4/.test(model)) {
    return {
      'anthropic-beta': 'prompt-caching-2024-07-31,context-1m-2025-08-07',
    };
  } else if (
    /claude-(?:sonnet|opus|haiku)-[4-9]/.test(model) ||
    /claude-[4-9]-(?:sonnet|opus|haiku)?/.test(model) ||
    /claude-4(?:-(?:sonnet|opus|haiku))?/.test(model)
  ) {
    return {
      'anthropic-beta': 'prompt-caching-2024-07-31',
    };
  } else {
    return {
      'anthropic-beta': 'prompt-caching-2024-07-31',
    };
  }
}

/**
 * Configures reasoning-related options for Claude models
 * @param {AnthropicClientOptions & { max_tokens?: number }} anthropicInput The request options object
 * @param {Object} extendedOptions Additional client configuration options
 * @param {boolean} extendedOptions.thinking Whether thinking is enabled in client config
 * @param {number|null} extendedOptions.thinkingBudget The token budget for thinking
 * @returns {Object} Updated request options
 */
function configureReasoning(
  anthropicInput: AnthropicClientOptions & { max_tokens?: number },
  extendedOptions: { thinking?: boolean; thinkingBudget?: number | null } = {},
): AnthropicClientOptions & { max_tokens?: number } {
  const updatedOptions = { ...anthropicInput };
  const currentMaxTokens = updatedOptions.max_tokens ?? updatedOptions.maxTokens;

  if (
    extendedOptions.thinking &&
    updatedOptions?.model &&
    (/claude-3[-.]7/.test(updatedOptions.model) ||
      /claude-(?:sonnet|opus|haiku)-[4-9]/.test(updatedOptions.model))
  ) {
    updatedOptions.thinking = {
      ...updatedOptions.thinking,
      type: 'enabled',
    } as { type: 'enabled'; budget_tokens: number };
  }

  if (
    updatedOptions.thinking != null &&
    extendedOptions.thinkingBudget != null &&
    updatedOptions.thinking.type === 'enabled'
  ) {
    updatedOptions.thinking = {
      ...updatedOptions.thinking,
      budget_tokens: extendedOptions.thinkingBudget,
    };
  }

  if (
    updatedOptions.thinking != null &&
    updatedOptions.thinking.type === 'enabled' &&
    (currentMaxTokens == null || updatedOptions.thinking.budget_tokens > currentMaxTokens)
  ) {
    const maxTokens = anthropicSettings.maxOutputTokens.reset(updatedOptions.model ?? '');
    updatedOptions.max_tokens = currentMaxTokens ?? maxTokens;

    logger.warn(
      updatedOptions.max_tokens === maxTokens
        ? '[AnthropicClient] max_tokens is not defined while thinking is enabled. Setting max_tokens to model default.'
        : `[AnthropicClient] thinking budget_tokens (${updatedOptions.thinking.budget_tokens}) exceeds max_tokens (${updatedOptions.max_tokens}). Adjusting budget_tokens.`,
    );

    updatedOptions.thinking.budget_tokens = Math.min(
      updatedOptions.thinking.budget_tokens,
      Math.floor(updatedOptions.max_tokens * 0.9),
    );
  }

  return updatedOptions;
}

export { checkPromptCacheSupport, getClaudeHeaders, configureReasoning };
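Editor's note: taken together, a caller resolves prompt-caching headers in two steps. A small sketch under the same semantics as llm.ts below (the model name and promptCache flag are illustrative):

import { checkPromptCacheSupport, getClaudeHeaders } from './helpers';

// Illustrative: derive beta headers for a model when prompt caching is enabled.
const model = 'claude-3.5-sonnet-20240620';
const promptCache = true;

const supportsCacheControl = promptCache && checkPromptCacheSupport(model);
const defaultHeaders = getClaudeHeaders(model, supportsCacheControl);
// => { 'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31' }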
packages/api/src/endpoints/anthropic/index.ts (new file, 2 lines)
@@ -0,0 +1,2 @@
export * from './helpers';
export * from './llm';
packages/api/src/endpoints/anthropic/llm.spec.ts (new file, 1147 lines)
File diff suppressed because it is too large.
packages/api/src/endpoints/anthropic/llm.ts (new file, 105 lines)
@@ -0,0 +1,105 @@
import { Dispatcher, ProxyAgent } from 'undici';
import { AnthropicClientOptions } from '@librechat/agents';
import { anthropicSettings, removeNullishValues } from 'librechat-data-provider';
import type { AnthropicLLMConfigResult, AnthropicConfigOptions } from '~/types/anthropic';
import { checkPromptCacheSupport, getClaudeHeaders, configureReasoning } from './helpers';

/**
 * Generates configuration options for creating an Anthropic language model (LLM) instance.
 * @param apiKey - The API key for authentication with Anthropic.
 * @param options={} - Additional options for configuring the LLM.
 * @returns Configuration options for creating an Anthropic LLM instance, with null and undefined values removed.
 */
function getLLMConfig(
  apiKey?: string,
  options: AnthropicConfigOptions = {} as AnthropicConfigOptions,
): AnthropicLLMConfigResult {
  const systemOptions = {
    thinking: options.modelOptions?.thinking ?? anthropicSettings.thinking.default,
    promptCache: options.modelOptions?.promptCache ?? anthropicSettings.promptCache.default,
    thinkingBudget:
      options.modelOptions?.thinkingBudget ?? anthropicSettings.thinkingBudget.default,
  };

  /** Couldn't figure out a way to still loop through the object while deleting the overlapping keys when porting this
   * over from javascript, so for now they are being deleted manually until a better way presents itself.
   */
  if (options.modelOptions) {
    delete options.modelOptions.thinking;
    delete options.modelOptions.promptCache;
    delete options.modelOptions.thinkingBudget;
  } else {
    throw new Error('No modelOptions provided');
  }

  const defaultOptions = {
    model: anthropicSettings.model.default,
    maxOutputTokens: anthropicSettings.maxOutputTokens.default,
    stream: true,
  };

  const mergedOptions = Object.assign(defaultOptions, options.modelOptions);

  let requestOptions: AnthropicClientOptions & { stream?: boolean } = {
    apiKey,
    model: mergedOptions.model,
    stream: mergedOptions.stream,
    temperature: mergedOptions.temperature,
    stopSequences: mergedOptions.stop,
    maxTokens:
      mergedOptions.maxOutputTokens || anthropicSettings.maxOutputTokens.reset(mergedOptions.model),
    clientOptions: {},
    invocationKwargs: {
      metadata: {
        user_id: mergedOptions.user,
      },
    },
  };

  requestOptions = configureReasoning(requestOptions, systemOptions);

  if (!/claude-3[-.]7/.test(mergedOptions.model)) {
    requestOptions.topP = mergedOptions.topP;
    requestOptions.topK = mergedOptions.topK;
  } else if (requestOptions.thinking == null) {
    requestOptions.topP = mergedOptions.topP;
    requestOptions.topK = mergedOptions.topK;
  }

  const supportsCacheControl =
    systemOptions.promptCache === true && checkPromptCacheSupport(requestOptions.model ?? '');
  const headers = getClaudeHeaders(requestOptions.model ?? '', supportsCacheControl);
  if (headers && requestOptions.clientOptions) {
    requestOptions.clientOptions.defaultHeaders = headers;
  }

  if (options.proxy && requestOptions.clientOptions) {
    const proxyAgent = new ProxyAgent(options.proxy);
    requestOptions.clientOptions.fetchOptions = {
      dispatcher: proxyAgent,
    };
  }

  if (options.reverseProxyUrl && requestOptions.clientOptions) {
    requestOptions.clientOptions.baseURL = options.reverseProxyUrl;
    requestOptions.anthropicApiUrl = options.reverseProxyUrl;
  }

  const tools = [];

  if (mergedOptions.web_search) {
    tools.push({
      type: 'web_search_20250305',
      name: 'web_search',
    });
  }

  return {
    tools,
    llmConfig: removeNullishValues(
      requestOptions as Record<string, unknown>,
    ) as AnthropicClientOptions & { clientOptions?: { fetchOptions?: { dispatcher: Dispatcher } } },
  };
}

export { getLLMConfig };
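Editor's note: for reference, a minimal invocation sketch (argument values are illustrative; the modelOptions fields follow AnthropicConfigOptions as used above):

import { getLLMConfig } from './llm';

// Illustrative call: thinking/promptCache/thinkingBudget are split off into
// systemOptions internally, and the rest is merged over Anthropic defaults.
const { llmConfig, tools } = getLLMConfig('my-anthropic-key', {
  modelOptions: {
    model: 'claude-3-7-sonnet-20250219',
    promptCache: true,
    thinking: true,
    thinkingBudget: 2000,
    user: 'user-123',
  },
  reverseProxyUrl: 'http://localhost:4000/v1',
});
// llmConfig.clientOptions.defaultHeaders now holds the 'anthropic-beta' values
// from getClaudeHeaders, and configureReasoning has normalized `thinking`.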
@@ -1,5 +1,5 @@
 import { Providers } from '@librechat/agents';
-import { googleSettings, AuthKeys } from 'librechat-data-provider';
+import { googleSettings, AuthKeys, removeNullishValues } from 'librechat-data-provider';
 import type { GoogleClientOptions, VertexAIClientOptions } from '@librechat/agents';
 import type { GoogleAIToolType } from '@langchain/google-common';
 import type * as t from '~/types';

@@ -112,11 +112,15 @@ export function getGoogleConfig(
     ...modelOptions
   } = options.modelOptions || {};

-  const llmConfig: GoogleClientOptions | VertexAIClientOptions = {
+  const llmConfig: GoogleClientOptions | VertexAIClientOptions = removeNullishValues({
     ...(modelOptions || {}),
     model: modelOptions?.model ?? '',
     maxRetries: 2,
-  };
+    topP: modelOptions?.topP ?? undefined,
+    topK: modelOptions?.topK ?? undefined,
+    temperature: modelOptions?.temperature ?? undefined,
+    maxOutputTokens: modelOptions?.maxOutputTokens ?? undefined,
+  });

   /** Used only for Safety Settings */
   llmConfig.safetySettings = getSafetySettings(llmConfig.model);
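Editor's note: removeNullishValues is imported from librechat-data-provider; its source is not part of this diff, but based on how it is used here it behaves roughly like the sketch below (an assumption, not the library's actual implementation):

// Assumed behavior: drop null/undefined entries so they cannot clobber
// provider defaults when the config object is merged downstream.
function removeNullishValues<T extends Record<string, unknown>>(obj: T): Partial<T> {
  const result: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(obj)) {
    if (value !== null && value !== undefined) {
      result[key] = value;
    }
  }
  return result as Partial<T>;
}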
@@ -1,3 +1,4 @@
 export * from './custom';
 export * from './google';
 export * from './openai';
+export * from './anthropic';
packages/api/src/endpoints/openai/config.anthropic.spec.ts (new file, 551 lines)
@@ -0,0 +1,551 @@
import { getOpenAIConfig } from './config';

describe('getOpenAIConfig - Anthropic Compatibility', () => {
  describe('Anthropic via LiteLLM', () => {
    it('should handle basic Anthropic configuration with defaultParamsEndpoint', () => {
      const apiKey = 'sk-xxxx';
      const endpoint = 'Anthropic (via LiteLLM)';
      const options = {
        modelOptions: {
          model: 'claude-sonnet-4',
          user: 'some_user_id',
        },
        reverseProxyUrl: 'http://host.docker.internal:4000/v1',
        proxy: '',
        headers: {},
        addParams: undefined,
        dropParams: undefined,
        customParams: {
          defaultParamsEndpoint: 'anthropic',
          paramDefinitions: [],
        },
        endpoint: 'Anthropic (via LiteLLM)',
        endpointType: 'custom',
      };

      const result = getOpenAIConfig(apiKey, options, endpoint);

      expect(result).toEqual({
        llmConfig: {
          apiKey: 'sk-xxxx',
          model: 'claude-sonnet-4',
          stream: true,
          maxTokens: 8192,
          modelKwargs: {
            metadata: {
              user_id: 'some_user_id',
            },
            thinking: {
              type: 'enabled',
              budget_tokens: 2000,
            },
          },
        },
        configOptions: {
          baseURL: 'http://host.docker.internal:4000/v1',
          defaultHeaders: {
            'anthropic-beta': 'prompt-caching-2024-07-31,context-1m-2025-08-07',
          },
        },
        tools: [],
      });
    });

    it('should handle Claude 3.7 model with thinking enabled', () => {
      const apiKey = 'sk-yyyy';
      const endpoint = 'Anthropic (via LiteLLM)';
      const options = {
        modelOptions: {
          model: 'claude-3.7-sonnet-20241022',
          user: 'user123',
          temperature: 0.7,
          thinking: true,
          thinkingBudget: 3000,
        },
        reverseProxyUrl: 'http://localhost:4000/v1',
        customParams: {
          defaultParamsEndpoint: 'anthropic',
        },
        endpoint: 'Anthropic (via LiteLLM)',
        endpointType: 'custom',
      };

      const result = getOpenAIConfig(apiKey, options, endpoint);

      expect(result).toEqual({
        llmConfig: {
          apiKey: 'sk-yyyy',
          model: 'claude-3.7-sonnet-20241022',
          stream: true,
          temperature: 0.7,
          maxTokens: 8192,
          modelKwargs: {
            metadata: {
              user_id: 'user123',
            },
            thinking: {
              type: 'enabled',
              budget_tokens: 3000,
            },
          },
        },
        configOptions: {
          baseURL: 'http://localhost:4000/v1',
          defaultHeaders: {
            'anthropic-beta':
              'token-efficient-tools-2025-02-19,output-128k-2025-02-19,prompt-caching-2024-07-31',
          },
        },
        tools: [],
      });
    });

    it('should handle Claude 3.7 model with thinking disabled (topP and topK included)', () => {
      const apiKey = 'sk-yyyy';
      const endpoint = 'Anthropic (via LiteLLM)';
      const options = {
        modelOptions: {
          model: 'claude-3.7-sonnet-20241022',
          user: 'user123',
          temperature: 0.7,
          topP: 0.9,
          topK: 50,
          thinking: false,
        },
        reverseProxyUrl: 'http://localhost:4000/v1',
        customParams: {
          defaultParamsEndpoint: 'anthropic',
        },
        endpoint: 'Anthropic (via LiteLLM)',
        endpointType: 'custom',
      };

      const result = getOpenAIConfig(apiKey, options, endpoint);

      expect(result).toEqual({
        llmConfig: {
          apiKey: 'sk-yyyy',
          model: 'claude-3.7-sonnet-20241022',
          stream: true,
          temperature: 0.7,
          topP: 0.9,
          maxTokens: 8192,
          modelKwargs: {
            metadata: {
              user_id: 'user123',
            },
            topK: 50,
          },
        },
        configOptions: {
          baseURL: 'http://localhost:4000/v1',
          defaultHeaders: {
            'anthropic-beta':
              'token-efficient-tools-2025-02-19,output-128k-2025-02-19,prompt-caching-2024-07-31',
          },
        },
        tools: [],
      });
    });

    it('should handle Claude 3.5 sonnet with special headers', () => {
      const apiKey = 'sk-zzzz';
      const endpoint = 'Anthropic (via LiteLLM)';
      const options = {
        modelOptions: {
          model: 'claude-3.5-sonnet-20240620',
          user: 'user456',
          maxOutputTokens: 4096,
        },
        reverseProxyUrl: 'https://api.anthropic.proxy.com/v1',
        customParams: {
          defaultParamsEndpoint: 'anthropic',
        },
        endpoint: 'Anthropic (via LiteLLM)',
        endpointType: 'custom',
      };

      const result = getOpenAIConfig(apiKey, options, endpoint);

      expect(result).toEqual({
        llmConfig: {
          apiKey: 'sk-zzzz',
          model: 'claude-3.5-sonnet-20240620',
          stream: true,
          maxTokens: 4096,
          modelKwargs: {
            metadata: {
              user_id: 'user456',
            },
          },
        },
        configOptions: {
          baseURL: 'https://api.anthropic.proxy.com/v1',
          defaultHeaders: {
            'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
          },
        },
        tools: [],
      });
    });

    it('should apply anthropic-beta headers based on model pattern', () => {
      const apiKey = 'sk-custom';
      const endpoint = 'Anthropic (via LiteLLM)';
      const options = {
        modelOptions: {
          model: 'claude-3-sonnet',
        },
        reverseProxyUrl: 'http://custom.proxy/v1',
        headers: {
          'Custom-Header': 'custom-value',
          Authorization: 'Bearer custom-token',
        },
        customParams: {
          defaultParamsEndpoint: 'anthropic',
        },
        endpoint: 'Anthropic (via LiteLLM)',
        endpointType: 'custom',
      };

      const result = getOpenAIConfig(apiKey, options, endpoint);

      expect(result).toEqual({
        llmConfig: {
          apiKey: 'sk-custom',
          model: 'claude-3-sonnet',
          stream: true,
          maxTokens: 8192,
          modelKwargs: {
            metadata: {
              user_id: undefined,
            },
          },
        },
        configOptions: {
          baseURL: 'http://custom.proxy/v1',
          defaultHeaders: {
            'Custom-Header': 'custom-value',
            Authorization: 'Bearer custom-token',
            'anthropic-beta': 'prompt-caching-2024-07-31',
          },
        },
        tools: [],
      });
    });

    it('should handle models that do not match Claude patterns', () => {
      const apiKey = 'sk-other';
      const endpoint = 'Anthropic (via LiteLLM)';
      const options = {
        modelOptions: {
          model: 'gpt-4-turbo',
          user: 'userGPT',
          temperature: 0.8,
        },
        reverseProxyUrl: 'http://litellm:4000/v1',
        customParams: {
          defaultParamsEndpoint: 'anthropic',
        },
        endpoint: 'Anthropic (via LiteLLM)',
        endpointType: 'custom',
      };

      const result = getOpenAIConfig(apiKey, options, endpoint);

      expect(result).toEqual({
        llmConfig: {
          apiKey: 'sk-other',
          model: 'gpt-4-turbo',
          stream: true,
          temperature: 0.8,
          maxTokens: 8192,
          modelKwargs: {
            metadata: {
              user_id: 'userGPT',
            },
          },
        },
        configOptions: {
          baseURL: 'http://litellm:4000/v1',
        },
        tools: [],
      });
    });

    it('should handle dropParams correctly in Anthropic path', () => {
      const apiKey = 'sk-drop';
      const endpoint = 'Anthropic (via LiteLLM)';
      const options = {
        modelOptions: {
          model: 'claude-3-opus-20240229',
          user: 'userDrop',
          temperature: 0.5,
          maxOutputTokens: 2048,
          topP: 0.9,
          topK: 40,
        },
        reverseProxyUrl: 'http://proxy.litellm/v1',
        dropParams: ['temperature', 'topK', 'metadata'],
        customParams: {
          defaultParamsEndpoint: 'anthropic',
        },
        endpoint: 'Anthropic (via LiteLLM)',
        endpointType: 'custom',
      };

      const result = getOpenAIConfig(apiKey, options, endpoint);

      expect(result).toEqual({
        llmConfig: {
          apiKey: 'sk-drop',
          model: 'claude-3-opus-20240229',
          stream: true,
          topP: 0.9,
          maxTokens: 2048,
          // temperature is dropped
          // modelKwargs.topK is dropped
          // modelKwargs.metadata is dropped completely
        },
        configOptions: {
          baseURL: 'http://proxy.litellm/v1',
          defaultHeaders: {
            'anthropic-beta': 'prompt-caching-2024-07-31',
          },
        },
        tools: [],
      });
    });

    it('should handle empty user string', () => {
      const apiKey = 'sk-edge';
      const endpoint = 'Anthropic (via LiteLLM)';
      const options = {
        modelOptions: {
          model: 'claude-2.1',
          user: '',
          temperature: 0,
        },
        reverseProxyUrl: 'http://litellm/v1',
        customParams: {
          defaultParamsEndpoint: 'anthropic',
        },
        endpoint: 'Anthropic (via LiteLLM)',
        endpointType: 'custom',
      };

      const result = getOpenAIConfig(apiKey, options, endpoint);

      expect(result).toEqual({
        llmConfig: {
          apiKey: 'sk-edge',
          model: 'claude-2.1',
          stream: true,
          temperature: 0,
          maxTokens: 8192,
          modelKwargs: {
            metadata: {
              user_id: '',
            },
          },
        },
        configOptions: {
          baseURL: 'http://litellm/v1',
        },
        tools: [],
      });
    });

    it('should handle web_search tool', () => {
      const apiKey = 'sk-search';
      const endpoint = 'Anthropic (via LiteLLM)';
      const options = {
        modelOptions: {
          model: 'claude-3-opus-20240229',
          user: 'searchUser',
          web_search: true,
        },
        reverseProxyUrl: 'http://litellm/v1',
        customParams: {
          defaultParamsEndpoint: 'anthropic',
        },
        endpoint: 'Anthropic (via LiteLLM)',
        endpointType: 'custom',
      };

      const result = getOpenAIConfig(apiKey, options, endpoint);

      expect(result).toEqual({
        llmConfig: {
          apiKey: 'sk-search',
          model: 'claude-3-opus-20240229',
          stream: true,
          maxTokens: 8192,
          modelKwargs: {
            metadata: {
              user_id: 'searchUser',
            },
          },
        },
        configOptions: {
          baseURL: 'http://litellm/v1',
          defaultHeaders: {
            'anthropic-beta': 'prompt-caching-2024-07-31',
          },
        },
        tools: [
          {
            type: 'web_search_20250305',
            name: 'web_search',
          },
        ],
      });
    });

    it('should properly transform Anthropic config with invocationKwargs', () => {
      const apiKey = 'sk-test';
      const endpoint = 'Anthropic (via LiteLLM)';
      const options = {
        modelOptions: {
          model: 'claude-3.5-haiku-20241022',
          user: 'testUser',
          topP: 0.9,
          topK: 40,
        },
        reverseProxyUrl: 'http://litellm/v1',
        customParams: {
          defaultParamsEndpoint: 'anthropic',
        },
        endpoint: 'Anthropic (via LiteLLM)',
        endpointType: 'custom',
      };

      const result = getOpenAIConfig(apiKey, options, endpoint);

      expect(result).toEqual({
        llmConfig: {
          apiKey: 'sk-test',
          model: 'claude-3.5-haiku-20241022',
          stream: true,
          topP: 0.9,
          maxTokens: 8192,
          modelKwargs: {
            metadata: {
              user_id: 'testUser',
            },
            topK: 40,
          },
        },
        configOptions: {
          baseURL: 'http://litellm/v1',
          defaultHeaders: {
            'anthropic-beta': 'prompt-caching-2024-07-31',
          },
        },
        tools: [],
      });
    });

    it('should handle addParams with Anthropic defaults', () => {
      const apiKey = 'sk-add';
      const endpoint = 'Anthropic (via LiteLLM)';
      const options = {
        modelOptions: {
          model: 'claude-3-opus-20240229',
          user: 'addUser',
          temperature: 0.7,
        },
        reverseProxyUrl: 'http://litellm/v1',
        addParams: {
          customParam1: 'value1',
          customParam2: 42,
          frequencyPenalty: 0.5, // Known OpenAI param
        },
        customParams: {
          defaultParamsEndpoint: 'anthropic',
        },
        endpoint: 'Anthropic (via LiteLLM)',
        endpointType: 'custom',
      };

      const result = getOpenAIConfig(apiKey, options, endpoint);

      expect(result).toEqual({
        llmConfig: {
          apiKey: 'sk-add',
          model: 'claude-3-opus-20240229',
          stream: true,
          temperature: 0.7,
          frequencyPenalty: 0.5, // Known param added to main config
          maxTokens: 8192,
          modelKwargs: {
            metadata: {
              user_id: 'addUser',
            },
            customParam1: 'value1', // Unknown params added to modelKwargs
            customParam2: 42,
          },
        },
        configOptions: {
          baseURL: 'http://litellm/v1',
          defaultHeaders: {
            'anthropic-beta': 'prompt-caching-2024-07-31',
          },
        },
        tools: [],
      });
    });

    it('should handle both addParams and dropParams together', () => {
      const apiKey = 'sk-both';
      const endpoint = 'Anthropic (via LiteLLM)';
      const options = {
        modelOptions: {
          model: 'claude-3.5-sonnet-20240620',
          user: 'bothUser',
          temperature: 0.6,
          topP: 0.9,
          topK: 40,
        },
        reverseProxyUrl: 'http://litellm/v1',
        addParams: {
          customParam: 'customValue',
          maxRetries: 3, // Known OpenAI param
        },
        dropParams: ['temperature', 'topK'], // Drop one known and one unknown param
        customParams: {
          defaultParamsEndpoint: 'anthropic',
        },
        endpoint: 'Anthropic (via LiteLLM)',
        endpointType: 'custom',
      };

      const result = getOpenAIConfig(apiKey, options, endpoint);

      expect(result).toEqual({
        llmConfig: {
          apiKey: 'sk-both',
          model: 'claude-3.5-sonnet-20240620',
          stream: true,
          topP: 0.9,
          maxRetries: 3,
          maxTokens: 8192,
          modelKwargs: {
            metadata: {
              user_id: 'bothUser',
            },
            customParam: 'customValue',
            // topK is dropped
          },
        },
        configOptions: {
          baseURL: 'http://litellm/v1',
          defaultHeaders: {
            'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
          },
        },
        tools: [],
      });
    });
  });
});
packages/api/src/endpoints/openai/config.backward-compat.spec.ts (new file, 431 lines)
@@ -0,0 +1,431 @@
import {
  Verbosity,
  EModelEndpoint,
  ReasoningEffort,
  ReasoningSummary,
} from 'librechat-data-provider';
import { getOpenAIConfig } from './config';

describe('getOpenAIConfig - Backward Compatibility', () => {
  describe('OpenAI endpoint', () => {
    it('should handle GPT-5 model with reasoning and web search', () => {
      const apiKey = 'sk-proj-somekey';
      const endpoint = undefined;
      const options = {
        modelOptions: {
          model: 'gpt-5-nano',
          verbosity: Verbosity.high,
          reasoning_effort: ReasoningEffort.high,
          reasoning_summary: ReasoningSummary.detailed,
          useResponsesApi: true,
          web_search: true,
          user: 'some-user',
        },
        proxy: '',
        reverseProxyUrl: null,
        endpoint: EModelEndpoint.openAI,
      };

      const result = getOpenAIConfig(apiKey, options, endpoint);

      expect(result).toEqual({
        llmConfig: {
          streaming: true,
          model: 'gpt-5-nano',
          useResponsesApi: true,
          user: 'some-user',
          apiKey: 'sk-proj-somekey',
          reasoning: {
            effort: ReasoningEffort.high,
            summary: ReasoningSummary.detailed,
          },
          modelKwargs: {
            text: {
              verbosity: Verbosity.high,
            },
          },
        },
        configOptions: {},
        tools: [
          {
            type: 'web_search_preview',
          },
        ],
      });
    });
  });

  describe('OpenRouter endpoint', () => {
    it('should handle OpenRouter configuration with dropParams and custom headers', () => {
      const apiKey = 'sk-xxxx';
      const endpoint = 'OpenRouter';
      const options = {
        modelOptions: {
          model: 'qwen/qwen3-max',
          user: 'some-user',
        },
        reverseProxyUrl: 'https://gateway.ai.cloudflare.com/v1/account-id/gateway-id/openrouter',
        headers: {
          'x-librechat-thread-id': '{{LIBRECHAT_BODY_CONVERSATIONID}}',
          'x-test-key': '{{TESTING_USER_VAR}}',
        },
        proxy: '',
        dropParams: ['user'],
      };

      const result = getOpenAIConfig(apiKey, options, endpoint);

      expect(result).toEqual({
        llmConfig: {
          streaming: true,
          model: 'qwen/qwen3-max',
          include_reasoning: true,
          apiKey: 'sk-xxxx',
        },
        configOptions: {
          baseURL: 'https://gateway.ai.cloudflare.com/v1/account-id/gateway-id/openrouter',
          defaultHeaders: {
            'HTTP-Referer': 'https://librechat.ai',
            'X-Title': 'LibreChat',
            'x-librechat-thread-id': '{{LIBRECHAT_BODY_CONVERSATIONID}}',
            'x-test-key': '{{TESTING_USER_VAR}}',
          },
        },
        tools: [],
        provider: 'openrouter',
      });
    });
  });

  describe('Azure OpenAI endpoint', () => {
    it('should handle basic Azure OpenAI configuration', () => {
      const apiKey = 'some_key';
      const endpoint = undefined;
      const options = {
        modelOptions: {
          model: 'gpt-4o',
          user: 'some_user_id',
        },
        reverseProxyUrl: null,
        endpoint: 'azureOpenAI',
        azure: {
          azureOpenAIApiKey: 'some_azure_key',
          azureOpenAIApiInstanceName: 'some_instance_name',
          azureOpenAIApiDeploymentName: 'gpt-4o',
          azureOpenAIApiVersion: '2024-02-15-preview',
        },
      };

      const result = getOpenAIConfig(apiKey, options, endpoint);

      expect(result).toEqual({
        llmConfig: {
          streaming: true,
          model: 'gpt-4o',
          user: 'some_user_id',
          azureOpenAIApiKey: 'some_azure_key',
          azureOpenAIApiInstanceName: 'some_instance_name',
          azureOpenAIApiDeploymentName: 'gpt-4o',
          azureOpenAIApiVersion: '2024-02-15-preview',
        },
        configOptions: {},
        tools: [],
      });
    });

    it('should handle Azure OpenAI with Responses API and reasoning', () => {
      const apiKey = 'some_azure_key';
      const endpoint = undefined;
      const options = {
        modelOptions: {
          model: 'gpt-5',
          reasoning_effort: ReasoningEffort.high,
          reasoning_summary: ReasoningSummary.detailed,
          verbosity: Verbosity.high,
          useResponsesApi: true,
          user: 'some_user_id',
        },
        endpoint: 'azureOpenAI',
        azure: {
          azureOpenAIApiKey: 'some_azure_key',
          azureOpenAIApiInstanceName: 'some_instance_name',
          azureOpenAIApiDeploymentName: 'gpt-5',
          azureOpenAIApiVersion: '2024-12-01-preview',
        },
      };

      const result = getOpenAIConfig(apiKey, options, endpoint);

      expect(result).toEqual({
        llmConfig: {
          streaming: true,
          model: 'gpt-5',
          useResponsesApi: true,
          user: 'some_user_id',
          apiKey: 'some_azure_key',
          reasoning: {
            effort: ReasoningEffort.high,
            summary: ReasoningSummary.detailed,
          },
          modelKwargs: {
            text: {
              verbosity: Verbosity.high,
            },
          },
        },
        configOptions: {
          baseURL: 'https://some_instance_name.openai.azure.com/openai/v1',
          defaultHeaders: {
            'api-key': 'some_azure_key',
          },
          defaultQuery: {
            'api-version': 'preview',
          },
        },
        tools: [],
      });
    });

    it('should handle Azure serverless configuration with dropParams', () => {
      const apiKey = 'some_azure_key';
      const endpoint = undefined;
      const options = {
        modelOptions: {
          model: 'jais-30b-chat',
          user: 'some_user_id',
        },
        reverseProxyUrl: 'https://some_endpoint_name.services.ai.azure.com/models',
        endpoint: 'azureOpenAI',
        headers: {
          'api-key': 'some_azure_key',
        },
        dropParams: ['stream_options', 'user'],
        azure: false as const,
        defaultQuery: {
          'api-version': '2024-05-01-preview',
        },
      };

      const result = getOpenAIConfig(apiKey, options, endpoint);

      expect(result).toEqual({
        llmConfig: {
          streaming: true,
          model: 'jais-30b-chat',
          apiKey: 'some_azure_key',
        },
        configOptions: {
          baseURL: 'https://some_endpoint_name.services.ai.azure.com/models',
          defaultHeaders: {
            'api-key': 'some_azure_key',
          },
          defaultQuery: {
            'api-version': '2024-05-01-preview',
          },
        },
        tools: [],
      });
    });

    it('should handle Azure serverless with user-provided key configuration', () => {
      const apiKey = 'some_azure_key';
      const endpoint = undefined;
      const options = {
        modelOptions: {
          model: 'grok-3',
          user: 'some_user_id',
        },
        reverseProxyUrl: 'https://some_endpoint_name.services.ai.azure.com/models',
        endpoint: 'azureOpenAI',
        headers: {
          'api-key': 'some_azure_key',
        },
        dropParams: ['stream_options', 'user'],
        azure: false as const,
        defaultQuery: {
          'api-version': '2024-05-01-preview',
        },
      };

      const result = getOpenAIConfig(apiKey, options, endpoint);

      expect(result).toEqual({
        llmConfig: {
          streaming: true,
          model: 'grok-3',
          apiKey: 'some_azure_key',
        },
        configOptions: {
          baseURL: 'https://some_endpoint_name.services.ai.azure.com/models',
          defaultHeaders: {
            'api-key': 'some_azure_key',
          },
          defaultQuery: {
            'api-version': '2024-05-01-preview',
          },
        },
        tools: [],
      });
    });

    it('should handle Azure serverless with Mistral model configuration', () => {
      const apiKey = 'some_azure_key';
      const endpoint = undefined;
      const options = {
        modelOptions: {
          model: 'Mistral-Large-2411',
          user: 'some_user_id',
        },
        reverseProxyUrl: 'https://some_endpoint_name.services.ai.azure.com/models',
        endpoint: 'azureOpenAI',
        headers: {
          'api-key': 'some_azure_key',
        },
        dropParams: ['stream_options', 'user'],
        azure: false as const,
        defaultQuery: {
          'api-version': '2024-05-01-preview',
        },
      };

      const result = getOpenAIConfig(apiKey, options, endpoint);

      expect(result).toEqual({
        llmConfig: {
          streaming: true,
          model: 'Mistral-Large-2411',
          apiKey: 'some_azure_key',
        },
        configOptions: {
          baseURL: 'https://some_endpoint_name.services.ai.azure.com/models',
          defaultHeaders: {
            'api-key': 'some_azure_key',
          },
          defaultQuery: {
            'api-version': '2024-05-01-preview',
          },
        },
        tools: [],
      });
    });

    it('should handle Azure serverless with DeepSeek model without dropParams', () => {
      const apiKey = 'some_azure_key';
      const endpoint = undefined;
      const options = {
        modelOptions: {
          model: 'DeepSeek-R1',
          user: 'some_user_id',
        },
        reverseProxyUrl: 'https://some_endpoint_name.models.ai.azure.com/v1/',
        endpoint: 'azureOpenAI',
        headers: {
          'api-key': 'some_azure_key',
        },
        azure: false as const,
        defaultQuery: {
          'api-version': '2024-08-01-preview',
        },
      };

      const result = getOpenAIConfig(apiKey, options, endpoint);

      expect(result).toEqual({
        llmConfig: {
          streaming: true,
          model: 'DeepSeek-R1',
          user: 'some_user_id',
          apiKey: 'some_azure_key',
        },
        configOptions: {
          baseURL: 'https://some_endpoint_name.models.ai.azure.com/v1/',
          defaultHeaders: {
            'api-key': 'some_azure_key',
          },
          defaultQuery: {
            'api-version': '2024-08-01-preview',
          },
        },
        tools: [],
      });
    });
  });

  describe('Custom endpoints', () => {
    it('should handle Groq custom endpoint configuration', () => {
      const apiKey = 'gsk_somekey';
      const endpoint = 'groq';
      const options = {
        modelOptions: {
          model: 'qwen/qwen3-32b',
          user: 'some-user',
        },
        reverseProxyUrl: 'https://api.groq.com/openai/v1/',
        proxy: '',
        headers: {},
        endpoint: 'groq',
        endpointType: 'custom',
      };

      const result = getOpenAIConfig(apiKey, options, endpoint);

      expect(result).toEqual({
        llmConfig: {
          streaming: true,
          model: 'qwen/qwen3-32b',
          user: 'some-user',
          apiKey: 'gsk_somekey',
        },
        configOptions: {
          baseURL: 'https://api.groq.com/openai/v1/',
          defaultHeaders: {},
        },
        tools: [],
      });
    });

    it('should handle Cloudflare Workers AI with custom headers and addParams', () => {
      const apiKey = 'someKey';
      const endpoint = 'Cloudflare Workers AI';
      const options = {
        modelOptions: {
          model: '@cf/deepseek-ai/deepseek-r1-distill-qwen-32b',
          user: 'some-user',
        },
        reverseProxyUrl:
          'https://gateway.ai.cloudflare.com/v1/${CF_ACCOUNT_ID}/${CF_GATEWAY_ID}/workers-ai/v1',
        proxy: '',
        headers: {
          'x-librechat-thread-id': '{{LIBRECHAT_BODY_CONVERSATIONID}}',
          'x-test-key': '{{TESTING_USER_VAR}}',
        },
        addParams: {
          disableStreaming: true,
        },
        endpoint: 'Cloudflare Workers AI',
        endpointType: 'custom',
      };

      const result = getOpenAIConfig(apiKey, options, endpoint);

      expect(result).toEqual({
        llmConfig: {
          streaming: true,
          model: '@cf/deepseek-ai/deepseek-r1-distill-qwen-32b',
          user: 'some-user',
          disableStreaming: true,
          apiKey: 'someKey',
        },
        configOptions: {
          baseURL:
            'https://gateway.ai.cloudflare.com/v1/${CF_ACCOUNT_ID}/${CF_GATEWAY_ID}/workers-ai/v1',
          defaultHeaders: {
            'x-librechat-thread-id': '{{LIBRECHAT_BODY_CONVERSATIONID}}',
            'x-test-key': '{{TESTING_USER_VAR}}',
          },
        },
        tools: [],
      });
    });
  });
});
@@ -1,7 +1,8 @@
 import { Verbosity, ReasoningEffort, ReasoningSummary } from 'librechat-data-provider';
 import type { RequestInit } from 'undici';
 import type { OpenAIParameters, AzureOptions } from '~/types';
-import { getOpenAIConfig, knownOpenAIParams } from './llm';
+import { getOpenAIConfig } from './config';
+import { knownOpenAIParams } from './llm';

 describe('getOpenAIConfig', () => {
   const mockApiKey = 'test-api-key';
packages/api/src/endpoints/openai/config.ts (new file, 150 lines)
@@ -0,0 +1,150 @@
import { ProxyAgent } from 'undici';
import { Providers } from '@librechat/agents';
import { KnownEndpoints, EModelEndpoint } from 'librechat-data-provider';
import type * as t from '~/types';
import { getLLMConfig as getAnthropicLLMConfig } from '~/endpoints/anthropic/llm';
import { transformToOpenAIConfig } from './transform';
import { constructAzureURL } from '~/utils/azure';
import { createFetch } from '~/utils/generators';
import { getOpenAILLMConfig } from './llm';

type Fetch = (input: string | URL | Request, init?: RequestInit) => Promise<Response>;

/**
 * Generates configuration options for creating a language model (LLM) instance.
 * @param apiKey - The API key for authentication.
 * @param options - Additional options for configuring the LLM.
 * @param endpoint - The endpoint name
 * @returns Configuration options for creating an LLM instance.
 */
export function getOpenAIConfig(
  apiKey: string,
  options: t.OpenAIConfigOptions = {},
  endpoint?: string | null,
): t.OpenAIConfigResult {
  const {
    proxy,
    addParams,
    dropParams,
    defaultQuery,
    directEndpoint,
    streaming = true,
    modelOptions = {},
    reverseProxyUrl: baseURL,
  } = options;

  let llmConfig: t.OAIClientOptions;
  let tools: t.LLMConfigResult['tools'];
  const isAnthropic = options.customParams?.defaultParamsEndpoint === EModelEndpoint.anthropic;

  const useOpenRouter =
    !isAnthropic &&
    ((baseURL && baseURL.includes(KnownEndpoints.openrouter)) ||
      (endpoint != null && endpoint.toLowerCase().includes(KnownEndpoints.openrouter)));

  let azure = options.azure;
  let headers = options.headers;
  if (isAnthropic) {
    const anthropicResult = getAnthropicLLMConfig(apiKey, {
      modelOptions,
      proxy: options.proxy,
    });
    const transformed = transformToOpenAIConfig({
      addParams,
      dropParams,
      llmConfig: anthropicResult.llmConfig,
      fromEndpoint: EModelEndpoint.anthropic,
    });
    llmConfig = transformed.llmConfig;
    tools = anthropicResult.tools;
    if (transformed.configOptions?.defaultHeaders) {
      headers = Object.assign(headers ?? {}, transformed.configOptions?.defaultHeaders);
    }
  } else {
    const openaiResult = getOpenAILLMConfig({
      azure,
      apiKey,
      baseURL,
      streaming,
      addParams,
      dropParams,
      modelOptions,
      useOpenRouter,
    });
    llmConfig = openaiResult.llmConfig;
    azure = openaiResult.azure;
    tools = openaiResult.tools;
  }

  const configOptions: t.OpenAIConfiguration = {};
  if (baseURL) {
    configOptions.baseURL = baseURL;
  }
  if (useOpenRouter) {
    configOptions.defaultHeaders = Object.assign(
      {
        'HTTP-Referer': 'https://librechat.ai',
        'X-Title': 'LibreChat',
      },
      headers,
    );
  } else if (headers) {
    configOptions.defaultHeaders = headers;
  }

  if (defaultQuery) {
    configOptions.defaultQuery = defaultQuery;
  }

  if (proxy) {
    const proxyAgent = new ProxyAgent(proxy);
    configOptions.fetchOptions = {
      dispatcher: proxyAgent,
    };
  }

  if (azure && !isAnthropic) {
    const constructAzureResponsesApi = () => {
      if (!llmConfig.useResponsesApi || !azure) {
        return;
      }

      configOptions.baseURL = constructAzureURL({
        baseURL: configOptions.baseURL || 'https://${INSTANCE_NAME}.openai.azure.com/openai/v1',
        azureOptions: azure,
      });

      configOptions.defaultHeaders = {
        ...configOptions.defaultHeaders,
        'api-key': apiKey,
      };
      configOptions.defaultQuery = {
        ...configOptions.defaultQuery,
        'api-version': configOptions.defaultQuery?.['api-version'] ?? 'preview',
      };
    };

    constructAzureResponsesApi();
  }

  if (process.env.OPENAI_ORGANIZATION && !isAnthropic) {
    configOptions.organization = process.env.OPENAI_ORGANIZATION;
  }

  if (directEndpoint === true && configOptions?.baseURL != null) {
    configOptions.fetch = createFetch({
      directEndpoint: directEndpoint,
      reverseProxyUrl: configOptions?.baseURL,
    }) as unknown as Fetch;
  }

  const result: t.OpenAIConfigResult = {
    llmConfig,
    configOptions,
    tools,
  };
  if (useOpenRouter) {
    result.provider = Providers.OPENROUTER;
  }
  return result;
}
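Editor's note: to make the routing concrete, a short sketch of the two branches (values illustrative; both calls return the same { llmConfig, configOptions, tools } shape):

import { getOpenAIConfig } from './config';

const apiKey = 'sk-illustrative';

// Anthropic parameter set, requested via customParams:
const anthropicStyle = getOpenAIConfig(apiKey, {
  modelOptions: { model: 'claude-3-opus-20240229' },
  customParams: { defaultParamsEndpoint: 'anthropic' },
}); // routed through getAnthropicLLMConfig + transformToOpenAIConfig

// Default OpenAI parameter set:
const openaiStyle = getOpenAIConfig(apiKey, {
  modelOptions: { model: 'gpt-4o' },
}); // routed through getOpenAILLMConfig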
@@ -1,2 +1,3 @@
 export * from './llm';
+export * from './config';
 export * from './initialize';
@@ -9,7 +9,7 @@ import { createHandleLLMNewToken } from '~/utils/generators';
 import { getAzureCredentials } from '~/utils/azure';
 import { isUserProvided } from '~/utils/common';
 import { resolveHeaders } from '~/utils/env';
-import { getOpenAIConfig } from './llm';
+import { getOpenAIConfig } from './config';

 /**
  * Initializes OpenAI options for agent usage. This function always returns configuration
@@ -115,7 +115,7 @@ export const initializeOpenAI = async ({
   } else if (isAzureOpenAI) {
     clientOptions.azure =
       userProvidesKey && userValues?.apiKey ? JSON.parse(userValues.apiKey) : getAzureCredentials();
-    apiKey = clientOptions.azure?.azureOpenAIApiKey;
+    apiKey = clientOptions.azure ? clientOptions.azure.azureOpenAIApiKey : undefined;
   }

   if (userProvidesKey && !apiKey) {
@@ -1,16 +1,11 @@
-import { ProxyAgent } from 'undici';
-import { Providers } from '@librechat/agents';
-import { KnownEndpoints, removeNullishValues } from 'librechat-data-provider';
+import { removeNullishValues } from 'librechat-data-provider';
 import type { BindToolsInput } from '@langchain/core/language_models/chat_models';
 import type { AzureOpenAIInput } from '@langchain/openai';
 import type { OpenAI } from 'openai';
 import type * as t from '~/types';
 import { sanitizeModelName, constructAzureURL } from '~/utils/azure';
-import { createFetch } from '~/utils/generators';
 import { isEnabled } from '~/utils/common';

-type Fetch = (input: string | URL | Request, init?: RequestInit) => Promise<Response>;
-
 export const knownOpenAIParams = new Set([
   // Constructor/Instance Parameters
   'model',
@@ -80,47 +75,44 @@
   );
 }

-/**
- * Generates configuration options for creating a language model (LLM) instance.
- * @param apiKey - The API key for authentication.
- * @param options - Additional options for configuring the LLM.
- * @param endpoint - The endpoint name
- * @returns Configuration options for creating an LLM instance.
- */
-export function getOpenAIConfig(
-  apiKey: string,
-  options: t.OpenAIConfigOptions = {},
-  endpoint?: string | null,
-): t.LLMConfigResult {
-  const {
-    modelOptions: _modelOptions = {},
-    reverseProxyUrl,
-    directEndpoint,
-    defaultQuery,
-    headers,
-    proxy,
-    azure,
-    streaming = true,
-    addParams,
-    dropParams,
-  } = options;
+export function getOpenAILLMConfig({
+  azure,
+  apiKey,
+  baseURL,
+  streaming,
+  addParams,
+  dropParams,
+  useOpenRouter,
+  modelOptions: _modelOptions,
+}: {
+  apiKey: string;
+  streaming: boolean;
+  baseURL?: string | null;
+  modelOptions: Partial<t.OpenAIParameters>;
+  addParams?: Record<string, unknown>;
+  dropParams?: string[];
+  useOpenRouter?: boolean;
+  azure?: false | t.AzureOptions;
+}): Pick<t.LLMConfigResult, 'llmConfig' | 'tools'> & {
+  azure?: t.AzureOptions;
+} {
   const {
     reasoning_effort,
     reasoning_summary,
     verbosity,
     web_search,
     frequency_penalty,
     presence_penalty,
     ...modelOptions
   } = _modelOptions;
-  const llmConfig: Partial<t.ClientOptions> &
-    Partial<t.OpenAIParameters> &
-    Partial<AzureOpenAIInput> = Object.assign(
+
+  const llmConfig = Object.assign(
     {
       streaming,
       model: modelOptions.model ?? '',
     },
     modelOptions,
-  );
+  ) as Partial<t.OAIClientOptions> & Partial<t.OpenAIParameters> & Partial<AzureOpenAIInput>;

   if (frequency_penalty != null) {
     llmConfig.frequencyPenalty = frequency_penalty;
@@ -148,104 +140,8 @@ export function getOpenAIConfig
     }
   }

-  let useOpenRouter = false;
-  const configOptions: t.OpenAIConfiguration = {};
-
-  if (
-    (reverseProxyUrl && reverseProxyUrl.includes(KnownEndpoints.openrouter)) ||
-    (endpoint && endpoint.toLowerCase().includes(KnownEndpoints.openrouter))
-  ) {
-    useOpenRouter = true;
+  if (useOpenRouter) {
     llmConfig.include_reasoning = true;
-    configOptions.baseURL = reverseProxyUrl;
-    configOptions.defaultHeaders = Object.assign(
-      {
-        'HTTP-Referer': 'https://librechat.ai',
-        'X-Title': 'LibreChat',
-      },
-      headers,
-    );
-  } else if (reverseProxyUrl) {
-    configOptions.baseURL = reverseProxyUrl;
-    if (headers) {
-      configOptions.defaultHeaders = headers;
-    }
   }

-  if (defaultQuery) {
-    configOptions.defaultQuery = defaultQuery;
-  }
-
-  if (proxy) {
-    const proxyAgent = new ProxyAgent(proxy);
-    configOptions.fetchOptions = {
-      dispatcher: proxyAgent,
-    };
-  }
-
-  if (azure) {
-    const useModelName = isEnabled(process.env.AZURE_USE_MODEL_AS_DEPLOYMENT_NAME);
-    const updatedAzure = { ...azure };
-    updatedAzure.azureOpenAIApiDeploymentName = useModelName
-      ? sanitizeModelName(llmConfig.model || '')
-      : azure.azureOpenAIApiDeploymentName;
-
-    if (process.env.AZURE_OPENAI_DEFAULT_MODEL) {
-      llmConfig.model = process.env.AZURE_OPENAI_DEFAULT_MODEL;
-    }
-
-    const constructBaseURL = () => {
-      if (!configOptions.baseURL) {
-        return;
-      }
-      const azureURL = constructAzureURL({
-        baseURL: configOptions.baseURL,
-        azureOptions: updatedAzure,
-      });
-      updatedAzure.azureOpenAIBasePath = azureURL.split(
-        `/${updatedAzure.azureOpenAIApiDeploymentName}`,
-      )[0];
-    };
-
-    constructBaseURL();
-    Object.assign(llmConfig, updatedAzure);
-
-    const constructAzureResponsesApi = () => {
-      if (!llmConfig.useResponsesApi) {
-        return;
-      }
-
-      configOptions.baseURL = constructAzureURL({
-        baseURL: configOptions.baseURL || 'https://${INSTANCE_NAME}.openai.azure.com/openai/v1',
-        azureOptions: llmConfig,
-      });
-
-      delete llmConfig.azureOpenAIApiDeploymentName;
-      delete llmConfig.azureOpenAIApiInstanceName;
-      delete llmConfig.azureOpenAIApiVersion;
-      delete llmConfig.azureOpenAIBasePath;
-      delete llmConfig.azureOpenAIApiKey;
-      llmConfig.apiKey = apiKey;
-
-      configOptions.defaultHeaders = {
-        ...configOptions.defaultHeaders,
-        'api-key': apiKey,
-      };
-      configOptions.defaultQuery = {
-        ...configOptions.defaultQuery,
-        'api-version': configOptions.defaultQuery?.['api-version'] ?? 'preview',
-      };
-    };
-
-    constructAzureResponsesApi();
-
-    llmConfig.model = updatedAzure.azureOpenAIApiDeploymentName;
-  } else {
-    llmConfig.apiKey = apiKey;
-  }
-
-  if (process.env.OPENAI_ORGANIZATION && azure) {
-    configOptions.organization = process.env.OPENAI_ORGANIZATION;
-  }
-
   if (
@@ -270,7 +166,7 @@ export function getOpenAIConfig

   const tools: BindToolsInput[] = [];

-  if (modelOptions.web_search) {
+  if (web_search) {
     llmConfig.useResponsesApi = true;
     tools.push({ type: 'web_search_preview' });
   }
@@ -278,7 +174,7 @@ export function getOpenAIConfig
   /**
    * Note: OpenAI Web Search models do not support any known parameters besides `max_tokens`
    */
-  if (modelOptions.model && /gpt-4o.*search/.test(modelOptions.model)) {
+  if (modelOptions.model && /gpt-4o.*search/.test(modelOptions.model as string)) {
     const searchExcludeParams = [
       'frequency_penalty',
       'presence_penalty',
@@ -301,13 +197,13 @@ export function getOpenAIConfig

     combinedDropParams.forEach((param) => {
       if (param in llmConfig) {
-        delete llmConfig[param as keyof t.ClientOptions];
+        delete llmConfig[param as keyof t.OAIClientOptions];
       }
     });
   } else if (dropParams && Array.isArray(dropParams)) {
     dropParams.forEach((param) => {
       if (param in llmConfig) {
-        delete llmConfig[param as keyof t.ClientOptions];
+        delete llmConfig[param as keyof t.OAIClientOptions];
       }
     });
   }
@ -329,20 +225,52 @@ export function getOpenAIConfig(
|
|||
llmConfig.modelKwargs = modelKwargs;
|
||||
}
|
||||
|
||||
if (directEndpoint === true && configOptions?.baseURL != null) {
|
||||
configOptions.fetch = createFetch({
|
||||
directEndpoint: directEndpoint,
|
||||
reverseProxyUrl: configOptions?.baseURL,
|
||||
}) as unknown as Fetch;
|
||||
if (!azure) {
|
||||
llmConfig.apiKey = apiKey;
|
||||
return { llmConfig, tools };
|
||||
}
|
||||
|
||||
const result: t.LLMConfigResult = {
|
||||
llmConfig,
|
||||
configOptions,
|
||||
tools,
|
||||
};
|
||||
if (useOpenRouter) {
|
||||
result.provider = Providers.OPENROUTER;
|
||||
const useModelName = isEnabled(process.env.AZURE_USE_MODEL_AS_DEPLOYMENT_NAME);
|
||||
const updatedAzure = { ...azure };
|
||||
updatedAzure.azureOpenAIApiDeploymentName = useModelName
|
||||
? sanitizeModelName(llmConfig.model || '')
|
||||
: azure.azureOpenAIApiDeploymentName;
|
||||
|
||||
if (process.env.AZURE_OPENAI_DEFAULT_MODEL) {
|
||||
llmConfig.model = process.env.AZURE_OPENAI_DEFAULT_MODEL;
|
||||
}
|
||||
return result;
|
||||
|
||||
const constructAzureOpenAIBasePath = () => {
|
||||
if (!baseURL) {
|
||||
return;
|
||||
}
|
||||
const azureURL = constructAzureURL({
|
||||
baseURL,
|
||||
azureOptions: updatedAzure,
|
||||
});
|
||||
updatedAzure.azureOpenAIBasePath = azureURL.split(
|
||||
`/${updatedAzure.azureOpenAIApiDeploymentName}`,
|
||||
)[0];
|
||||
};
|
||||
|
||||
constructAzureOpenAIBasePath();
|
||||
Object.assign(llmConfig, updatedAzure);
|
||||
|
||||
const constructAzureResponsesApi = () => {
|
||||
if (!llmConfig.useResponsesApi) {
|
||||
return;
|
||||
}
|
||||
|
||||
delete llmConfig.azureOpenAIApiDeploymentName;
|
||||
delete llmConfig.azureOpenAIApiInstanceName;
|
||||
delete llmConfig.azureOpenAIApiVersion;
|
||||
delete llmConfig.azureOpenAIBasePath;
|
||||
delete llmConfig.azureOpenAIApiKey;
|
||||
llmConfig.apiKey = apiKey;
|
||||
};
|
||||
|
||||
constructAzureResponsesApi();
|
||||
|
||||
llmConfig.model = updatedAzure.azureOpenAIApiDeploymentName;
|
||||
return { llmConfig, tools, azure: updatedAzure };
|
||||
}
|
||||
|
|
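For orientation, a usage sketch of the reworked function follows; the import path and exact option names are assumptions drawn from this PR's description and its compatibility tests, not verbatim from the diff:

// Sketch only: assumes getOpenAIConfig(apiKey, options) as exercised by the
// backward-compatibility tests mentioned in the commit message.
import { getOpenAIConfig } from '~/endpoints/openai/config';

const { llmConfig, tools } = getOpenAIConfig('sk-example-key', {
  modelOptions: { model: 'gpt-4o', temperature: 0.7, web_search: true },
  dropParams: ['frequency_penalty'], // stripped from llmConfig, per the branch above
});
// With web_search enabled, useResponsesApi is set on llmConfig and a
// 'web_search_preview' tool is pushed onto `tools`.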
95
packages/api/src/endpoints/openai/transform.ts
Normal file
@@ -0,0 +1,95 @@
import { EModelEndpoint } from 'librechat-data-provider';
import type { ClientOptions } from '@librechat/agents';
import type * as t from '~/types';
import { knownOpenAIParams } from './llm';

const anthropicExcludeParams = new Set(['anthropicApiUrl']);

/**
 * Transforms a non-OpenAI LLM config to an OpenAI-conformant config.
 * Non-OpenAI parameters are moved to `modelKwargs`.
 * Also extracts configuration options that belong in `configOptions`.
 * Handles `addParams` and `dropParams` for parameter customization.
 */
export function transformToOpenAIConfig({
  addParams,
  dropParams,
  llmConfig,
  fromEndpoint,
}: {
  addParams?: Record<string, unknown>;
  dropParams?: string[];
  llmConfig: ClientOptions;
  fromEndpoint: string;
}): {
  llmConfig: t.OAIClientOptions;
  configOptions: Partial<t.OpenAIConfiguration>;
} {
  const openAIConfig: Partial<t.OAIClientOptions> = {};
  let configOptions: Partial<t.OpenAIConfiguration> = {};
  let modelKwargs: Record<string, unknown> = {};
  let hasModelKwargs = false;

  const isAnthropic = fromEndpoint === EModelEndpoint.anthropic;
  const excludeParams = isAnthropic ? anthropicExcludeParams : new Set();

  for (const [key, value] of Object.entries(llmConfig)) {
    if (value === undefined || value === null) {
      continue;
    }

    if (excludeParams.has(key)) {
      continue;
    }

    if (isAnthropic && key === 'clientOptions') {
      configOptions = Object.assign({}, configOptions, value as Partial<t.OpenAIConfiguration>);
      continue;
    } else if (isAnthropic && key === 'invocationKwargs') {
      modelKwargs = Object.assign({}, modelKwargs, value as Record<string, unknown>);
      hasModelKwargs = true;
      continue;
    }

    if (knownOpenAIParams.has(key)) {
      (openAIConfig as Record<string, unknown>)[key] = value;
    } else {
      modelKwargs[key] = value;
      hasModelKwargs = true;
    }
  }

  if (addParams && typeof addParams === 'object') {
    for (const [key, value] of Object.entries(addParams)) {
      if (knownOpenAIParams.has(key)) {
        (openAIConfig as Record<string, unknown>)[key] = value;
      } else {
        modelKwargs[key] = value;
        hasModelKwargs = true;
      }
    }
  }

  if (hasModelKwargs) {
    openAIConfig.modelKwargs = modelKwargs;
  }

  if (dropParams && Array.isArray(dropParams)) {
    dropParams.forEach((param) => {
      if (param in openAIConfig) {
        delete openAIConfig[param as keyof t.OAIClientOptions];
      }
      if (openAIConfig.modelKwargs && param in openAIConfig.modelKwargs) {
        delete openAIConfig.modelKwargs[param];
        if (Object.keys(openAIConfig.modelKwargs).length === 0) {
          delete openAIConfig.modelKwargs;
        }
      }
    });
  }

  return {
    llmConfig: openAIConfig as t.OAIClientOptions,
    configOptions,
  };
}
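A brief, illustrative call may help; which keys count as "known OpenAI params" depends on the contents of knownOpenAIParams, so the per-key comments below are assumptions:

// Illustrative only: an Anthropic-shaped config flowing through the transform.
const { llmConfig, configOptions } = transformToOpenAIConfig({
  fromEndpoint: EModelEndpoint.anthropic,
  llmConfig: {
    model: 'claude-3-7-sonnet',
    temperature: 0.5, // presumably in knownOpenAIParams: stays on llmConfig
    topK: 10, // presumably not: moved into modelKwargs
  } as ClientOptions,
  addParams: { custom_flag: true }, // unknown key: also lands in modelKwargs
  dropParams: ['topK'], // then deleted from modelKwargs again
});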
69
packages/api/src/types/anthropic.ts
Normal file
@@ -0,0 +1,69 @@
import { z } from 'zod';
import { Dispatcher } from 'undici';
import { anthropicSchema } from 'librechat-data-provider';
import type { AnthropicClientOptions } from '@librechat/agents';
import type { LLMConfigResult } from './openai';

export type AnthropicParameters = z.infer<typeof anthropicSchema>;

export interface ThinkingConfigDisabled {
  type: 'disabled';
}

export interface ThinkingConfigEnabled {
  /**
   * Determines how many tokens Claude can use for its internal reasoning process.
   * Larger budgets can enable more thorough analysis for complex problems, improving
   * response quality.
   *
   * Must be ≥1024 and less than `max_tokens`.
   *
   * See
   * [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
   * for details.
   */
  budget_tokens: number;

  type: 'enabled';
}

/**
 * Configuration for enabling Claude's extended thinking.
 *
 * When enabled, responses include `thinking` content blocks showing Claude's
 * thinking process before the final answer. Requires a minimum budget of 1,024
 * tokens and counts towards your `max_tokens` limit.
 *
 * See
 * [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
 * for details.
 */
export type ThinkingConfigParam = ThinkingConfigEnabled | ThinkingConfigDisabled;
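To make the union concrete, values of this type look like the following (the budget is an arbitrary figure above the documented 1,024-token minimum):

const thinking: ThinkingConfigParam = {
  type: 'enabled',
  budget_tokens: 2048, // must be ≥1024 and below the request's max_tokens
};
const noThinking: ThinkingConfigParam = { type: 'disabled' };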

export type AnthropicModelOptions = Partial<Omit<AnthropicParameters, 'thinking'>> & {
  thinking?: AnthropicParameters['thinking'] | null;
  user?: string;
};

/**
 * Configuration options for the getLLMConfig function
 */
export interface AnthropicConfigOptions {
  modelOptions?: AnthropicModelOptions;
  /** Proxy server URL */
  proxy?: string | null;
  /** URL for a reverse proxy, if used */
  reverseProxyUrl?: string | null;
}

/**
 * Return type for getLLMConfig function
 */
export type AnthropicLLMConfigResult = LLMConfigResult<
  AnthropicClientOptions & {
    clientOptions?: {
      fetchOptions?: { dispatcher: Dispatcher };
    };
    stream?: boolean;
  }
>;

@@ -13,3 +13,4 @@ export * from './prompts';
export * from './run';
export * from './tools';
export * from './zod';
export * from './anthropic';
@@ -1,6 +1,6 @@
import { z } from 'zod';
import { openAISchema, EModelEndpoint } from 'librechat-data-provider';
import type { TEndpointOption, TAzureConfig, TEndpoint } from 'librechat-data-provider';
import type { TEndpointOption, TAzureConfig, TEndpoint, TConfig } from 'librechat-data-provider';
import type { BindToolsInput } from '@langchain/core/language_models/chat_models';
import type { OpenAIClientOptions, Providers } from '@librechat/agents';
import type { AzureOptions } from './azure';

@@ -8,11 +8,13 @@ import type { AppConfig } from './config';

export type OpenAIParameters = z.infer<typeof openAISchema>;

export type OpenAIModelOptions = Partial<OpenAIParameters>;

/**
 * Configuration options for the getLLMConfig function
 */
export interface OpenAIConfigOptions {
  modelOptions?: Partial<OpenAIParameters>;
  modelOptions?: OpenAIModelOptions;
  directEndpoint?: boolean;
  reverseProxyUrl?: string | null;
  defaultQuery?: Record<string, string | undefined>;

@@ -22,24 +24,28 @@ export interface OpenAIConfigOptions {
  streaming?: boolean;
  addParams?: Record<string, unknown>;
  dropParams?: string[];
  customParams?: Partial<TConfig['customParams']>;
}

export type OpenAIConfiguration = OpenAIClientOptions['configuration'];

export type ClientOptions = OpenAIClientOptions & {
export type OAIClientOptions = OpenAIClientOptions & {
  include_reasoning?: boolean;
};

/**
 * Return type for getLLMConfig function
 */
export interface LLMConfigResult {
  llmConfig: ClientOptions;
  configOptions: OpenAIConfiguration;
  tools?: BindToolsInput[];
export interface LLMConfigResult<T = OAIClientOptions> {
  llmConfig: T;
  provider?: Providers;
  tools?: BindToolsInput[];
}

export type OpenAIConfigResult = LLMConfigResult<OAIClientOptions> & {
  configOptions?: OpenAIConfiguration;
};
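Since LLMConfigResult<T> is now generic over the client options and configOptions lives on the OpenAI-specific alias, a consumer might key off the alias; a hypothetical sketch (logResult is not part of this PR):

// Hypothetical consumer of the reshaped result type.
function logResult(result: OpenAIConfigResult) {
  console.log(result.llmConfig.model, result.provider, result.configOptions?.baseURL);
}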

/**
 * Interface for user values retrieved from the database
 */
@@ -15,3 +15,4 @@ export * from './text';
export { default as Tokenizer } from './tokenizer';
export * from './yaml';
export * from './http';
export * from './tokens';
525
packages/api/src/utils/tokens.ts
Normal file
@@ -0,0 +1,525 @@
import z from 'zod';
import { EModelEndpoint } from 'librechat-data-provider';

/**
 * Configuration object mapping model keys to their respective prompt rates,
 * completion rates, and context limit.
 *
 * Note: the `[key: string]: unknown` index signature is not in the original
 * JSDoc typedef in /api/typedefs.js. It is included because
 * `getModelMaxOutputTokens` calls `getModelTokenValue` with a key of 'output',
 * which the original typedef did not cover but which the `matchedPattern`
 * branch of `getModelTokenValue` may read from a `TokenConfig`. The index
 * signature preserves that behavior (and any other ad hoc keys) until the
 * interface can be typed more tightly.
 */
export interface TokenConfig {
  prompt: number;
  completion: number;
  context: number;
  [key: string]: unknown;
}

/** An endpoint's config object mapping model keys to their respective prompt, completion rates, and context limit */
export type EndpointTokenConfig = Record<string, TokenConfig>;
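For example, a minimal EndpointTokenConfig (rates are per million tokens; these numbers are illustrative only):

const exampleTokenConfig: EndpointTokenConfig = {
  'gpt-4o': {
    prompt: 2.5, // illustrative USD per 1M prompt tokens
    completion: 10, // illustrative USD per 1M completion tokens
    context: 127500,
  },
};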

const openAIModels = {
  'o4-mini': 200000,
  'o3-mini': 195000, // -5000 from max
  o3: 200000,
  o1: 195000, // -5000 from max
  'o1-mini': 127500, // -500 from max
  'o1-preview': 127500, // -500 from max
  'gpt-4': 8187, // -5 from max
  'gpt-4-0613': 8187, // -5 from max
  'gpt-4-32k': 32758, // -10 from max
  'gpt-4-32k-0314': 32758, // -10 from max
  'gpt-4-32k-0613': 32758, // -10 from max
  'gpt-4-1106': 127500, // -500 from max
  'gpt-4-0125': 127500, // -500 from max
  'gpt-4.5': 127500, // -500 from max
  'gpt-4.1': 1047576,
  'gpt-4.1-mini': 1047576,
  'gpt-4.1-nano': 1047576,
  'gpt-5': 400000,
  'gpt-5-mini': 400000,
  'gpt-5-nano': 400000,
  'gpt-4o': 127500, // -500 from max
  'gpt-4o-mini': 127500, // -500 from max
  'gpt-4o-2024-05-13': 127500, // -500 from max
  'gpt-4o-2024-08-06': 127500, // -500 from max
  'gpt-4-turbo': 127500, // -500 from max
  'gpt-4-vision': 127500, // -500 from max
  'gpt-3.5-turbo': 16375, // -10 from max
  'gpt-3.5-turbo-0613': 4092, // -5 from max
  'gpt-3.5-turbo-0301': 4092, // -5 from max
  'gpt-3.5-turbo-16k': 16375, // -10 from max
  'gpt-3.5-turbo-16k-0613': 16375, // -10 from max
  'gpt-3.5-turbo-1106': 16375, // -10 from max
  'gpt-3.5-turbo-0125': 16375, // -10 from max
};

const mistralModels = {
  'mistral-': 31990, // -10 from max
  'mistral-7b': 31990, // -10 from max
  'mistral-small': 31990, // -10 from max
  'mixtral-8x7b': 31990, // -10 from max
  'mistral-large': 131000,
  'mistral-large-2402': 127500,
  'mistral-large-2407': 127500,
  'pixtral-large': 131000,
  'mistral-saba': 32000,
  codestral: 256000,
  'ministral-8b': 131000,
  'ministral-3b': 131000,
};

const cohereModels = {
  'command-light': 4086, // -10 from max
  'command-light-nightly': 8182, // -10 from max
  command: 4086, // -10 from max
  'command-nightly': 8182, // -10 from max
  'command-r': 127500, // -500 from max
  'command-r-plus': 127500, // -500 from max
};

const googleModels = {
  /* Max I/O is combined so we subtract the amount from max response tokens for actual total */
  gemma: 8196,
  'gemma-2': 32768,
  'gemma-3': 32768,
  'gemma-3-27b': 131072,
  gemini: 30720, // -2048 from max
  'gemini-pro-vision': 12288,
  'gemini-exp': 2000000,
  'gemini-2.5': 1000000, // 1M input tokens, 64k output tokens
  'gemini-2.5-pro': 1000000,
  'gemini-2.5-flash': 1000000,
  'gemini-2.0': 2000000,
  'gemini-2.0-flash': 1000000,
  'gemini-2.0-flash-lite': 1000000,
  'gemini-1.5': 1000000,
  'gemini-1.5-flash': 1000000,
  'gemini-1.5-flash-8b': 1000000,
  'text-bison-32k': 32758, // -10 from max
  'chat-bison-32k': 32758, // -10 from max
  'code-bison-32k': 32758, // -10 from max
  'codechat-bison-32k': 32758,
  /* Codey, -5 from max: 6144 */
  'code-': 6139,
  'codechat-': 6139,
  /* PaLM2, -5 from max: 8192 */
  'text-': 8187,
  'chat-': 8187,
};

const anthropicModels = {
  'claude-': 100000,
  'claude-instant': 100000,
  'claude-2': 100000,
  'claude-2.1': 200000,
  'claude-3': 200000,
  'claude-3-haiku': 200000,
  'claude-3-sonnet': 200000,
  'claude-3-opus': 200000,
  'claude-3.5-haiku': 200000,
  'claude-3-5-haiku': 200000,
  'claude-3-5-sonnet': 200000,
  'claude-3.5-sonnet': 200000,
  'claude-3-7-sonnet': 200000,
  'claude-3.7-sonnet': 200000,
  'claude-3-5-sonnet-latest': 200000,
  'claude-3.5-sonnet-latest': 200000,
  'claude-sonnet-4': 1000000,
  'claude-opus-4': 200000,
  'claude-4': 200000,
};

const deepseekModels = {
  'deepseek-reasoner': 63000, // -1000 from max (API)
  deepseek: 63000, // -1000 from max (API)
  'deepseek.r1': 127500,
};

const metaModels = {
  // Basic patterns
  llama3: 8000,
  llama2: 4000,
  'llama-3': 8000,
  'llama-2': 4000,

  // llama3.x pattern
  'llama3.1': 127500,
  'llama3.2': 127500,
  'llama3.3': 127500,

  // llama3-x pattern
  'llama3-1': 127500,
  'llama3-2': 127500,
  'llama3-3': 127500,

  // llama-3.x pattern
  'llama-3.1': 127500,
  'llama-3.2': 127500,
  'llama-3.3': 127500,

  // llama3.x:Nb pattern
  'llama3.1:405b': 127500,
  'llama3.1:70b': 127500,
  'llama3.1:8b': 127500,
  'llama3.2:1b': 127500,
  'llama3.2:3b': 127500,
  'llama3.2:11b': 127500,
  'llama3.2:90b': 127500,
  'llama3.3:70b': 127500,

  // llama3-x-Nb pattern
  'llama3-1-405b': 127500,
  'llama3-1-70b': 127500,
  'llama3-1-8b': 127500,
  'llama3-2-1b': 127500,
  'llama3-2-3b': 127500,
  'llama3-2-11b': 127500,
  'llama3-2-90b': 127500,
  'llama3-3-70b': 127500,

  // llama-3.x-Nb pattern
  'llama-3.1-405b': 127500,
  'llama-3.1-70b': 127500,
  'llama-3.1-8b': 127500,
  'llama-3.2-1b': 127500,
  'llama-3.2-3b': 127500,
  'llama-3.2-11b': 127500,
  'llama-3.2-90b': 127500,
  'llama-3.3-70b': 127500,

  // Original llama2/3 patterns
  'llama3-70b': 8000,
  'llama3-8b': 8000,
  'llama2-70b': 4000,
  'llama2-13b': 4000,
  'llama3:70b': 8000,
  'llama3:8b': 8000,
  'llama2:70b': 4000,
};

const ollamaModels = {
  'qwen2.5': 32000,
};

const ai21Models = {
  'ai21.j2-mid-v1': 8182, // -10 from max
  'ai21.j2-ultra-v1': 8182, // -10 from max
  'ai21.jamba-instruct-v1:0': 255500, // -500 from max
};

const amazonModels = {
  'amazon.titan-text-lite-v1': 4000,
  'amazon.titan-text-express-v1': 8000,
  'amazon.titan-text-premier-v1:0': 31500, // -500 from max
  // https://aws.amazon.com/ai/generative-ai/nova/
  'amazon.nova-micro-v1:0': 127000, // -1000 from max
  'amazon.nova-lite-v1:0': 295000, // -5000 from max
  'amazon.nova-pro-v1:0': 295000, // -5000 from max
  'amazon.nova-premier-v1:0': 995000, // -5000 from max
};

const bedrockModels = {
  ...anthropicModels,
  ...mistralModels,
  ...cohereModels,
  ...ollamaModels,
  ...deepseekModels,
  ...metaModels,
  ...ai21Models,
  ...amazonModels,
};

const xAIModels = {
  grok: 131072,
  'grok-beta': 131072,
  'grok-vision-beta': 8192,
  'grok-2': 131072,
  'grok-2-latest': 131072,
  'grok-2-1212': 131072,
  'grok-2-vision': 32768,
  'grok-2-vision-latest': 32768,
  'grok-2-vision-1212': 32768,
  'grok-3': 131072,
  'grok-3-fast': 131072,
  'grok-3-mini': 131072,
  'grok-3-mini-fast': 131072,
  'grok-4': 256000, // 256K context
};

const aggregateModels = {
  ...openAIModels,
  ...googleModels,
  ...bedrockModels,
  ...xAIModels,
  // misc.
  kimi: 131000,
  // GPT-OSS
  'gpt-oss-20b': 131000,
  'gpt-oss-120b': 131000,
};

export const maxTokensMap = {
  [EModelEndpoint.azureOpenAI]: openAIModels,
  [EModelEndpoint.openAI]: aggregateModels,
  [EModelEndpoint.agents]: aggregateModels,
  [EModelEndpoint.custom]: aggregateModels,
  [EModelEndpoint.google]: googleModels,
  [EModelEndpoint.anthropic]: anthropicModels,
  [EModelEndpoint.bedrock]: bedrockModels,
};

export const modelMaxOutputs = {
  o1: 32268, // -500 from max: 32,768
  'o1-mini': 65136, // -500 from max: 65,536
  'o1-preview': 32268, // -500 from max: 32,768
  'gpt-5': 128000,
  'gpt-5-mini': 128000,
  'gpt-5-nano': 128000,
  'gpt-oss-20b': 131000,
  'gpt-oss-120b': 131000,
  system_default: 32000,
};

/** Outputs from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names */
const anthropicMaxOutputs = {
  'claude-3-haiku': 4096,
  'claude-3-sonnet': 4096,
  'claude-3-opus': 4096,
  'claude-opus-4': 32000,
  'claude-sonnet-4': 64000,
  'claude-3.5-sonnet': 8192,
  'claude-3-5-sonnet': 8192,
  'claude-3.7-sonnet': 128000,
  'claude-3-7-sonnet': 128000,
};

export const maxOutputTokensMap = {
  [EModelEndpoint.anthropic]: anthropicMaxOutputs,
  [EModelEndpoint.azureOpenAI]: modelMaxOutputs,
  [EModelEndpoint.openAI]: modelMaxOutputs,
  [EModelEndpoint.custom]: modelMaxOutputs,
};

/**
 * Finds the first matching pattern in the tokens map.
 * @param {string} modelName
 * @param {Record<string, number> | EndpointTokenConfig} tokensMap
 * @returns {string|null}
 */
export function findMatchingPattern(
  modelName: string,
  tokensMap: Record<string, number> | EndpointTokenConfig,
): string | null {
  const keys = Object.keys(tokensMap);
  for (let i = keys.length - 1; i >= 0; i--) {
    const modelKey = keys[i];
    if (modelName.includes(modelKey)) {
      return modelKey;
    }
  }

  return null;
}
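Because keys are scanned in reverse insertion order, a more specific key listed later in a map wins over an earlier prefix; for instance, against anthropicModels above:

findMatchingPattern('claude-3-5-sonnet-latest-20250101', anthropicModels);
// => 'claude-3-5-sonnet-latest' (reached before the shorter 'claude-3-5-sonnet')
findMatchingPattern('totally-unknown-model', anthropicModels); // => null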

/**
 * Retrieves a token value for a given model name from a tokens map.
 *
 * @param modelName - The name of the model to look up.
 * @param tokensMap - The map of model names to token values.
 * @param [key='context'] - The key to look up in the tokens map.
 * @returns The token value for the given model or undefined if no match is found.
 */
export function getModelTokenValue(
  modelName: string,
  tokensMap?: EndpointTokenConfig | Record<string, number>,
  key = 'context' as keyof TokenConfig,
): number | undefined {
  if (typeof modelName !== 'string' || !tokensMap) {
    return undefined;
  }

  const value = tokensMap[modelName];
  if (typeof value === 'number') {
    return value;
  }

  if (value?.context) {
    return value.context;
  }

  const matchedPattern = findMatchingPattern(modelName, tokensMap);

  if (matchedPattern) {
    const result = tokensMap[matchedPattern];
    if (typeof result === 'number') {
      return result;
    }

    const tokenValue = result?.[key];
    if (typeof tokenValue === 'number') {
      return tokenValue;
    }
    return tokensMap.system_default as number | undefined;
  }

  return tokensMap.system_default as number | undefined;
}
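Resolution order in practice: exact key, then a TokenConfig's `context`, then a pattern match (read with the requested key), then `system_default`. For example, against modelMaxOutputs above:

getModelTokenValue('o1', modelMaxOutputs); // exact numeric entry => 32268
getModelTokenValue('gpt-5-chat-preview', modelMaxOutputs); // pattern 'gpt-5' => 128000
getModelTokenValue('mystery-model', modelMaxOutputs); // falls back to system_default => 32000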

/**
 * Retrieves the maximum tokens for a given model name.
 *
 * @param modelName - The name of the model to look up.
 * @param endpoint - The endpoint (default is 'openAI').
 * @param [endpointTokenConfig] - Token config for the current endpoint to use for the max tokens lookup.
 * @returns The maximum tokens for the given model or undefined if no match is found.
 */
export function getModelMaxTokens(
  modelName: string,
  endpoint = EModelEndpoint.openAI,
  endpointTokenConfig?: EndpointTokenConfig,
): number | undefined {
  const tokensMap = endpointTokenConfig ?? maxTokensMap[endpoint as keyof typeof maxTokensMap];
  return getModelTokenValue(modelName, tokensMap);
}
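For instance, with the maps defined above:

getModelMaxTokens('gpt-4o-mini'); // exact match in the openAI map => 127500
getModelMaxTokens('claude-3-7-sonnet-20250219', EModelEndpoint.anthropic);
// pattern 'claude-3-7-sonnet' => 200000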

/**
 * Retrieves the maximum output tokens for a given model name.
 *
 * @param modelName - The name of the model to look up.
 * @param endpoint - The endpoint (default is 'openAI').
 * @param [endpointTokenConfig] - Token config for the current endpoint to use for the max tokens lookup.
 * @returns The maximum output tokens for the given model or undefined if no match is found.
 */
export function getModelMaxOutputTokens(
  modelName: string,
  endpoint = EModelEndpoint.openAI,
  endpointTokenConfig?: EndpointTokenConfig,
): number | undefined {
  const tokensMap =
    endpointTokenConfig ?? maxOutputTokensMap[endpoint as keyof typeof maxOutputTokensMap];
  return getModelTokenValue(modelName, tokensMap, 'output');
}
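And correspondingly for output limits:

getModelMaxOutputTokens('o1-mini'); // => 65136
getModelMaxOutputTokens('claude-3-7-sonnet', EModelEndpoint.anthropic); // => 128000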

/**
 * Retrieves the model name key for a given model name input. If the exact model name isn't found,
 * it searches for partial matches within the model name, checking keys in reverse order.
 *
 * @param modelName - The name of the model to look up.
 * @param endpoint - The endpoint (default is 'openAI').
 * @returns The model name key for the given model; returns the input if no match is found and the input is a string.
 *
 * @example
 * matchModelName('gpt-4-32k-0613'); // Returns 'gpt-4-32k-0613'
 * matchModelName('gpt-4-32k-unknown'); // Returns 'gpt-4-32k'
 * matchModelName('unknown-model'); // Returns 'unknown-model' (no pattern match)
 */
export function matchModelName(
  modelName: string,
  endpoint = EModelEndpoint.openAI,
): string | undefined {
  if (typeof modelName !== 'string') {
    return undefined;
  }

  const tokensMap: Record<string, number> = maxTokensMap[endpoint as keyof typeof maxTokensMap];
  if (!tokensMap) {
    return modelName;
  }

  if (tokensMap[modelName]) {
    return modelName;
  }

  const matchedPattern = findMatchingPattern(modelName, tokensMap);
  return matchedPattern || modelName;
}

export const modelSchema = z.object({
  id: z.string(),
  pricing: z.object({
    prompt: z.string(),
    completion: z.string(),
  }),
  context_length: z.number(),
});

export const inputSchema = z.object({
  data: z.array(modelSchema),
});

/**
 * Processes a list of model data from an API and organizes it into a token config keyed by model ID,
 * with per-million-token rates and context length.
 * @param {{ data: Array<z.infer<typeof modelSchema>> }} input The input object containing the data fetched from the API.
 * @returns {EndpointTokenConfig} The processed model data.
 */
export function processModelData(input: z.infer<typeof inputSchema>): EndpointTokenConfig {
  const validationResult = inputSchema.safeParse(input);
  if (!validationResult.success) {
    throw new Error('Invalid input data');
  }
  const { data } = validationResult.data;

  /** @type {EndpointTokenConfig} */
  const tokenConfig: EndpointTokenConfig = {};

  for (const model of data) {
    const modelKey = model.id;
    if (modelKey === 'openrouter/auto') {
      model.pricing = {
        prompt: '0.00001',
        completion: '0.00003',
      };
    }
    const prompt = parseFloat(model.pricing.prompt) * 1000000;
    const completion = parseFloat(model.pricing.completion) * 1000000;

    tokenConfig[modelKey] = {
      prompt,
      completion,
      context: model.context_length,
    };
  }

  return tokenConfig;
}
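Worked example: per-token prices are scaled to per-million-token rates, so 0.000005 USD/token becomes 5 USD per 1M tokens (modulo floating-point rounding). The input values here are invented:

const tokenConfig = processModelData({
  data: [
    {
      id: 'example/model', // hypothetical model ID
      pricing: { prompt: '0.000005', completion: '0.000015' },
      context_length: 128000,
    },
  ],
});
// => { 'example/model': { prompt: 5, completion: 15, context: 128000 } }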

export const tiktokenModels = new Set([
  'text-davinci-003',
  'text-davinci-002',
  'text-davinci-001',
  'text-curie-001',
  'text-babbage-001',
  'text-ada-001',
  'davinci',
  'curie',
  'babbage',
  'ada',
  'code-davinci-002',
  'code-davinci-001',
  'code-cushman-002',
  'code-cushman-001',
  'davinci-codex',
  'cushman-codex',
  'text-davinci-edit-001',
  'code-davinci-edit-001',
  'text-embedding-ada-002',
  'text-similarity-davinci-001',
  'text-similarity-curie-001',
  'text-similarity-babbage-001',
  'text-similarity-ada-001',
  'text-search-davinci-doc-001',
  'text-search-curie-doc-001',
  'text-search-babbage-doc-001',
  'text-search-ada-doc-001',
  'code-search-babbage-code-001',
  'code-search-ada-code-001',
  'gpt2',
  'gpt-4',
  'gpt-4-0314',
  'gpt-4-32k',
  'gpt-4-32k-0314',
  'gpt-3.5-turbo',
  'gpt-3.5-turbo-0301',
]);
@@ -619,14 +619,14 @@ export const tConversationSchema = z.object({
  userLabel: z.string().optional(),
  model: z.string().nullable().optional(),
  promptPrefix: z.string().nullable().optional(),
  temperature: z.number().optional(),
  temperature: z.number().nullable().optional(),
  topP: z.number().optional(),
  topK: z.number().optional(),
  top_p: z.number().optional(),
  frequency_penalty: z.number().optional(),
  presence_penalty: z.number().optional(),
  parentMessageId: z.string().optional(),
  maxOutputTokens: coerceNumber.optional(),
  maxOutputTokens: coerceNumber.nullable().optional(),
  maxContextTokens: coerceNumber.optional(),
  max_tokens: coerceNumber.optional(),
  /* Anthropic */

@@ -634,6 +634,7 @@ export const tConversationSchema = z.object({
  system: z.string().optional(),
  thinking: z.boolean().optional(),
  thinkingBudget: coerceNumber.optional(),
  stream: z.boolean().optional(),
  /* artifacts */
  artifacts: z.string().optional(),
  /* google */

@@ -1152,6 +1153,8 @@ export const anthropicBaseSchema = tConversationSchema.pick({
  maxContextTokens: true,
  web_search: true,
  fileTokenLimit: true,
  stop: true,
  stream: true,
});

export const anthropicSchema = anthropicBaseSchema