Merge branch 'main' into feat/Custom-Token-Rates-for-Endpoints

Commit f439f1a80a by Ruben Talstra, 2025-03-10 15:24:21 +01:00 (committed by GitHub)
GPG key ID: B5690EEEBB952194 (no known key found for this signature in database)
195 changed files with 7618 additions and 3779 deletions


@@ -47,7 +47,7 @@ async function loadConfigModels(req) {
);
/**
* @type {Record<string, string[]>}
* @type {Record<string, Promise<string[]>>}
* Map for promises keyed by unique combination of baseURL and apiKey */
const fetchPromisesMap = {};
/**
@@ -102,7 +102,7 @@ async function loadConfigModels(req) {
for (const name of associatedNames) {
const endpoint = endpointsMap[name];
modelsConfig[name] = !modelData?.length ? endpoint.models.default ?? [] : modelData;
modelsConfig[name] = !modelData?.length ? (endpoint.models.default ?? []) : modelData;
}
}
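The JSDoc change above reflects that fetchPromisesMap now caches in-flight promises rather than resolved arrays. A minimal sketch of that deduplication pattern, assuming a fetchModels({ baseURL, apiKey }) helper and a key derived from the baseURL/apiKey pair (names are illustrative, not the exact code in loadConfigModels):

/** @type {Record<string, Promise<string[]>>} */
const fetchPromisesMap = {};

function fetchModelsOnce({ baseURL, apiKey, fetchModels }) {
  const key = `${baseURL}__${apiKey}`;
  // Reuse the pending promise so endpoints sharing the same baseURL and apiKey
  // trigger only one models request instead of one request per endpoint.
  fetchPromisesMap[key] = fetchPromisesMap[key] ?? fetchModels({ baseURL, apiKey });
  return fetchPromisesMap[key];
}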


@@ -5,8 +5,8 @@ const {
getGoogleModels,
getBedrockModels,
getAnthropicModels,
getChatGPTBrowserModels,
} = require('~/server/services/ModelService');
const { logger } = require('~/config');
/**
* Loads the default models for the application.
@@ -15,31 +15,68 @@ const {
* @param {Express.Request} req - The Express request object.
*/
async function loadDefaultModels(req) {
const google = getGoogleModels();
const openAI = await getOpenAIModels({ user: req.user.id });
const anthropic = getAnthropicModels();
const chatGPTBrowser = getChatGPTBrowserModels();
const azureOpenAI = await getOpenAIModels({ user: req.user.id, azure: true });
const gptPlugins = await getOpenAIModels({
user: req.user.id,
azure: useAzurePlugins,
plugins: true,
});
const assistants = await getOpenAIModels({ assistants: true });
const azureAssistants = await getOpenAIModels({ azureAssistants: true });
try {
const [
openAI,
anthropic,
azureOpenAI,
gptPlugins,
assistants,
azureAssistants,
google,
bedrock,
] = await Promise.all([
getOpenAIModels({ user: req.user.id }).catch((error) => {
logger.error('Error fetching OpenAI models:', error);
return [];
}),
getAnthropicModels({ user: req.user.id }).catch((error) => {
logger.error('Error fetching Anthropic models:', error);
return [];
}),
getOpenAIModels({ user: req.user.id, azure: true }).catch((error) => {
logger.error('Error fetching Azure OpenAI models:', error);
return [];
}),
getOpenAIModels({ user: req.user.id, azure: useAzurePlugins, plugins: true }).catch(
(error) => {
logger.error('Error fetching Plugin models:', error);
return [];
},
),
getOpenAIModels({ assistants: true }).catch((error) => {
logger.error('Error fetching OpenAI Assistants API models:', error);
return [];
}),
getOpenAIModels({ azureAssistants: true }).catch((error) => {
logger.error('Error fetching Azure OpenAI Assistants API models:', error);
return [];
}),
Promise.resolve(getGoogleModels()).catch((error) => {
logger.error('Error getting Google models:', error);
return [];
}),
Promise.resolve(getBedrockModels()).catch((error) => {
logger.error('Error getting Bedrock models:', error);
return [];
}),
]);
return {
[EModelEndpoint.openAI]: openAI,
[EModelEndpoint.agents]: openAI,
[EModelEndpoint.google]: google,
[EModelEndpoint.anthropic]: anthropic,
[EModelEndpoint.gptPlugins]: gptPlugins,
[EModelEndpoint.azureOpenAI]: azureOpenAI,
[EModelEndpoint.chatGPTBrowser]: chatGPTBrowser,
[EModelEndpoint.assistants]: assistants,
[EModelEndpoint.azureAssistants]: azureAssistants,
[EModelEndpoint.bedrock]: getBedrockModels(),
};
return {
[EModelEndpoint.openAI]: openAI,
[EModelEndpoint.agents]: openAI,
[EModelEndpoint.google]: google,
[EModelEndpoint.anthropic]: anthropic,
[EModelEndpoint.gptPlugins]: gptPlugins,
[EModelEndpoint.azureOpenAI]: azureOpenAI,
[EModelEndpoint.assistants]: assistants,
[EModelEndpoint.azureAssistants]: azureAssistants,
[EModelEndpoint.bedrock]: bedrock,
};
} catch (error) {
logger.error('Error fetching default models:', error);
throw new Error(`Failed to load default models: ${error.message}`);
}
}
module.exports = loadDefaultModels;
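The rewrite above fetches every provider's model list in parallel and attaches a per-fetch fallback, so one failing provider no longer rejects the whole batch. The general shape of that fail-soft pattern, as a sketch with placeholder fetchers (fetchA and fetchB are illustrative, not functions from this module):

const [a, b] = await Promise.all([
  fetchA().catch((error) => {
    logger.error('Error fetching A:', error);
    return []; // fall back to an empty list instead of failing the whole Promise.all
  }),
  fetchB().catch((error) => {
    logger.error('Error fetching B:', error);
    return [];
  }),
]);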


@@ -22,6 +22,7 @@ const { getAgent } = require('~/models/Agent');
const { logger } = require('~/config');
const providerConfigMap = {
[Providers.XAI]: initCustom,
[Providers.OLLAMA]: initCustom,
[Providers.DEEPSEEK]: initCustom,
[Providers.OPENROUTER]: initCustom,
@@ -101,6 +102,7 @@ const initializeAgentOptions = async ({
});
const provider = agent.provider;
agent.endpoint = provider;
let getOptions = providerConfigMap[provider];
if (!getOptions && providerConfigMap[provider.toLowerCase()] != null) {
agent.provider = provider.toLowerCase();
@@ -112,9 +114,7 @@
}
getOptions = initCustom;
agent.provider = Providers.OPENAI;
agent.endpoint = provider.toLowerCase();
}
const model_parameters = Object.assign(
{},
agent.model_parameters ?? { model: agent.model },


@@ -20,10 +20,19 @@ const addTitle = async (req, { text, response, client }) => {
const titleCache = getLogStores(CacheKeys.GEN_TITLE);
const key = `${req.user.id}-${response.conversationId}`;
const responseText =
response?.content && Array.isArray(response?.content)
? response.content.reduce((acc, block) => {
if (block?.type === 'text') {
return acc + block.text;
}
return acc;
}, '')
: (response?.content ?? response?.text ?? '');
const title = await client.titleConvo({
text,
responseText: response?.text ?? '',
responseText,
conversationId: response.conversationId,
});
await titleCache.set(key, title, 120000);
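The new responseText derivation above assumes response.content is an array of typed blocks and concatenates only the text blocks, falling back to response.content or response.text otherwise. An illustrative shape (values are hypothetical; only the { type: 'text', text } blocks matter to the reducer):

const response = {
  conversationId: 'abc123',
  content: [
    { type: 'text', text: 'Here is the title context.' },
    { type: 'tool_use', name: 'search', input: { query: 'weather' } },
    { type: 'text', text: ' More text follows.' },
  ],
};
// The reducer would yield: 'Here is the title context. More text follows.'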


@@ -48,7 +48,8 @@ function getClaudeHeaders(model, supportsCacheControl) {
};
} else if (/claude-3[-.]7/.test(model)) {
return {
'anthropic-beta': 'output-128k-2025-02-19,prompt-caching-2024-07-31',
'anthropic-beta':
'token-efficient-tools-2025-02-19,output-128k-2025-02-19,prompt-caching-2024-07-31',
};
} else {
return {


@@ -27,6 +27,7 @@ const initializeClient = async ({ req, res, endpointOption, overrideModel, optio
if (anthropicConfig) {
clientOptions.streamRate = anthropicConfig.streamRate;
clientOptions.titleModel = anthropicConfig.titleModel;
}
/** @type {undefined | TBaseEndpoint} */


@@ -1,6 +1,6 @@
const { HttpsProxyAgent } = require('https-proxy-agent');
const { anthropicSettings, removeNullishValues } = require('librechat-data-provider');
const { checkPromptCacheSupport, getClaudeHeaders } = require('./helpers');
const { checkPromptCacheSupport, getClaudeHeaders, configureReasoning } = require('./helpers');
/**
* Generates configuration options for creating an Anthropic language model (LLM) instance.
@@ -49,13 +49,14 @@ function getLLMConfig(apiKey, options = {}) {
clientOptions: {},
};
requestOptions = configureReasoning(requestOptions, systemOptions);
if (!/claude-3[-.]7/.test(mergedOptions.model)) {
if (mergedOptions.topP !== undefined) {
requestOptions.topP = mergedOptions.topP;
}
if (mergedOptions.topK !== undefined) {
requestOptions.topK = mergedOptions.topK;
}
requestOptions.topP = mergedOptions.topP;
requestOptions.topK = mergedOptions.topK;
} else if (requestOptions.thinking == null) {
requestOptions.topP = mergedOptions.topP;
requestOptions.topK = mergedOptions.topK;
}
const supportsCacheControl =


@@ -109,4 +109,45 @@ describe('getLLMConfig', () => {
// Just verifying that the promptCache setting is processed
expect(result.llmConfig).toBeDefined();
});
it('should include topK and topP for Claude-3.7 models when thinking is not enabled', () => {
// Test with thinking explicitly set to null/undefined
const result = getLLMConfig('test-api-key', {
modelOptions: {
model: 'claude-3-7-sonnet',
topK: 10,
topP: 0.9,
thinking: false,
},
});
expect(result.llmConfig).toHaveProperty('topK', 10);
expect(result.llmConfig).toHaveProperty('topP', 0.9);
// Test with thinking explicitly set to false
const result2 = getLLMConfig('test-api-key', {
modelOptions: {
model: 'claude-3-7-sonnet',
topK: 10,
topP: 0.9,
thinking: false,
},
});
expect(result2.llmConfig).toHaveProperty('topK', 10);
expect(result2.llmConfig).toHaveProperty('topP', 0.9);
// Test with decimal notation as well
const result3 = getLLMConfig('test-api-key', {
modelOptions: {
model: 'claude-3.7-sonnet',
topK: 10,
topP: 0.9,
thinking: false,
},
});
expect(result3.llmConfig).toHaveProperty('topK', 10);
expect(result3.llmConfig).toHaveProperty('topP', 0.9);
});
});


@@ -1,6 +1,5 @@
const { removeNullishValues, bedrockInputParser } = require('librechat-data-provider');
const { removeNullishValues } = require('librechat-data-provider');
const generateArtifactsPrompt = require('~/app/clients/prompts/artifacts');
const { logger } = require('~/config');
const buildOptions = (endpoint, parsedBody) => {
const {
@@ -15,12 +14,6 @@ const buildOptions = (endpoint, parsedBody) => {
artifacts,
...model_parameters
} = parsedBody;
let parsedParams = model_parameters;
try {
parsedParams = bedrockInputParser.parse(model_parameters);
} catch (error) {
logger.warn('Failed to parse bedrock input', error);
}
const endpointOption = removeNullishValues({
endpoint,
name,
@@ -31,7 +24,7 @@ const buildOptions = (endpoint, parsedBody) => {
spec,
promptPrefix,
maxContextTokens,
model_parameters: parsedParams,
model_parameters,
});
if (typeof artifacts === 'string') {


@@ -1,14 +1,16 @@
const { HttpsProxyAgent } = require('https-proxy-agent');
const {
EModelEndpoint,
Constants,
AuthType,
Constants,
EModelEndpoint,
bedrockInputParser,
bedrockOutputParser,
removeNullishValues,
} = require('librechat-data-provider');
const { getUserKey, checkUserKeyExpiry } = require('~/server/services/UserService');
const { sleep } = require('~/server/utils');
const getOptions = async ({ req, endpointOption }) => {
const getOptions = async ({ req, overrideModel, endpointOption }) => {
const {
BEDROCK_AWS_SECRET_ACCESS_KEY,
BEDROCK_AWS_ACCESS_KEY_ID,
@@ -62,39 +64,44 @@ const getOptions = async ({ req, endpointOption }) => {
/** @type {BedrockClientOptions} */
const requestOptions = {
model: endpointOption.model,
model: overrideModel ?? endpointOption.model,
region: BEDROCK_AWS_DEFAULT_REGION,
streaming: true,
streamUsage: true,
callbacks: [
{
handleLLMNewToken: async () => {
if (!streamRate) {
return;
}
await sleep(streamRate);
},
},
],
};
if (credentials) {
requestOptions.credentials = credentials;
}
if (BEDROCK_REVERSE_PROXY) {
requestOptions.endpointHost = BEDROCK_REVERSE_PROXY;
}
const configOptions = {};
if (PROXY) {
/** NOTE: NOT SUPPORTED BY BEDROCK */
configOptions.httpAgent = new HttpsProxyAgent(PROXY);
}
const llmConfig = bedrockOutputParser(
bedrockInputParser.parse(
removeNullishValues(Object.assign(requestOptions, endpointOption.model_parameters)),
),
);
if (credentials) {
llmConfig.credentials = credentials;
}
if (BEDROCK_REVERSE_PROXY) {
llmConfig.endpointHost = BEDROCK_REVERSE_PROXY;
}
llmConfig.callbacks = [
{
handleLLMNewToken: async () => {
if (!streamRate) {
return;
}
await sleep(streamRate);
},
},
];
return {
/** @type {BedrockClientOptions} */
llmConfig: removeNullishValues(Object.assign(requestOptions, endpointOption.model_parameters)),
llmConfig,
configOptions,
};
};


@@ -141,7 +141,8 @@ const initializeClient = async ({ req, res, endpointOption, optionsOnly, overrid
},
clientOptions,
);
const options = getLLMConfig(apiKey, clientOptions);
clientOptions.modelOptions.user = req.user.id;
const options = getLLMConfig(apiKey, clientOptions, endpoint);
if (!customOptions.streamRate) {
return options;
}


@@ -5,12 +5,7 @@ const { isEnabled } = require('~/server/utils');
const { GoogleClient } = require('~/app');
const initializeClient = async ({ req, res, endpointOption, overrideModel, optionsOnly }) => {
const {
GOOGLE_KEY,
GOOGLE_REVERSE_PROXY,
GOOGLE_AUTH_HEADER,
PROXY,
} = process.env;
const { GOOGLE_KEY, GOOGLE_REVERSE_PROXY, GOOGLE_AUTH_HEADER, PROXY } = process.env;
const isUserProvided = GOOGLE_KEY === 'user_provided';
const { key: expiresAt } = req.body;
@@ -43,6 +38,7 @@ const initializeClient = async ({ req, res, endpointOption, overrideModel, optio
if (googleConfig) {
clientOptions.streamRate = googleConfig.streamRate;
clientOptions.titleModel = googleConfig.titleModel;
}
if (allConfig) {


@@ -113,6 +113,7 @@ const initializeClient = async ({
if (!isAzureOpenAI && openAIConfig) {
clientOptions.streamRate = openAIConfig.streamRate;
clientOptions.titleModel = openAIConfig.titleModel;
}
/** @type {undefined | TBaseEndpoint} */
@@ -140,6 +141,7 @@ const initializeClient = async ({
},
clientOptions,
);
clientOptions.modelOptions.user = req.user.id;
const options = getLLMConfig(apiKey, clientOptions);
if (!clientOptions.streamRate) {
return options;


@@ -9,6 +9,7 @@ const { isEnabled } = require('~/server/utils');
* @param {Object} options - Additional options for configuring the LLM.
* @param {Object} [options.modelOptions] - Model-specific options.
* @param {string} [options.modelOptions.model] - The name of the model to use.
* @param {string} [options.modelOptions.user] - The user ID
* @param {number} [options.modelOptions.temperature] - Controls randomness in output generation (0-2).
* @param {number} [options.modelOptions.top_p] - Controls diversity via nucleus sampling (0-1).
* @param {number} [options.modelOptions.frequency_penalty] - Reduces repetition of token sequences (-2 to 2).
@@ -23,13 +24,13 @@ const { isEnabled } = require('~/server/utils');
* @param {boolean} [options.streaming] - Whether to use streaming mode.
* @param {Object} [options.addParams] - Additional parameters to add to the model options.
* @param {string[]} [options.dropParams] - Parameters to remove from the model options.
* @param {string|null} [endpoint=null] - The endpoint name
* @returns {Object} Configuration options for creating an LLM instance.
*/
function getLLMConfig(apiKey, options = {}) {
function getLLMConfig(apiKey, options = {}, endpoint = null) {
const {
modelOptions = {},
reverseProxyUrl,
useOpenRouter,
defaultQuery,
headers,
proxy,
@@ -56,9 +57,14 @@ function getLLMConfig(apiKey, options = {}) {
});
}
let useOpenRouter;
/** @type {OpenAIClientOptions['configuration']} */
const configOptions = {};
if (useOpenRouter || (reverseProxyUrl && reverseProxyUrl.includes(KnownEndpoints.openrouter))) {
if (
(reverseProxyUrl && reverseProxyUrl.includes(KnownEndpoints.openrouter)) ||
(endpoint && endpoint.toLowerCase().includes(KnownEndpoints.openrouter))
) {
useOpenRouter = true;
llmConfig.include_reasoning = true;
configOptions.baseURL = reverseProxyUrl;
configOptions.defaultHeaders = Object.assign(
@@ -118,6 +124,13 @@ function getLLMConfig(apiKey, options = {}) {
llmConfig.organization = process.env.OPENAI_ORGANIZATION;
}
if (useOpenRouter && llmConfig.reasoning_effort != null) {
llmConfig.reasoning = {
effort: llmConfig.reasoning_effort,
};
delete llmConfig.reasoning_effort;
}
return {
/** @type {OpenAIClientOptions} */
llmConfig,
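The last hunk above maps the flat OpenAI-style reasoning_effort value onto OpenRouter's nested reasoning object once the endpoint is detected as OpenRouter. Roughly, the transformation looks like this (model name and effort value are illustrative only):

// Before: flat field on the config when a reasoning effort is requested
// { model: 'openai/o3-mini', include_reasoning: true, reasoning_effort: 'high' }
// After the conversion applied when useOpenRouter is true
// { model: 'openai/o3-mini', include_reasoning: true, reasoning: { effort: 'high' } }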


@@ -1,4 +1,3 @@
// Code Files
const axios = require('axios');
const FormData = require('form-data');
const { getCodeBaseURL } = require('@librechat/agents');
@@ -16,7 +15,8 @@ const MAX_FILE_SIZE = 150 * 1024 * 1024;
async function getCodeOutputDownloadStream(fileIdentifier, apiKey) {
try {
const baseURL = getCodeBaseURL();
const response = await axios({
/** @type {import('axios').AxiosRequestConfig} */
const options = {
method: 'get',
url: `${baseURL}/download/${fileIdentifier}`,
responseType: 'stream',
@@ -25,10 +25,22 @@ async function getCodeOutputDownloadStream(fileIdentifier, apiKey) {
'X-API-Key': apiKey,
},
timeout: 15000,
});
};
if (process.env.PROXY) {
options.proxy = {
host: process.env.PROXY,
protocol: process.env.PROXY.startsWith('https') ? 'https' : 'http',
};
}
const response = await axios(options);
return response;
} catch (error) {
logAxiosError({
message: `Error downloading code environment file stream: ${error.message}`,
error,
});
throw new Error(`Error downloading file: ${error.message}`);
}
}
@@ -54,7 +66,8 @@ async function uploadCodeEnvFile({ req, stream, filename, apiKey, entity_id = ''
form.append('file', stream, filename);
const baseURL = getCodeBaseURL();
const response = await axios.post(`${baseURL}/upload`, form, {
/** @type {import('axios').AxiosRequestConfig} */
const options = {
headers: {
...form.getHeaders(),
'Content-Type': 'multipart/form-data',
@@ -64,7 +77,16 @@ async function uploadCodeEnvFile({ req, stream, filename, apiKey, entity_id = ''
},
maxContentLength: MAX_FILE_SIZE,
maxBodyLength: MAX_FILE_SIZE,
});
};
if (process.env.PROXY) {
options.proxy = {
host: process.env.PROXY,
protocol: process.env.PROXY.startsWith('https') ? 'https' : 'http',
};
}
const response = await axios.post(`${baseURL}/upload`, form, options);
/** @type {{ message: string; session_id: string; files: Array<{ fileId: string; filename: string }> }} */
const result = response.data;


@@ -4,7 +4,9 @@ const { HttpsProxyAgent } = require('https-proxy-agent');
const { EModelEndpoint, defaultModels, CacheKeys } = require('librechat-data-provider');
const { inputSchema, logAxiosError, extractBaseURL, processModelData } = require('~/utils');
const { OllamaClient } = require('~/app/clients/OllamaClient');
const { isUserProvided } = require('~/server/utils');
const getLogStores = require('~/cache/getLogStores');
const { logger } = require('~/config');
/**
* Splits a string by commas and trims each resulting value.
@@ -42,7 +44,7 @@ const fetchModels = async ({
user,
apiKey,
baseURL,
name = 'OpenAI',
name = EModelEndpoint.openAI,
azure = false,
userIdQuery = false,
createTokenConfig = true,
@@ -64,12 +66,19 @@
try {
const options = {
headers: {
Authorization: `Bearer ${apiKey}`,
},
headers: {},
timeout: 5000,
};
if (name === EModelEndpoint.anthropic) {
options.headers = {
'x-api-key': apiKey,
'anthropic-version': process.env.ANTHROPIC_VERSION || '2023-06-01',
};
} else {
options.headers.Authorization = `Bearer ${apiKey}`;
}
if (process.env.PROXY) {
options.httpsAgent = new HttpsProxyAgent(process.env.PROXY);
}
@@ -148,7 +157,7 @@ const fetchOpenAIModels = async (opts, _models = []) => {
baseURL,
azure: opts.azure,
user: opts.user,
name: baseURL,
name: EModelEndpoint.openAI,
});
}
@@ -157,7 +166,7 @@ const fetchOpenAIModels = async (opts, _models = []) => {
}
if (baseURL === openaiBaseURL) {
const regex = /(text-davinci-003|gpt-|o\d+-)/;
const regex = /(text-davinci-003|gpt-|o\d+)/;
const excludeRegex = /audio|realtime/;
models = models.filter((model) => regex.test(model) && !excludeRegex.test(model));
const instructModels = models.filter((model) => model.includes('instruct'));
@@ -231,13 +240,71 @@ const getChatGPTBrowserModels = () => {
return models;
};
const getAnthropicModels = () => {
/**
* Fetches models from the Anthropic API.
* @async
* @function
* @param {object} opts - The options for fetching the models.
* @param {string} opts.user - The user ID to send to the API.
* @param {string[]} [_models=[]] - The models to use as a fallback.
*/
const fetchAnthropicModels = async (opts, _models = []) => {
let models = _models.slice() ?? [];
let apiKey = process.env.ANTHROPIC_API_KEY;
const anthropicBaseURL = 'https://api.anthropic.com/v1';
let baseURL = anthropicBaseURL;
let reverseProxyUrl = process.env.ANTHROPIC_REVERSE_PROXY;
if (reverseProxyUrl) {
baseURL = extractBaseURL(reverseProxyUrl);
}
if (!apiKey) {
return models;
}
const modelsCache = getLogStores(CacheKeys.MODEL_QUERIES);
const cachedModels = await modelsCache.get(baseURL);
if (cachedModels) {
return cachedModels;
}
if (baseURL) {
models = await fetchModels({
apiKey,
baseURL,
user: opts.user,
name: EModelEndpoint.anthropic,
tokenKey: EModelEndpoint.anthropic,
});
}
if (models.length === 0) {
return _models;
}
await modelsCache.set(baseURL, models);
return models;
};
const getAnthropicModels = async (opts = {}) => {
let models = defaultModels[EModelEndpoint.anthropic];
if (process.env.ANTHROPIC_MODELS) {
models = splitAndTrim(process.env.ANTHROPIC_MODELS);
return models;
}
return models;
if (isUserProvided(process.env.ANTHROPIC_API_KEY)) {
return models;
}
try {
return await fetchAnthropicModels(opts, models);
} catch (error) {
logger.error('Error fetching Anthropic models:', error);
return models;
}
};
const getGoogleModels = () => {


@@ -352,15 +352,15 @@ describe('splitAndTrim', () => {
});
describe('getAnthropicModels', () => {
it('returns default models when ANTHROPIC_MODELS is not set', () => {
it('returns default models when ANTHROPIC_MODELS is not set', async () => {
delete process.env.ANTHROPIC_MODELS;
const models = getAnthropicModels();
const models = await getAnthropicModels();
expect(models).toEqual(defaultModels[EModelEndpoint.anthropic]);
});
it('returns models from ANTHROPIC_MODELS when set', () => {
it('returns models from ANTHROPIC_MODELS when set', async () => {
process.env.ANTHROPIC_MODELS = 'claude-1, claude-2 ';
const models = getAnthropicModels();
const models = await getAnthropicModels();
expect(models).toEqual(['claude-1', 'claude-2']);
});
});
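The new fetchAnthropicModels helper and the Anthropic header branch added to fetchModels above, together with the now-async getAnthropicModels exercised by the updated tests, rely on Anthropic's model-listing endpoint, which authenticates with an x-api-key header plus an anthropic-version header rather than a Bearer token. A hedged sketch of the request being made (endpoint path and the data[].id response shape follow Anthropic's public API; the actual request and parsing live in fetchModels):

const axios = require('axios');

async function listAnthropicModels(apiKey) {
  // Anthropic expects x-api-key and anthropic-version headers, not Authorization: Bearer.
  const { data } = await axios.get('https://api.anthropic.com/v1/models', {
    headers: {
      'x-api-key': apiKey,
      'anthropic-version': process.env.ANTHROPIC_VERSION || '2023-06-01',
    },
    timeout: 5000,
  });
  // Model entries are expected under `data`, each carrying an `id` string.
  return (data?.data ?? []).map((model) => model.id);
}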