From ded3cd8876501c1daa67d54bbb4b02d2b85efc3d Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Mon, 10 Mar 2025 17:23:46 -0400 Subject: [PATCH 01/12] =?UTF-8?q?=F0=9F=94=8D=20feat:=20Mistral=20OCR=20AP?= =?UTF-8?q?I=20/=20Upload=20Files=20as=20Text=20(#6274)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * refactor: move `loadAuthValues` to `~/services/Tools/credentials` * feat: add createAxiosInstance function to configure axios with proxy support * WIP: First pass mistral ocr * refactor: replace getConvoFiles with getToolFiles for improved file retrieval logic * refactor: improve document formatting in encodeAndFormat function * refactor: remove unused resendFiles parameter from buildOptions function (this option comes from the agent config) * fix: update getFiles call to include files with `text` property as well * refactor: move file handling to `initializeAgentOptions` * refactor: enhance addImageURLs method to handle OCR text and improve message formatting * refactor: update message formatting to handle OCR text in various content types * refactor: remove unused resendFiles property from compactAgentsSchema * fix: add error handling for Mistral OCR document upload and logging * refactor: integrate OCR capability into file upload options and configuration * refactor: skip processing for text source files in delete request, as they are directly tied to database * feat: add metadata field to ExtendedFile type and update PanelColumns and PanelTable components for localization and metadata handling * fix: source icon styling * wip: first pass, frontend file context agent resources * refactor: add hover card with contextual information for File Context (OCR) in FileContext component * feat: enhance file processing by integrating file retrieval for OCR resources in agent initialization * feat: implement OCR config; fix: agent resource deletion for ocr files * feat: enhance agent initialization by adding OCR capability check in resource priming * ci: fix `~/config` module mock * ci: add OCR property expectation in AppService tests * refactor: simplify OCR config loading by removing environment variable extraction, to be done when OCR is actually performed * ci: add unit test to ensure environment variable references are not parsed in OCR config * refactor: disable base64 image inclusion in OCR request * refactor: enhance OCR configuration handling by validating environment variables and providing defaults * refactor: use file stream from disk for mistral ocr api --- api/app/clients/BaseClient.js | 10 +- api/app/clients/tools/util/handleTools.js | 41 +- api/app/clients/tools/util/index.js | 3 +- api/config/index.js | 16 + api/models/Conversation.js | 52 +- api/models/File.js | 6 +- api/server/controllers/agents/callbacks.js | 2 +- api/server/controllers/agents/client.js | 27 +- api/server/controllers/tools.js | 3 +- api/server/middleware/buildEndpointOption.js | 11 +- api/server/routes/files/files.js | 2 +- api/server/services/AppService.js | 9 +- api/server/services/AppService.spec.js | 30 + api/server/services/Endpoints/agents/build.js | 12 +- .../services/Endpoints/agents/initialize.js | 68 +- api/server/services/Files/MistralOCR/crud.js | 207 +++++ .../services/Files/MistralOCR/crud.spec.js | 737 ++++++++++++++++++ api/server/services/Files/MistralOCR/index.js | 5 + api/server/services/Files/images/encode.js | 8 + api/server/services/Files/process.js | 54 +- api/server/services/Files/strategies.js | 23 + api/server/services/Tools/credentials.js 
| 56 ++ api/server/utils/handleText.js | 1 + api/test/__mocks__/logger.js | 3 + api/typedefs.js | 48 ++ client/src/common/agents-types.ts | 1 + client/src/common/types.ts | 1 + .../Chat/Input/Files/AttachFileMenu.tsx | 13 +- .../Chat/Input/Files/DragDropModal.tsx | 8 +- .../Chat/Input/Files/FilePreview.tsx | 7 +- .../Chat/Input/Files/SourceIcon.tsx | 41 +- .../SidePanel/Agents/AgentConfig.tsx | 29 +- .../SidePanel/Agents/FileContext.tsx | 128 +++ .../SidePanel/Files/PanelColumns.tsx | 11 +- .../SidePanel/Files/PanelFileCell.tsx | 1 - .../components/SidePanel/Files/PanelTable.tsx | 1 + client/src/data-provider/Files/mutations.ts | 5 +- client/src/locales/en/translation.json | 7 +- client/src/utils/forms.tsx | 16 +- package-lock.json | 2 +- packages/data-provider/package.json | 2 +- packages/data-provider/src/config.ts | 17 +- packages/data-provider/src/index.ts | 1 + packages/data-provider/src/ocr.ts | 14 + packages/data-provider/src/schemas.ts | 1 - .../data-provider/src/types/assistants.ts | 6 +- packages/data-provider/src/types/files.ts | 2 + packages/data-schemas/src/schema/file.ts | 4 + 48 files changed, 1621 insertions(+), 131 deletions(-) create mode 100644 api/server/services/Files/MistralOCR/crud.js create mode 100644 api/server/services/Files/MistralOCR/crud.spec.js create mode 100644 api/server/services/Files/MistralOCR/index.js create mode 100644 api/server/services/Tools/credentials.js create mode 100644 client/src/components/SidePanel/Agents/FileContext.tsx create mode 100644 packages/data-provider/src/ocr.ts diff --git a/api/app/clients/BaseClient.js b/api/app/clients/BaseClient.js index 61b39a8f6d..77e14c07d0 100644 --- a/api/app/clients/BaseClient.js +++ b/api/app/clients/BaseClient.js @@ -1121,9 +1121,13 @@ class BaseClient { return message; } - const files = await getFiles({ - file_id: { $in: fileIds }, - }); + const files = await getFiles( + { + file_id: { $in: fileIds }, + }, + {}, + {}, + ); await this.addImageURLs(message, files, this.visionMode); diff --git a/api/app/clients/tools/util/handleTools.js b/api/app/clients/tools/util/handleTools.js index ae19a158ee..063d6e0327 100644 --- a/api/app/clients/tools/util/handleTools.js +++ b/api/app/clients/tools/util/handleTools.js @@ -21,6 +21,7 @@ const { } = require('../'); const { primeFiles: primeCodeFiles } = require('~/server/services/Files/Code/process'); const { createFileSearchTool, primeFiles: primeSearchFiles } = require('./fileSearch'); +const { loadAuthValues } = require('~/server/services/Tools/credentials'); const { createMCPTool } = require('~/server/services/MCP'); const { loadSpecs } = require('./loadSpecs'); const { logger } = require('~/config'); @@ -90,45 +91,6 @@ const validateTools = async (user, tools = []) => { } }; -const loadAuthValues = async ({ userId, authFields, throwError = true }) => { - let authValues = {}; - - /** - * Finds the first non-empty value for the given authentication field, supporting alternate fields. - * @param {string[]} fields Array of strings representing the authentication fields. Supports alternate fields delimited by "||". - * @returns {Promise<{ authField: string, authValue: string} | null>} An object containing the authentication field and value, or null if not found. 
- */ - const findAuthValue = async (fields) => { - for (const field of fields) { - let value = process.env[field]; - if (value) { - return { authField: field, authValue: value }; - } - try { - value = await getUserPluginAuthValue(userId, field, throwError); - } catch (err) { - if (field === fields[fields.length - 1] && !value) { - throw err; - } - } - if (value) { - return { authField: field, authValue: value }; - } - } - return null; - }; - - for (let authField of authFields) { - const fields = authField.split('||'); - const result = await findAuthValue(fields); - if (result) { - authValues[result.authField] = result.authValue; - } - } - - return authValues; -}; - /** @typedef {typeof import('@langchain/core/tools').Tool} ToolConstructor */ /** @typedef {import('@langchain/core/tools').Tool} Tool */ @@ -348,7 +310,6 @@ const loadTools = async ({ module.exports = { loadToolWithAuth, - loadAuthValues, validateTools, loadTools, }; diff --git a/api/app/clients/tools/util/index.js b/api/app/clients/tools/util/index.js index 73d10270b6..ea67bb4ced 100644 --- a/api/app/clients/tools/util/index.js +++ b/api/app/clients/tools/util/index.js @@ -1,9 +1,8 @@ -const { validateTools, loadTools, loadAuthValues } = require('./handleTools'); +const { validateTools, loadTools } = require('./handleTools'); const handleOpenAIErrors = require('./handleOpenAIErrors'); module.exports = { handleOpenAIErrors, - loadAuthValues, validateTools, loadTools, }; diff --git a/api/config/index.js b/api/config/index.js index aaf8bb2764..dd765efb8e 100644 --- a/api/config/index.js +++ b/api/config/index.js @@ -1,3 +1,4 @@ +const axios = require('axios'); const { EventSource } = require('eventsource'); const { Time, CacheKeys } = require('librechat-data-provider'); const logger = require('./winston'); @@ -47,9 +48,24 @@ const sendEvent = (res, event) => { res.write(`event: message\ndata: ${JSON.stringify(event)}\n\n`); }; +function createAxiosInstance() { + const instance = axios.create(); + + if (process.env.proxy) { + const url = new URL(process.env.proxy); + instance.defaults.proxy = { + host: url.hostname, + protocol: url.protocol.replace(':', ''), + }; + } + + return instance; +} + module.exports = { logger, sendEvent, getMCPManager, + createAxiosInstance, getFlowStateManager, }; diff --git a/api/models/Conversation.js b/api/models/Conversation.js index 9e51926ebc..f609b96c5c 100644 --- a/api/models/Conversation.js +++ b/api/models/Conversation.js @@ -15,19 +15,6 @@ const searchConversation = async (conversationId) => { throw new Error('Error searching conversation'); } }; -/** - * Searches for a conversation by conversationId and returns associated file ids. - * @param {string} conversationId - The conversation's ID. - * @returns {Promise} - */ -const getConvoFiles = async (conversationId) => { - try { - return (await Conversation.findOne({ conversationId }, 'files').lean())?.files ?? []; - } catch (error) { - logger.error('[getConvoFiles] Error getting conversation files', error); - throw new Error('Error getting conversation files'); - } -}; /** * Retrieves a single conversation for a given user and conversation ID. @@ -73,9 +60,46 @@ const deleteNullOrEmptyConversations = async () => { } }; +/** + * Retrieves files from a conversation that have either embedded=true + * or a metadata.fileIdentifier. Simplified and efficient query. 
+ * + * @param {string} conversationId - The conversation ID + * @returns {Promise} - Filtered array of matching file objects + */ +const getToolFiles = async (conversationId) => { + try { + const [result] = await Conversation.aggregate([ + { $match: { conversationId } }, + { + $project: { + files: { + $filter: { + input: '$files', + as: 'file', + cond: { + $or: [ + { $eq: ['$$file.embedded', true] }, + { $ifNull: ['$$file.metadata.fileIdentifier', false] }, + ], + }, + }, + }, + _id: 0, + }, + }, + ]).exec(); + + return result?.files || []; + } catch (error) { + logger.error('[getConvoEmbeddedFiles] Error fetching embedded files:', error); + throw new Error('Error fetching embedded files'); + } +}; + module.exports = { Conversation, - getConvoFiles, + getToolFiles, searchConversation, deleteNullOrEmptyConversations, /** diff --git a/api/models/File.js b/api/models/File.js index 870a18a7c8..3d62f27abb 100644 --- a/api/models/File.js +++ b/api/models/File.js @@ -17,11 +17,13 @@ const findFileById = async (file_id, options = {}) => { * Retrieves files matching a given filter, sorted by the most recently updated. * @param {Object} filter - The filter criteria to apply. * @param {Object} [_sortOptions] - Optional sort parameters. + * @param {Object|String} [selectFields={ text: 0 }] - Fields to include/exclude in the query results. + * Default excludes the 'text' field. * @returns {Promise>} A promise that resolves to an array of file documents. */ -const getFiles = async (filter, _sortOptions) => { +const getFiles = async (filter, _sortOptions, selectFields = { text: 0 }) => { const sortOptions = { updatedAt: -1, ..._sortOptions }; - return await File.find(filter).sort(sortOptions).lean(); + return await File.find(filter).select(selectFields).sort(sortOptions).lean(); }; /** diff --git a/api/server/controllers/agents/callbacks.js b/api/server/controllers/agents/callbacks.js index 45beefe7e6..6622ec3815 100644 --- a/api/server/controllers/agents/callbacks.js +++ b/api/server/controllers/agents/callbacks.js @@ -10,8 +10,8 @@ const { ChatModelStreamHandler, } = require('@librechat/agents'); const { processCodeOutput } = require('~/server/services/Files/Code/process'); +const { loadAuthValues } = require('~/server/services/Tools/credentials'); const { saveBase64Image } = require('~/server/services/Files/process'); -const { loadAuthValues } = require('~/app/clients/tools/util'); const { logger, sendEvent } = require('~/config'); /** @typedef {import('@librechat/agents').Graph} Graph */ diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js index 628b62e5ea..d591fe3247 100644 --- a/api/server/controllers/agents/client.js +++ b/api/server/controllers/agents/client.js @@ -223,14 +223,23 @@ class AgentClient extends BaseClient { }; } + /** + * + * @param {TMessage} message + * @param {Array} attachments + * @returns {Promise>>} + */ async addImageURLs(message, attachments) { - const { files, image_urls } = await encodeAndFormat( + const { files, text, image_urls } = await encodeAndFormat( this.options.req, attachments, this.options.agent.provider, VisionModes.agents, ); message.image_urls = image_urls.length ? 
image_urls : undefined; + if (text && text.length) { + message.ocr = text; + } return files; } @@ -308,7 +317,21 @@ class AgentClient extends BaseClient { assistantName: this.options?.modelLabel, }); - const needsTokenCount = this.contextStrategy && !orderedMessages[i].tokenCount; + if (message.ocr && i !== orderedMessages.length - 1) { + if (typeof formattedMessage.content === 'string') { + formattedMessage.content = message.ocr + '\n' + formattedMessage.content; + } else { + const textPart = formattedMessage.content.find((part) => part.type === 'text'); + textPart + ? (textPart.text = message.ocr + '\n' + textPart.text) + : formattedMessage.content.unshift({ type: 'text', text: message.ocr }); + } + } else if (message.ocr && i === orderedMessages.length - 1) { + systemContent = [systemContent, message.ocr].join('\n'); + } + + const needsTokenCount = + (this.contextStrategy && !orderedMessages[i].tokenCount) || message.ocr; /* If tokens were never counted, or, is a Vision request and the message has files, count again */ if (needsTokenCount || (this.isVisionModel && (message.image_urls || message.files))) { diff --git a/api/server/controllers/tools.js b/api/server/controllers/tools.js index 1c5330af35..b37b6fcb8c 100644 --- a/api/server/controllers/tools.js +++ b/api/server/controllers/tools.js @@ -10,7 +10,8 @@ const { const { processFileURL, uploadImageBuffer } = require('~/server/services/Files/process'); const { processCodeOutput } = require('~/server/services/Files/Code/process'); const { createToolCall, getToolCallsByConvo } = require('~/models/ToolCall'); -const { loadAuthValues, loadTools } = require('~/app/clients/tools/util'); +const { loadAuthValues } = require('~/server/services/Tools/credentials'); +const { loadTools } = require('~/app/clients/tools/util'); const { checkAccess } = require('~/server/middleware'); const { getMessage } = require('~/models/Message'); const { logger } = require('~/config'); diff --git a/api/server/middleware/buildEndpointOption.js b/api/server/middleware/buildEndpointOption.js index a0ce754a1c..041864b025 100644 --- a/api/server/middleware/buildEndpointOption.js +++ b/api/server/middleware/buildEndpointOption.js @@ -10,7 +10,6 @@ const openAI = require('~/server/services/Endpoints/openAI'); const agents = require('~/server/services/Endpoints/agents'); const custom = require('~/server/services/Endpoints/custom'); const google = require('~/server/services/Endpoints/google'); -const { getConvoFiles } = require('~/models/Conversation'); const { handleError } = require('~/server/utils'); const buildFunction = { @@ -87,16 +86,8 @@ async function buildEndpointOption(req, res, next) { // TODO: use `getModelsConfig` only when necessary const modelsConfig = await getModelsConfig(req); - const { resendFiles = true } = req.body.endpointOption; req.body.endpointOption.modelsConfig = modelsConfig; - if (isAgents && resendFiles && req.body.conversationId) { - const fileIds = await getConvoFiles(req.body.conversationId); - const requestFiles = req.body.files ?? 
[]; - if (requestFiles.length || fileIds.length) { - req.body.endpointOption.attachments = processFiles(requestFiles, fileIds); - } - } else if (req.body.files) { - // hold the promise + if (req.body.files && !isAgents) { req.body.endpointOption.attachments = processFiles(req.body.files); } next(); diff --git a/api/server/routes/files/files.js b/api/server/routes/files/files.js index c320f7705b..c371b8e28e 100644 --- a/api/server/routes/files/files.js +++ b/api/server/routes/files/files.js @@ -16,7 +16,7 @@ const { } = require('~/server/services/Files/process'); const { getStrategyFunctions } = require('~/server/services/Files/strategies'); const { getOpenAIClient } = require('~/server/controllers/assistants/helpers'); -const { loadAuthValues } = require('~/app/clients/tools/util'); +const { loadAuthValues } = require('~/server/services/Tools/credentials'); const { getAgent } = require('~/models/Agent'); const { getFiles } = require('~/models/File'); const { logger } = require('~/config'); diff --git a/api/server/services/AppService.js b/api/server/services/AppService.js index d194d31a6b..1accd7eba6 100644 --- a/api/server/services/AppService.js +++ b/api/server/services/AppService.js @@ -1,4 +1,9 @@ -const { FileSources, EModelEndpoint, getConfigDefaults } = require('librechat-data-provider'); +const { + FileSources, + EModelEndpoint, + loadOCRConfig, + getConfigDefaults, +} = require('librechat-data-provider'); const { checkVariables, checkHealth, checkConfig, checkAzureVariables } = require('./start/checks'); const { azureAssistantsDefaults, assistantsConfigSetup } = require('./start/assistants'); const { initializeFirebase } = require('./Files/Firebase/initialize'); @@ -25,6 +30,7 @@ const AppService = async (app) => { const config = (await loadCustomConfig()) ?? {}; const configDefaults = getConfigDefaults(); + const ocr = loadOCRConfig(config.ocr); const filteredTools = config.filteredTools; const includedTools = config.includedTools; const fileStrategy = config.fileStrategy ?? 
configDefaults.fileStrategy; @@ -57,6 +63,7 @@ const AppService = async (app) => { const interfaceConfig = await loadDefaultInterface(config, configDefaults); const defaultLocals = { + ocr, paths, fileStrategy, socialLogins, diff --git a/api/server/services/AppService.spec.js b/api/server/services/AppService.spec.js index 61ac80fc6c..e47bfe7d5d 100644 --- a/api/server/services/AppService.spec.js +++ b/api/server/services/AppService.spec.js @@ -120,6 +120,7 @@ describe('AppService', () => { }, }, paths: expect.anything(), + ocr: expect.anything(), imageOutputType: expect.any(String), fileConfig: undefined, secureImageLinks: undefined, @@ -588,4 +589,33 @@ describe('AppService updating app.locals and issuing warnings', () => { ); }); }); + + it('should not parse environment variable references in OCR config', async () => { + // Mock custom configuration with env variable references in OCR config + const mockConfig = { + ocr: { + apiKey: '${OCR_API_KEY_CUSTOM_VAR_NAME}', + baseURL: '${OCR_BASEURL_CUSTOM_VAR_NAME}', + strategy: 'mistral_ocr', + mistralModel: 'mistral-medium', + }, + }; + + require('./Config/loadCustomConfig').mockImplementationOnce(() => Promise.resolve(mockConfig)); + + // Set actual environment variables with different values + process.env.OCR_API_KEY_CUSTOM_VAR_NAME = 'actual-api-key'; + process.env.OCR_BASEURL_CUSTOM_VAR_NAME = 'https://actual-ocr-url.com'; + + // Initialize app + const app = { locals: {} }; + await AppService(app); + + // Verify that the raw string references were preserved and not interpolated + expect(app.locals.ocr).toBeDefined(); + expect(app.locals.ocr.apiKey).toEqual('${OCR_API_KEY_CUSTOM_VAR_NAME}'); + expect(app.locals.ocr.baseURL).toEqual('${OCR_BASEURL_CUSTOM_VAR_NAME}'); + expect(app.locals.ocr.strategy).toEqual('mistral_ocr'); + expect(app.locals.ocr.mistralModel).toEqual('mistral-medium'); + }); }); diff --git a/api/server/services/Endpoints/agents/build.js b/api/server/services/Endpoints/agents/build.js index 027937e7fd..999cdc16be 100644 --- a/api/server/services/Endpoints/agents/build.js +++ b/api/server/services/Endpoints/agents/build.js @@ -2,15 +2,8 @@ const { loadAgent } = require('~/models/Agent'); const { logger } = require('~/config'); const buildOptions = (req, endpoint, parsedBody) => { - const { - spec, - iconURL, - agent_id, - instructions, - maxContextTokens, - resendFiles = true, - ...model_parameters - } = parsedBody; + const { spec, iconURL, agent_id, instructions, maxContextTokens, ...model_parameters } = + parsedBody; const agentPromise = loadAgent({ req, agent_id, @@ -24,7 +17,6 @@ const buildOptions = (req, endpoint, parsedBody) => { iconURL, endpoint, agent_id, - resendFiles, instructions, maxContextTokens, model_parameters, diff --git a/api/server/services/Endpoints/agents/initialize.js b/api/server/services/Endpoints/agents/initialize.js index 1cf8ad7a67..11c8dc6fc4 100644 --- a/api/server/services/Endpoints/agents/initialize.js +++ b/api/server/services/Endpoints/agents/initialize.js @@ -2,6 +2,7 @@ const { createContentAggregator, Providers } = require('@librechat/agents'); const { EModelEndpoint, getResponseSender, + AgentCapabilities, providerEndpointMap, } = require('librechat-data-provider'); const { @@ -15,10 +16,13 @@ const initCustom = require('~/server/services/Endpoints/custom/initialize'); const initGoogle = require('~/server/services/Endpoints/google/initialize'); const generateArtifactsPrompt = require('~/app/clients/prompts/artifacts'); const { getCustomEndpointConfig } = 
require('~/server/services/Config'); +const { processFiles } = require('~/server/services/Files/process'); const { loadAgentTools } = require('~/server/services/ToolService'); const AgentClient = require('~/server/controllers/agents/client'); +const { getToolFiles } = require('~/models/Conversation'); const { getModelMaxTokens } = require('~/utils'); const { getAgent } = require('~/models/Agent'); +const { getFiles } = require('~/models/File'); const { logger } = require('~/config'); const providerConfigMap = { @@ -34,20 +38,38 @@ const providerConfigMap = { }; /** - * + * @param {ServerRequest} req * @param {Promise> | undefined} _attachments * @param {AgentToolResources | undefined} _tool_resources * @returns {Promise<{ attachments: Array | undefined, tool_resources: AgentToolResources | undefined }>} */ -const primeResources = async (_attachments, _tool_resources) => { +const primeResources = async (req, _attachments, _tool_resources) => { try { + /** @type {Array | undefined} */ + let attachments; + const tool_resources = _tool_resources ?? {}; + const isOCREnabled = (req.app.locals?.[EModelEndpoint.agents]?.capabilities ?? []).includes( + AgentCapabilities.ocr, + ); + if (tool_resources.ocr?.file_ids && isOCREnabled) { + const context = await getFiles( + { + file_id: { $in: tool_resources.ocr.file_ids }, + }, + {}, + {}, + ); + attachments = (attachments ?? []).concat(context); + } if (!_attachments) { - return { attachments: undefined, tool_resources: _tool_resources }; + return { attachments, tool_resources }; } /** @type {Array | undefined} */ const files = await _attachments; - const attachments = []; - const tool_resources = _tool_resources ?? {}; + if (!attachments) { + /** @type {Array} */ + attachments = []; + } for (const file of files) { if (!file) { @@ -82,7 +104,6 @@ const primeResources = async (_attachments, _tool_resources) => { * @param {ServerResponse} params.res * @param {Agent} params.agent * @param {object} [params.endpointOption] - * @param {AgentToolResources} [params.tool_resources] * @param {boolean} [params.isInitialAgent] * @returns {Promise} */ @@ -91,9 +112,28 @@ const initializeAgentOptions = async ({ res, agent, endpointOption, - tool_resources, isInitialAgent = false, }) => { + let currentFiles; + const requestFiles = req.body.files ?? []; + if ( + isInitialAgent && + req.body.conversationId != null && + agent.model_parameters?.resendFiles === true + ) { + const fileIds = (await getToolFiles(req.body.conversationId)).map((f) => f.file_id); + if (requestFiles.length || fileIds.length) { + currentFiles = await processFiles(requestFiles, fileIds); + } + } else if (isInitialAgent && requestFiles.length) { + currentFiles = await processFiles(requestFiles); + } + + const { attachments, tool_resources } = await primeResources( + req, + currentFiles, + agent.tool_resources, + ); const { tools, toolContextMap } = await loadAgentTools({ req, res, @@ -160,6 +200,7 @@ const initializeAgentOptions = async ({ return { ...agent, tools, + attachments, toolContextMap, maxContextTokens: agent.max_context_tokens ?? 
@@ -197,11 +238,6 @@ const initializeClient = async ({ req, res, endpointOption }) => { throw new Error('Agent not found'); } - const { attachments, tool_resources } = await primeResources( - endpointOption.attachments, - primaryAgent.tool_resources, - ); - const agentConfigs = new Map(); // Handle primary agent @@ -210,7 +246,6 @@ const initializeClient = async ({ req, res, endpointOption }) => { res, agent: primaryAgent, endpointOption, - tool_resources, isInitialAgent: true, }); @@ -240,18 +275,19 @@ const initializeClient = async ({ req, res, endpointOption }) => { const client = new AgentClient({ req, - agent: primaryConfig, sender, - attachments, contentParts, + agentConfigs, eventHandlers, collectedUsage, artifactPromises, + agent: primaryConfig, spec: endpointOption.spec, iconURL: endpointOption.iconURL, - agentConfigs, endpoint: EModelEndpoint.agents, + attachments: primaryConfig.attachments, maxContextTokens: primaryConfig.maxContextTokens, + resendFiles: primaryConfig.model_parameters?.resendFiles ?? true, }); return { client }; diff --git a/api/server/services/Files/MistralOCR/crud.js b/api/server/services/Files/MistralOCR/crud.js new file mode 100644 index 0000000000..cef8297519 --- /dev/null +++ b/api/server/services/Files/MistralOCR/crud.js @@ -0,0 +1,207 @@ +// ~/server/services/Files/MistralOCR/crud.js +const fs = require('fs'); +const path = require('path'); +const FormData = require('form-data'); +const { FileSources, envVarRegex, extractEnvVariable } = require('librechat-data-provider'); +const { loadAuthValues } = require('~/server/services/Tools/credentials'); +const { logger, createAxiosInstance } = require('~/config'); +const { logAxiosError } = require('~/utils'); + +const axios = createAxiosInstance(); + +/** + * Uploads a document to Mistral API using file streaming to avoid loading the entire file into memory + * + * @param {Object} params Upload parameters + * @param {string} params.filePath The path to the file on disk + * @param {string} [params.fileName] Optional filename to use (defaults to the name from filePath) + * @param {string} params.apiKey Mistral API key + * @param {string} [params.baseURL=https://api.mistral.ai/v1] Mistral API base URL + * @returns {Promise} The response from Mistral API + */ +async function uploadDocumentToMistral({ + filePath, + fileName = '', + apiKey, + baseURL = 'https://api.mistral.ai/v1', +}) { + const form = new FormData(); + form.append('purpose', 'ocr'); + const actualFileName = fileName || path.basename(filePath); + const fileStream = fs.createReadStream(filePath); + form.append('file', fileStream, { filename: actualFileName }); + + return axios + .post(`${baseURL}/files`, form, { + headers: { + Authorization: `Bearer ${apiKey}`, + ...form.getHeaders(), + }, + maxBodyLength: Infinity, + maxContentLength: Infinity, + }) + .then((res) => res.data) + .catch((error) => { + logger.error('Error uploading document to Mistral:', error.message); + throw error; + }); +} + +async function getSignedUrl({ + apiKey, + fileId, + expiry = 24, + baseURL = 'https://api.mistral.ai/v1', +}) { + return axios + .get(`${baseURL}/files/${fileId}/url?expiry=${expiry}`, { + headers: { + Authorization: `Bearer ${apiKey}`, + }, + }) + .then((res) => res.data) + .catch((error) => { + logger.error('Error fetching signed URL:', error.message); + throw error; + }); +} + +/** + * @param {Object} params + * @param {string} params.apiKey + * @param {string} params.documentUrl + * @param {string} [params.baseURL] + * @returns {Promise} + */ +async 
function performOCR({ + apiKey, + documentUrl, + model = 'mistral-ocr-latest', + baseURL = 'https://api.mistral.ai/v1', +}) { + return axios + .post( + `${baseURL}/ocr`, + { + model, + include_image_base64: false, + document: { + type: 'document_url', + document_url: documentUrl, + }, + }, + { + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${apiKey}`, + }, + }, + ) + .then((res) => res.data) + .catch((error) => { + logger.error('Error performing OCR:', error.message); + throw error; + }); +} + +function extractVariableName(str) { + const match = str.match(envVarRegex); + return match ? match[1] : null; +} + +const uploadMistralOCR = async ({ req, file, file_id, entity_id }) => { + try { + /** @type {TCustomConfig['ocr']} */ + const ocrConfig = req.app.locals?.ocr; + + const apiKeyConfig = ocrConfig.apiKey || ''; + const baseURLConfig = ocrConfig.baseURL || ''; + + const isApiKeyEnvVar = envVarRegex.test(apiKeyConfig); + const isBaseURLEnvVar = envVarRegex.test(baseURLConfig); + + const isApiKeyEmpty = !apiKeyConfig.trim(); + const isBaseURLEmpty = !baseURLConfig.trim(); + + let apiKey, baseURL; + + if (isApiKeyEnvVar || isBaseURLEnvVar || isApiKeyEmpty || isBaseURLEmpty) { + const apiKeyVarName = isApiKeyEnvVar ? extractVariableName(apiKeyConfig) : 'OCR_API_KEY'; + const baseURLVarName = isBaseURLEnvVar ? extractVariableName(baseURLConfig) : 'OCR_BASEURL'; + + const authValues = await loadAuthValues({ + userId: req.user.id, + authFields: [baseURLVarName, apiKeyVarName], + optional: new Set([baseURLVarName]), + }); + + apiKey = authValues[apiKeyVarName]; + baseURL = authValues[baseURLVarName]; + } else { + apiKey = apiKeyConfig; + baseURL = baseURLConfig; + } + + const mistralFile = await uploadDocumentToMistral({ + filePath: file.path, + fileName: file.originalname, + apiKey, + baseURL, + }); + + const modelConfig = ocrConfig.mistralModel || ''; + const model = envVarRegex.test(modelConfig) + ? 
extractEnvVariable(modelConfig) + : modelConfig.trim() || 'mistral-ocr-latest'; + + const signedUrlResponse = await getSignedUrl({ + apiKey, + baseURL, + fileId: mistralFile.id, + }); + + const ocrResult = await performOCR({ + apiKey, + baseURL, + model, + documentUrl: signedUrlResponse.url, + }); + + let aggregatedText = ''; + const images = []; + ocrResult.pages.forEach((page, index) => { + if (ocrResult.pages.length > 1) { + aggregatedText += `# PAGE ${index + 1}\n`; + } + + aggregatedText += page.markdown + '\n\n'; + + if (page.images && page.images.length > 0) { + page.images.forEach((image) => { + if (image.image_base64) { + images.push(image.image_base64); + } + }); + } + }); + + return { + filename: file.originalname, + bytes: aggregatedText.length * 4, + filepath: FileSources.mistral_ocr, + text: aggregatedText, + images, + }; + } catch (error) { + const message = 'Error uploading document to Mistral OCR API'; + logAxiosError({ error, message }); + throw new Error(message); + } +}; + +module.exports = { + uploadDocumentToMistral, + uploadMistralOCR, + getSignedUrl, + performOCR, +}; diff --git a/api/server/services/Files/MistralOCR/crud.spec.js b/api/server/services/Files/MistralOCR/crud.spec.js new file mode 100644 index 0000000000..80ac6f73a4 --- /dev/null +++ b/api/server/services/Files/MistralOCR/crud.spec.js @@ -0,0 +1,737 @@ +const fs = require('fs'); + +const mockAxios = { + interceptors: { + request: { use: jest.fn(), eject: jest.fn() }, + response: { use: jest.fn(), eject: jest.fn() }, + }, + create: jest.fn().mockReturnValue({ + defaults: { + proxy: null, + }, + get: jest.fn().mockResolvedValue({ data: {} }), + post: jest.fn().mockResolvedValue({ data: {} }), + put: jest.fn().mockResolvedValue({ data: {} }), + delete: jest.fn().mockResolvedValue({ data: {} }), + }), + get: jest.fn().mockResolvedValue({ data: {} }), + post: jest.fn().mockResolvedValue({ data: {} }), + put: jest.fn().mockResolvedValue({ data: {} }), + delete: jest.fn().mockResolvedValue({ data: {} }), + reset: jest.fn().mockImplementation(function () { + this.get.mockClear(); + this.post.mockClear(); + this.put.mockClear(); + this.delete.mockClear(); + this.create.mockClear(); + }), +}; + +jest.mock('axios', () => mockAxios); +jest.mock('fs'); +jest.mock('~/utils', () => ({ + logAxiosError: jest.fn(), +})); +jest.mock('~/config', () => ({ + logger: { + error: jest.fn(), + }, + createAxiosInstance: () => mockAxios, +})); +jest.mock('~/server/services/Tools/credentials', () => ({ + loadAuthValues: jest.fn(), +})); + +const { uploadDocumentToMistral, uploadMistralOCR, getSignedUrl, performOCR } = require('./crud'); + +describe('MistralOCR Service', () => { + afterEach(() => { + mockAxios.reset(); + jest.clearAllMocks(); + }); + + describe('uploadDocumentToMistral', () => { + beforeEach(() => { + // Create a more complete mock for file streams that FormData can work with + const mockReadStream = { + on: jest.fn().mockImplementation(function (event, handler) { + // Simulate immediate 'end' event to make FormData complete processing + if (event === 'end') { + handler(); + } + return this; + }), + pipe: jest.fn().mockImplementation(function () { + return this; + }), + pause: jest.fn(), + resume: jest.fn(), + emit: jest.fn(), + once: jest.fn(), + destroy: jest.fn(), + }; + + fs.createReadStream = jest.fn().mockReturnValue(mockReadStream); + + // Mock FormData's append to avoid actual stream processing + jest.mock('form-data', () => { + const mockFormData = function () { + return { + append: jest.fn(), + 
getHeaders: jest + .fn() + .mockReturnValue({ 'content-type': 'multipart/form-data; boundary=---boundary' }), + getBuffer: jest.fn().mockReturnValue(Buffer.from('mock-form-data')), + getLength: jest.fn().mockReturnValue(100), + }; + }; + return mockFormData; + }); + }); + + it('should upload a document to Mistral API using file streaming', async () => { + const mockResponse = { data: { id: 'file-123', purpose: 'ocr' } }; + mockAxios.post.mockResolvedValueOnce(mockResponse); + + const result = await uploadDocumentToMistral({ + filePath: '/path/to/test.pdf', + fileName: 'test.pdf', + apiKey: 'test-api-key', + }); + + // Check that createReadStream was called with the correct file path + expect(fs.createReadStream).toHaveBeenCalledWith('/path/to/test.pdf'); + + // Since we're mocking FormData, we'll just check that axios was called correctly + expect(mockAxios.post).toHaveBeenCalledWith( + 'https://api.mistral.ai/v1/files', + expect.anything(), + expect.objectContaining({ + headers: expect.objectContaining({ + Authorization: 'Bearer test-api-key', + }), + maxBodyLength: Infinity, + maxContentLength: Infinity, + }), + ); + expect(result).toEqual(mockResponse.data); + }); + + it('should handle errors during document upload', async () => { + const errorMessage = 'API error'; + mockAxios.post.mockRejectedValueOnce(new Error(errorMessage)); + + await expect( + uploadDocumentToMistral({ + filePath: '/path/to/test.pdf', + fileName: 'test.pdf', + apiKey: 'test-api-key', + }), + ).rejects.toThrow(); + + const { logger } = require('~/config'); + expect(logger.error).toHaveBeenCalledWith( + expect.stringContaining('Error uploading document to Mistral:'), + expect.any(String), + ); + }); + }); + + describe('getSignedUrl', () => { + it('should fetch signed URL from Mistral API', async () => { + const mockResponse = { data: { url: 'https://document-url.com' } }; + mockAxios.get.mockResolvedValueOnce(mockResponse); + + const result = await getSignedUrl({ + fileId: 'file-123', + apiKey: 'test-api-key', + }); + + expect(mockAxios.get).toHaveBeenCalledWith( + 'https://api.mistral.ai/v1/files/file-123/url?expiry=24', + { + headers: { + Authorization: 'Bearer test-api-key', + }, + }, + ); + expect(result).toEqual(mockResponse.data); + }); + + it('should handle errors when fetching signed URL', async () => { + const errorMessage = 'API error'; + mockAxios.get.mockRejectedValueOnce(new Error(errorMessage)); + + await expect( + getSignedUrl({ + fileId: 'file-123', + apiKey: 'test-api-key', + }), + ).rejects.toThrow(); + + const { logger } = require('~/config'); + expect(logger.error).toHaveBeenCalledWith('Error fetching signed URL:', errorMessage); + }); + }); + + describe('performOCR', () => { + it('should perform OCR using Mistral API', async () => { + const mockResponse = { + data: { + pages: [{ markdown: 'Page 1 content' }, { markdown: 'Page 2 content' }], + }, + }; + mockAxios.post.mockResolvedValueOnce(mockResponse); + + const result = await performOCR({ + apiKey: 'test-api-key', + documentUrl: 'https://document-url.com', + model: 'mistral-ocr-latest', + }); + + expect(mockAxios.post).toHaveBeenCalledWith( + 'https://api.mistral.ai/v1/ocr', + { + model: 'mistral-ocr-latest', + include_image_base64: false, + document: { + type: 'document_url', + document_url: 'https://document-url.com', + }, + }, + { + headers: { + 'Content-Type': 'application/json', + Authorization: 'Bearer test-api-key', + }, + }, + ); + expect(result).toEqual(mockResponse.data); + }); + + it('should handle errors during OCR processing', 
async () => { + const errorMessage = 'OCR processing error'; + mockAxios.post.mockRejectedValueOnce(new Error(errorMessage)); + + await expect( + performOCR({ + apiKey: 'test-api-key', + documentUrl: 'https://document-url.com', + }), + ).rejects.toThrow(); + + const { logger } = require('~/config'); + expect(logger.error).toHaveBeenCalledWith('Error performing OCR:', errorMessage); + }); + }); + + describe('uploadMistralOCR', () => { + beforeEach(() => { + const mockReadStream = { + on: jest.fn().mockImplementation(function (event, handler) { + if (event === 'end') { + handler(); + } + return this; + }), + pipe: jest.fn().mockImplementation(function () { + return this; + }), + pause: jest.fn(), + resume: jest.fn(), + emit: jest.fn(), + once: jest.fn(), + destroy: jest.fn(), + }; + + fs.createReadStream = jest.fn().mockReturnValue(mockReadStream); + }); + + it('should process OCR for a file with standard configuration', async () => { + // Setup mocks + const { loadAuthValues } = require('~/server/services/Tools/credentials'); + loadAuthValues.mockResolvedValue({ + OCR_API_KEY: 'test-api-key', + OCR_BASEURL: 'https://api.mistral.ai/v1', + }); + + // Mock file upload response + mockAxios.post.mockResolvedValueOnce({ + data: { id: 'file-123', purpose: 'ocr' }, + }); + + // Mock signed URL response + mockAxios.get.mockResolvedValueOnce({ + data: { url: 'https://signed-url.com' }, + }); + + // Mock OCR response with text and images + mockAxios.post.mockResolvedValueOnce({ + data: { + pages: [ + { + markdown: 'Page 1 content', + images: [{ image_base64: 'base64image1' }], + }, + { + markdown: 'Page 2 content', + images: [{ image_base64: 'base64image2' }], + }, + ], + }, + }); + + const req = { + user: { id: 'user123' }, + app: { + locals: { + ocr: { + // Use environment variable syntax to ensure loadAuthValues is called + apiKey: '${OCR_API_KEY}', + baseURL: '${OCR_BASEURL}', + mistralModel: 'mistral-medium', + }, + }, + }, + }; + + const file = { + path: '/tmp/upload/file.pdf', + originalname: 'document.pdf', + }; + + const result = await uploadMistralOCR({ + req, + file, + file_id: 'file123', + entity_id: 'entity123', + }); + + expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf'); + + expect(loadAuthValues).toHaveBeenCalledWith({ + userId: 'user123', + authFields: ['OCR_BASEURL', 'OCR_API_KEY'], + optional: expect.any(Set), + }); + + // Verify OCR result + expect(result).toEqual({ + filename: 'document.pdf', + bytes: expect.any(Number), + filepath: 'mistral_ocr', + text: expect.stringContaining('# PAGE 1'), + images: ['base64image1', 'base64image2'], + }); + }); + + it('should process variable references in configuration', async () => { + // Setup mocks with environment variables + const { loadAuthValues } = require('~/server/services/Tools/credentials'); + loadAuthValues.mockResolvedValue({ + CUSTOM_API_KEY: 'custom-api-key', + CUSTOM_BASEURL: 'https://custom-api.mistral.ai/v1', + }); + + // Mock API responses + mockAxios.post.mockResolvedValueOnce({ + data: { id: 'file-123', purpose: 'ocr' }, + }); + mockAxios.get.mockResolvedValueOnce({ + data: { url: 'https://signed-url.com' }, + }); + mockAxios.post.mockResolvedValueOnce({ + data: { + pages: [{ markdown: 'Content from custom API' }], + }, + }); + + const req = { + user: { id: 'user123' }, + app: { + locals: { + ocr: { + apiKey: '${CUSTOM_API_KEY}', + baseURL: '${CUSTOM_BASEURL}', + mistralModel: '${CUSTOM_MODEL}', + }, + }, + }, + }; + + // Set environment variable for model + process.env.CUSTOM_MODEL = 'mistral-large'; 
+ + const file = { + path: '/tmp/upload/file.pdf', + originalname: 'document.pdf', + }; + + const result = await uploadMistralOCR({ + req, + file, + file_id: 'file123', + entity_id: 'entity123', + }); + + expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf'); + + // Verify that custom environment variables were extracted and used + expect(loadAuthValues).toHaveBeenCalledWith({ + userId: 'user123', + authFields: ['CUSTOM_BASEURL', 'CUSTOM_API_KEY'], + optional: expect.any(Set), + }); + + // Check that mistral-large was used in the OCR API call + expect(mockAxios.post).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ + model: 'mistral-large', + }), + expect.anything(), + ); + + expect(result.text).toEqual('Content from custom API\n\n'); + }); + + it('should fall back to default values when variables are not properly formatted', async () => { + const { loadAuthValues } = require('~/server/services/Tools/credentials'); + loadAuthValues.mockResolvedValue({ + OCR_API_KEY: 'default-api-key', + OCR_BASEURL: undefined, // Testing optional parameter + }); + + mockAxios.post.mockResolvedValueOnce({ + data: { id: 'file-123', purpose: 'ocr' }, + }); + mockAxios.get.mockResolvedValueOnce({ + data: { url: 'https://signed-url.com' }, + }); + mockAxios.post.mockResolvedValueOnce({ + data: { + pages: [{ markdown: 'Default API result' }], + }, + }); + + const req = { + user: { id: 'user123' }, + app: { + locals: { + ocr: { + // Use environment variable syntax to ensure loadAuthValues is called + apiKey: '${INVALID_FORMAT}', // Using valid env var format but with an invalid name + baseURL: '${OCR_BASEURL}', // Using valid env var format + mistralModel: 'mistral-ocr-latest', // Plain string value + }, + }, + }, + }; + + const file = { + path: '/tmp/upload/file.pdf', + originalname: 'document.pdf', + }; + + await uploadMistralOCR({ + req, + file, + file_id: 'file123', + entity_id: 'entity123', + }); + + expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf'); + + // Should use the default values + expect(loadAuthValues).toHaveBeenCalledWith({ + userId: 'user123', + authFields: ['OCR_BASEURL', 'INVALID_FORMAT'], + optional: expect.any(Set), + }); + + // Should use the default model when not using environment variable format + expect(mockAxios.post).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ + model: 'mistral-ocr-latest', + }), + expect.anything(), + ); + }); + + it('should handle API errors during OCR process', async () => { + const { loadAuthValues } = require('~/server/services/Tools/credentials'); + loadAuthValues.mockResolvedValue({ + OCR_API_KEY: 'test-api-key', + }); + + // Mock file upload to fail + mockAxios.post.mockRejectedValueOnce(new Error('Upload failed')); + + const req = { + user: { id: 'user123' }, + app: { + locals: { + ocr: { + apiKey: 'OCR_API_KEY', + baseURL: 'OCR_BASEURL', + }, + }, + }, + }; + + const file = { + path: '/tmp/upload/file.pdf', + originalname: 'document.pdf', + }; + + await expect( + uploadMistralOCR({ + req, + file, + file_id: 'file123', + entity_id: 'entity123', + }), + ).rejects.toThrow('Error uploading document to Mistral OCR API'); + expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf'); + + const { logAxiosError } = require('~/utils'); + expect(logAxiosError).toHaveBeenCalled(); + }); + + it('should handle single page documents without page numbering', async () => { + const { loadAuthValues } = require('~/server/services/Tools/credentials'); + 
loadAuthValues.mockResolvedValue({ + OCR_API_KEY: 'test-api-key', + OCR_BASEURL: 'https://api.mistral.ai/v1', // Make sure this is included + }); + + // Clear all previous mocks + mockAxios.post.mockClear(); + mockAxios.get.mockClear(); + + // 1. First mock: File upload response + mockAxios.post.mockImplementationOnce(() => + Promise.resolve({ data: { id: 'file-123', purpose: 'ocr' } }), + ); + + // 2. Second mock: Signed URL response + mockAxios.get.mockImplementationOnce(() => + Promise.resolve({ data: { url: 'https://signed-url.com' } }), + ); + + // 3. Third mock: OCR response + mockAxios.post.mockImplementationOnce(() => + Promise.resolve({ + data: { + pages: [{ markdown: 'Single page content' }], + }, + }), + ); + + const req = { + user: { id: 'user123' }, + app: { + locals: { + ocr: { + apiKey: 'OCR_API_KEY', + baseURL: 'OCR_BASEURL', + mistralModel: 'mistral-ocr-latest', + }, + }, + }, + }; + + const file = { + path: '/tmp/upload/file.pdf', + originalname: 'single-page.pdf', + }; + + const result = await uploadMistralOCR({ + req, + file, + file_id: 'file123', + entity_id: 'entity123', + }); + + expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf'); + + // Verify that single page documents don't include page numbering + expect(result.text).not.toContain('# PAGE'); + expect(result.text).toEqual('Single page content\n\n'); + }); + + it('should use literal values in configuration when provided directly', async () => { + const { loadAuthValues } = require('~/server/services/Tools/credentials'); + // We'll still mock this but it should not be used for literal values + loadAuthValues.mockResolvedValue({}); + + // Clear all previous mocks + mockAxios.post.mockClear(); + mockAxios.get.mockClear(); + + // 1. First mock: File upload response + mockAxios.post.mockImplementationOnce(() => + Promise.resolve({ data: { id: 'file-123', purpose: 'ocr' } }), + ); + + // 2. Second mock: Signed URL response + mockAxios.get.mockImplementationOnce(() => + Promise.resolve({ data: { url: 'https://signed-url.com' } }), + ); + + // 3. 
Third mock: OCR response + mockAxios.post.mockImplementationOnce(() => + Promise.resolve({ + data: { + pages: [{ markdown: 'Processed with literal config values' }], + }, + }), + ); + + const req = { + user: { id: 'user123' }, + app: { + locals: { + ocr: { + // Direct values that should be used as-is, without variable substitution + apiKey: 'actual-api-key-value', + baseURL: 'https://direct-api-url.mistral.ai/v1', + mistralModel: 'mistral-direct-model', + }, + }, + }, + }; + + const file = { + path: '/tmp/upload/file.pdf', + originalname: 'direct-values.pdf', + }; + + const result = await uploadMistralOCR({ + req, + file, + file_id: 'file123', + entity_id: 'entity123', + }); + + expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf'); + + // Verify the correct URL was used with the direct baseURL value + expect(mockAxios.post).toHaveBeenCalledWith( + 'https://direct-api-url.mistral.ai/v1/files', + expect.any(Object), + expect.objectContaining({ + headers: expect.objectContaining({ + Authorization: 'Bearer actual-api-key-value', + }), + }), + ); + + // Check the OCR call was made with the direct model value + expect(mockAxios.post).toHaveBeenCalledWith( + 'https://direct-api-url.mistral.ai/v1/ocr', + expect.objectContaining({ + model: 'mistral-direct-model', + }), + expect.any(Object), + ); + + // Verify the result + expect(result.text).toEqual('Processed with literal config values\n\n'); + + // Verify loadAuthValues was never called since we used direct values + expect(loadAuthValues).not.toHaveBeenCalled(); + }); + + it('should handle empty configuration values and use defaults', async () => { + const { loadAuthValues } = require('~/server/services/Tools/credentials'); + // Set up the mock values to be returned by loadAuthValues + loadAuthValues.mockResolvedValue({ + OCR_API_KEY: 'default-from-env-key', + OCR_BASEURL: 'https://default-from-env.mistral.ai/v1', + }); + + // Clear all previous mocks + mockAxios.post.mockClear(); + mockAxios.get.mockClear(); + + // 1. First mock: File upload response + mockAxios.post.mockImplementationOnce(() => + Promise.resolve({ data: { id: 'file-123', purpose: 'ocr' } }), + ); + + // 2. Second mock: Signed URL response + mockAxios.get.mockImplementationOnce(() => + Promise.resolve({ data: { url: 'https://signed-url.com' } }), + ); + + // 3. 
Third mock: OCR response + mockAxios.post.mockImplementationOnce(() => + Promise.resolve({ + data: { + pages: [{ markdown: 'Content from default configuration' }], + }, + }), + ); + + const req = { + user: { id: 'user123' }, + app: { + locals: { + ocr: { + // Empty string values - should fall back to defaults + apiKey: '', + baseURL: '', + mistralModel: '', + }, + }, + }, + }; + + const file = { + path: '/tmp/upload/file.pdf', + originalname: 'empty-config.pdf', + }; + + const result = await uploadMistralOCR({ + req, + file, + file_id: 'file123', + entity_id: 'entity123', + }); + + expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf'); + + // Verify loadAuthValues was called with the default variable names + expect(loadAuthValues).toHaveBeenCalledWith({ + userId: 'user123', + authFields: ['OCR_BASEURL', 'OCR_API_KEY'], + optional: expect.any(Set), + }); + + // Verify the API calls used the default values from loadAuthValues + expect(mockAxios.post).toHaveBeenCalledWith( + 'https://default-from-env.mistral.ai/v1/files', + expect.any(Object), + expect.objectContaining({ + headers: expect.objectContaining({ + Authorization: 'Bearer default-from-env-key', + }), + }), + ); + + // Verify the OCR model defaulted to mistral-ocr-latest + expect(mockAxios.post).toHaveBeenCalledWith( + 'https://default-from-env.mistral.ai/v1/ocr', + expect.objectContaining({ + model: 'mistral-ocr-latest', + }), + expect.any(Object), + ); + + // Check result + expect(result.text).toEqual('Content from default configuration\n\n'); + }); + }); +}); diff --git a/api/server/services/Files/MistralOCR/index.js b/api/server/services/Files/MistralOCR/index.js new file mode 100644 index 0000000000..a6223d1ee5 --- /dev/null +++ b/api/server/services/Files/MistralOCR/index.js @@ -0,0 +1,5 @@ +const crud = require('./crud'); + +module.exports = { + ...crud, +}; diff --git a/api/server/services/Files/images/encode.js b/api/server/services/Files/images/encode.js index 94153ffc64..707632fb6a 100644 --- a/api/server/services/Files/images/encode.js +++ b/api/server/services/Files/images/encode.js @@ -49,6 +49,7 @@ async function encodeAndFormat(req, files, endpoint, mode) { const promises = []; const encodingMethods = {}; const result = { + text: '', files: [], image_urls: [], }; @@ -59,6 +60,9 @@ async function encodeAndFormat(req, files, endpoint, mode) { for (let file of files) { const source = file.source ?? FileSources.local; + if (source === FileSources.text && file.text) { + result.text += `${!result.text ? 'Attached document(s):\n```md' : '\n\n---\n\n'}# "${file.filename}"\n${file.text}\n`; + } if (!file.height) { promises.push([file, null]); @@ -85,6 +89,10 @@ async function encodeAndFormat(req, files, endpoint, mode) { promises.push(preparePayload(req, file)); } + if (result.text) { + result.text += '\n```'; + } + const detail = req.body.imageDetail ?? 
ImageDetail.auto; /** @type {Array<[MongoFile, string]>} */ diff --git a/api/server/services/Files/process.js b/api/server/services/Files/process.js index 8744eb409b..1bfadc4b23 100644 --- a/api/server/services/Files/process.js +++ b/api/server/services/Files/process.js @@ -28,8 +28,8 @@ const { addResourceFileId, deleteResourceFileId } = require('~/server/controller const { addAgentResourceFile, removeAgentResourceFiles } = require('~/models/Agent'); const { getOpenAIClient } = require('~/server/controllers/assistants/helpers'); const { createFile, updateFileUsage, deleteFiles } = require('~/models/File'); +const { loadAuthValues } = require('~/server/services/Tools/credentials'); const { getEndpointsConfig } = require('~/server/services/Config'); -const { loadAuthValues } = require('~/app/clients/tools/util'); const { LB_QueueAsyncCall } = require('~/server/utils/queue'); const { getStrategyFunctions } = require('./strategies'); const { determineFileType } = require('~/server/utils'); @@ -162,7 +162,6 @@ const processDeleteRequest = async ({ req, files }) => { for (const file of files) { const source = file.source ?? FileSources.local; - if (req.body.agent_id && req.body.tool_resource) { agentFiles.push({ tool_resource: req.body.tool_resource, @@ -170,6 +169,11 @@ const processDeleteRequest = async ({ req, files }) => { }); } + if (source === FileSources.text) { + resolvedFileIds.push(file.file_id); + continue; + } + if (checkOpenAIStorage(source) && !client[source]) { await initializeClients(); } @@ -521,6 +525,52 @@ const processAgentFileUpload = async ({ req, res, metadata }) => { if (!isFileSearchEnabled) { throw new Error('File search is not enabled for Agents'); } + } else if (tool_resource === EToolResources.ocr) { + const isOCREnabled = await checkCapability(req, AgentCapabilities.ocr); + if (!isOCREnabled) { + throw new Error('OCR capability is not enabled for Agents'); + } + + const { handleFileUpload } = getStrategyFunctions( + req.app.locals?.ocr?.strategy ?? FileSources.mistral_ocr, + ); + const { file_id, temp_file_id } = metadata; + + const { + text, + bytes, + // TODO: OCR images support? + images, + filename, + filepath: ocrFileURL, + } = await handleFileUpload({ req, file, file_id, entity_id: agent_id }); + + const fileInfo = removeNullishValues({ + text, + bytes, + file_id, + temp_file_id, + user: req.user.id, + type: file.mimetype, + filepath: ocrFileURL, + source: FileSources.text, + filename: filename ?? file.originalname, + model: messageAttachment ? undefined : req.body.model, + context: messageAttachment ? 
FileContext.message_attachment : FileContext.agents, + }); + + if (!messageAttachment && tool_resource) { + await addAgentResourceFile({ + req, + file_id, + agent_id, + tool_resource, + }); + } + const result = await createFile(fileInfo, true); + return res + .status(200) + .json({ message: 'Agent file uploaded and processed successfully', ...result }); } const source = diff --git a/api/server/services/Files/strategies.js b/api/server/services/Files/strategies.js index ddfdd57469..128715bf3e 100644 --- a/api/server/services/Files/strategies.js +++ b/api/server/services/Files/strategies.js @@ -24,6 +24,7 @@ const { const { uploadOpenAIFile, deleteOpenAIFile, getOpenAIFileStream } = require('./OpenAI'); const { getCodeOutputDownloadStream, uploadCodeEnvFile } = require('./Code'); const { uploadVectors, deleteVectors } = require('./VectorDB'); +const { uploadMistralOCR } = require('./MistralOCR'); /** * Firebase Storage Strategy Functions @@ -127,6 +128,26 @@ const codeOutputStrategy = () => ({ getDownloadStream: getCodeOutputDownloadStream, }); +const mistralOCRStrategy = () => ({ + /** @type {typeof saveFileFromURL | null} */ + saveURL: null, + /** @type {typeof getLocalFileURL | null} */ + getFileURL: null, + /** @type {typeof saveLocalBuffer | null} */ + saveBuffer: null, + /** @type {typeof processLocalAvatar | null} */ + processAvatar: null, + /** @type {typeof uploadLocalImage | null} */ + handleImageUpload: null, + /** @type {typeof prepareImagesLocal | null} */ + prepareImagePayload: null, + /** @type {typeof deleteLocalFile | null} */ + deleteFile: null, + /** @type {typeof getLocalFileStream | null} */ + getDownloadStream: null, + handleFileUpload: uploadMistralOCR, +}); + // Strategy Selector const getStrategyFunctions = (fileSource) => { if (fileSource === FileSources.firebase) { @@ -141,6 +162,8 @@ const getStrategyFunctions = (fileSource) => { return vectorStrategy(); } else if (fileSource === FileSources.execute_code) { return codeOutputStrategy(); + } else if (fileSource === FileSources.mistral_ocr) { + return mistralOCRStrategy(); } else { throw new Error('Invalid file source'); } diff --git a/api/server/services/Tools/credentials.js b/api/server/services/Tools/credentials.js new file mode 100644 index 0000000000..b50a2460d4 --- /dev/null +++ b/api/server/services/Tools/credentials.js @@ -0,0 +1,56 @@ +const { getUserPluginAuthValue } = require('~/server/services/PluginService'); + +/** + * + * @param {Object} params + * @param {string} params.userId + * @param {string[]} params.authFields + * @param {Set} [params.optional] + * @param {boolean} [params.throwError] + * @returns + */ +const loadAuthValues = async ({ userId, authFields, optional, throwError = true }) => { + let authValues = {}; + + /** + * Finds the first non-empty value for the given authentication field, supporting alternate fields. + * @param {string[]} fields Array of strings representing the authentication fields. Supports alternate fields delimited by "||". + * @returns {Promise<{ authField: string, authValue: string} | null>} An object containing the authentication field and value, or null if not found. 
+ */ + const findAuthValue = async (fields) => { + for (const field of fields) { + let value = process.env[field]; + if (value) { + return { authField: field, authValue: value }; + } + try { + value = await getUserPluginAuthValue(userId, field, throwError); + } catch (err) { + if (optional && optional.has(field)) { + return { authField: field, authValue: undefined }; + } + if (field === fields[fields.length - 1] && !value) { + throw err; + } + } + if (value) { + return { authField: field, authValue: value }; + } + } + return null; + }; + + for (let authField of authFields) { + const fields = authField.split('||'); + const result = await findAuthValue(fields); + if (result) { + authValues[result.authField] = result.authValue; + } + } + + return authValues; +}; + +module.exports = { + loadAuthValues, +}; diff --git a/api/server/utils/handleText.js b/api/server/utils/handleText.js index 8c681d8f4e..1b9cda28d7 100644 --- a/api/server/utils/handleText.js +++ b/api/server/utils/handleText.js @@ -203,6 +203,7 @@ function generateConfig(key, baseURL, endpoint) { AgentCapabilities.artifacts, AgentCapabilities.actions, AgentCapabilities.tools, + AgentCapabilities.ocr, ]; } diff --git a/api/test/__mocks__/logger.js b/api/test/__mocks__/logger.js index caeb004e39..549c57d5a4 100644 --- a/api/test/__mocks__/logger.js +++ b/api/test/__mocks__/logger.js @@ -39,7 +39,10 @@ jest.mock('winston-daily-rotate-file', () => { }); jest.mock('~/config', () => { + const actualModule = jest.requireActual('~/config'); return { + sendEvent: actualModule.sendEvent, + createAxiosInstance: actualModule.createAxiosInstance, logger: { info: jest.fn(), warn: jest.fn(), diff --git a/api/typedefs.js b/api/typedefs.js index 3045d9543b..21c4f1fecc 100644 --- a/api/typedefs.js +++ b/api/typedefs.js @@ -1787,3 +1787,51 @@ * @typedef {Promise<{ message: TMessage, conversation: TConversation }> | undefined} ClientDatabaseSavePromise * @memberof typedefs */ + +/** + * @exports OCRImage + * @typedef {Object} OCRImage + * @property {string} id - The identifier of the image. + * @property {number} top_left_x - X-coordinate of the top left corner of the image. + * @property {number} top_left_y - Y-coordinate of the top left corner of the image. + * @property {number} bottom_right_x - X-coordinate of the bottom right corner of the image. + * @property {number} bottom_right_y - Y-coordinate of the bottom right corner of the image. + * @property {string} image_base64 - Base64-encoded image data. + * @memberof typedefs + */ + +/** + * @exports PageDimensions + * @typedef {Object} PageDimensions + * @property {number} dpi - The dots per inch resolution of the page. + * @property {number} height - The height of the page in pixels. + * @property {number} width - The width of the page in pixels. + * @memberof typedefs + */ + +/** + * @exports OCRPage + * @typedef {Object} OCRPage + * @property {number} index - The index of the page in the document. + * @property {string} markdown - The extracted text content of the page in markdown format. + * @property {OCRImage[]} images - Array of images found on the page. + * @property {PageDimensions} dimensions - The dimensions of the page. + * @memberof typedefs + */ + +/** + * @exports OCRUsageInfo + * @typedef {Object} OCRUsageInfo + * @property {number} pages_processed - Number of pages processed in the document. + * @property {number} doc_size_bytes - Size of the document in bytes. 
+ * @memberof typedefs + */ + +/** + * @exports OCRResult + * @typedef {Object} OCRResult + * @property {OCRPage[]} pages - Array of pages extracted from the document. + * @property {string} model - The model used for OCR processing. + * @property {OCRUsageInfo} usage_info - Usage information for the OCR operation. + * @memberof typedefs + */ diff --git a/client/src/common/agents-types.ts b/client/src/common/agents-types.ts index a9c24106bc..f4b8aac9fe 100644 --- a/client/src/common/agents-types.ts +++ b/client/src/common/agents-types.ts @@ -5,6 +5,7 @@ import type { OptionWithIcon, ExtendedFile } from './types'; export type TAgentOption = OptionWithIcon & Agent & { knowledge_files?: Array<[string, ExtendedFile]>; + context_files?: Array<[string, ExtendedFile]>; code_files?: Array<[string, ExtendedFile]>; }; diff --git a/client/src/common/types.ts b/client/src/common/types.ts index 380ec573b8..f0b2b8a238 100644 --- a/client/src/common/types.ts +++ b/client/src/common/types.ts @@ -483,6 +483,7 @@ export interface ExtendedFile { attached?: boolean; embedded?: boolean; tool_resource?: string; + metadata?: t.TFile['metadata']; } export type ContextType = { navVisible: boolean; setNavVisible: (visible: boolean) => void }; diff --git a/client/src/components/Chat/Input/Files/AttachFileMenu.tsx b/client/src/components/Chat/Input/Files/AttachFileMenu.tsx index 54a8a595c4..8841a0ae51 100644 --- a/client/src/components/Chat/Input/Files/AttachFileMenu.tsx +++ b/client/src/components/Chat/Input/Files/AttachFileMenu.tsx @@ -1,7 +1,7 @@ import * as Ariakit from '@ariakit/react'; import React, { useRef, useState, useMemo } from 'react'; -import { FileSearch, ImageUpIcon, TerminalSquareIcon } from 'lucide-react'; import { EToolResources, EModelEndpoint } from 'librechat-data-provider'; +import { FileSearch, ImageUpIcon, TerminalSquareIcon, FileType2Icon } from 'lucide-react'; import { FileUpload, TooltipAnchor, DropdownPopup } from '~/components/ui'; import { useGetEndpointsQuery } from '~/data-provider'; import { AttachmentIcon } from '~/components/svg'; @@ -49,6 +49,17 @@ const AttachFile = ({ isRTL, disabled, handleFileChange }: AttachFileProps) => { }, ]; + if (capabilities.includes(EToolResources.ocr)) { + items.push({ + label: localize('com_ui_upload_ocr_text'), + onClick: () => { + setToolResource(EToolResources.ocr); + handleUploadClick(); + }, + icon: , + }); + } + if (capabilities.includes(EToolResources.file_search)) { items.push({ label: localize('com_ui_upload_file_search'), diff --git a/client/src/components/Chat/Input/Files/DragDropModal.tsx b/client/src/components/Chat/Input/Files/DragDropModal.tsx index b252ae1a93..2abc15a45b 100644 --- a/client/src/components/Chat/Input/Files/DragDropModal.tsx +++ b/client/src/components/Chat/Input/Files/DragDropModal.tsx @@ -1,6 +1,6 @@ import React, { useMemo } from 'react'; import { EModelEndpoint, EToolResources } from 'librechat-data-provider'; -import { FileSearch, ImageUpIcon, TerminalSquareIcon } from 'lucide-react'; +import { FileSearch, ImageUpIcon, FileType2Icon, TerminalSquareIcon } from 'lucide-react'; import OGDialogTemplate from '~/components/ui/OGDialogTemplate'; import { useGetEndpointsQuery } from '~/data-provider'; import useLocalize from '~/hooks/useLocalize'; @@ -50,6 +50,12 @@ const DragDropModal = ({ onOptionSelect, setShowModal, files, isVisible }: DragD value: EToolResources.execute_code, icon: , }); + } else if (capability === EToolResources.ocr) { + _options.push({ + label: localize('com_ui_upload_ocr_text'), + value: 
EToolResources.ocr, + icon: , + }); } } diff --git a/client/src/components/Chat/Input/Files/FilePreview.tsx b/client/src/components/Chat/Input/Files/FilePreview.tsx index 80933b8503..02851119af 100644 --- a/client/src/components/Chat/Input/Files/FilePreview.tsx +++ b/client/src/components/Chat/Input/Files/FilePreview.tsx @@ -19,7 +19,7 @@ const FilePreview = ({ }; className?: string; }) => { - const radius = 55; // Radius of the SVG circle + const radius = 55; const circumference = 2 * Math.PI * radius; const progress = useProgress( file?.['progress'] ?? 1, @@ -27,16 +27,15 @@ const FilePreview = ({ (file as ExtendedFile | undefined)?.size ?? 1, ); - // Calculate the offset based on the loading progress const offset = circumference - progress * circumference; const circleCSSProperties = { transition: 'stroke-dashoffset 0.5s linear', }; return ( -
+
- + {progress < 1 && ( + + + +
+ ); + } + + if (source === FileSources.text) { + return ( +
+ + + +
+ ); + } + + if (source === FileSources.vectordb) { + return ( +
+ + + +
+ ); } const endpoint = sourceToEndpoint[source ?? '']; @@ -31,7 +64,7 @@ export default function SourceIcon({ return null; } return ( - +
); } diff --git a/client/src/components/SidePanel/Agents/AgentConfig.tsx b/client/src/components/SidePanel/Agents/AgentConfig.tsx index 9fc7674158..7ece82bed1 100644 --- a/client/src/components/SidePanel/Agents/AgentConfig.tsx +++ b/client/src/components/SidePanel/Agents/AgentConfig.tsx @@ -23,6 +23,7 @@ import { processAgentOption } from '~/utils'; import AdminSettings from './AdminSettings'; import DeleteButton from './DeleteButton'; import AgentAvatar from './AgentAvatar'; +import FileContext from './FileContext'; import { Spinner } from '~/components'; import FileSearch from './FileSearch'; import ShareAgent from './ShareAgent'; @@ -82,6 +83,10 @@ export default function AgentConfig({ () => agentsConfig?.capabilities.includes(AgentCapabilities.artifacts) ?? false, [agentsConfig], ); + const ocrEnabled = useMemo( + () => agentsConfig?.capabilities.includes(AgentCapabilities.ocr) ?? false, + [agentsConfig], + ); const fileSearchEnabled = useMemo( () => agentsConfig?.capabilities.includes(AgentCapabilities.file_search) ?? false, [agentsConfig], @@ -91,6 +96,26 @@ export default function AgentConfig({ [agentsConfig], ); + const context_files = useMemo(() => { + if (typeof agent === 'string') { + return []; + } + + if (agent?.id !== agent_id) { + return []; + } + + if (agent.context_files) { + return agent.context_files; + } + + const _agent = processAgentOption({ + agent, + fileMap, + }); + return _agent.context_files ?? []; + }, [agent, agent_id, fileMap]); + const knowledge_files = useMemo(() => { if (typeof agent === 'string') { return []; @@ -334,7 +359,7 @@ export default function AgentConfig({ - {(codeEnabled || fileSearchEnabled || artifactsEnabled) && ( + {(codeEnabled || fileSearchEnabled || artifactsEnabled || ocrEnabled) && (
)} {/* Agent Tools & Actions */} diff --git a/client/src/components/SidePanel/Agents/FileContext.tsx b/client/src/components/SidePanel/Agents/FileContext.tsx new file mode 100644 index 0000000000..eb0e9e9a13 --- /dev/null +++ b/client/src/components/SidePanel/Agents/FileContext.tsx @@ -0,0 +1,128 @@ +import { useState, useRef } from 'react'; +import { + EModelEndpoint, + EToolResources, + mergeFileConfig, + fileConfig as defaultFileConfig, +} from 'librechat-data-provider'; +import type { ExtendedFile } from '~/common'; +import { useFileHandling, useLocalize, useLazyEffect } from '~/hooks'; +import FileRow from '~/components/Chat/Input/Files/FileRow'; +import { useGetFileConfig } from '~/data-provider'; +import { HoverCard, HoverCardContent, HoverCardPortal, HoverCardTrigger } from '~/components/ui'; +import { AttachmentIcon, CircleHelpIcon } from '~/components/svg'; +import { useChatContext } from '~/Providers'; +import { ESide } from '~/common'; + +export default function FileContext({ + agent_id, + files: _files, +}: { + agent_id: string; + files?: [string, ExtendedFile][]; +}) { + const localize = useLocalize(); + const { setFilesLoading } = useChatContext(); + const fileInputRef = useRef(null); + const [files, setFiles] = useState>(new Map()); + + const { data: fileConfig = defaultFileConfig } = useGetFileConfig({ + select: (data) => mergeFileConfig(data), + }); + + const { handleFileChange } = useFileHandling({ + overrideEndpoint: EModelEndpoint.agents, + additionalMetadata: { agent_id, tool_resource: EToolResources.ocr }, + fileSetter: setFiles, + }); + + useLazyEffect( + () => { + if (_files) { + setFiles(new Map(_files)); + } + }, + [_files], + 750, + ); + + const endpointFileConfig = fileConfig.endpoints[EModelEndpoint.agents]; + const isUploadDisabled = endpointFileConfig.disabled ?? false; + + if (isUploadDisabled) { + return null; + } + + const handleButtonClick = () => { + // necessary to reset the input + if (fileInputRef.current) { + fileInputRef.current.value = ''; + } + fileInputRef.current?.click(); + }; + + return ( +
+ +
+ + + + + + + + +
+

+ {localize('com_agents_file_context_info')} +

+
+
+
+
+
+
+ {/* File Context (OCR) Files */} +
{children}
} + /> +
+ +
+ {/* Disabled Message */} + {agent_id ? null : ( +
+ {localize('com_agents_file_context_disabled')} +
+ )} +
+
+ ); +} diff --git a/client/src/components/SidePanel/Files/PanelColumns.tsx b/client/src/components/SidePanel/Files/PanelColumns.tsx index d8fc15f6c6..5d199dfc9a 100644 --- a/client/src/components/SidePanel/Files/PanelColumns.tsx +++ b/client/src/components/SidePanel/Files/PanelColumns.tsx @@ -1,21 +1,23 @@ import { ArrowUpDown } from 'lucide-react'; import type { ColumnDef } from '@tanstack/react-table'; import type { TFile } from 'librechat-data-provider'; +import useLocalize from '~/hooks/useLocalize'; import PanelFileCell from './PanelFileCell'; import { Button } from '~/components/ui'; import { formatDate } from '~/utils'; -export const columns: ColumnDef[] = [ +export const columns: ColumnDef[] = [ { accessorKey: 'filename', header: ({ column }) => { + const localize = useLocalize(); return ( ); @@ -31,20 +33,21 @@ export const columns: ColumnDef[] = [ size: '10%', }, header: ({ column }) => { + const localize = useLocalize(); return ( ); }, cell: ({ row }) => ( - {formatDate(row.original.updatedAt?.toString() ?? '')} + {formatDate(row.original?.updatedAt?.toString() ?? '')} ), }, diff --git a/client/src/components/SidePanel/Files/PanelFileCell.tsx b/client/src/components/SidePanel/Files/PanelFileCell.tsx index e039b8b257..2a38a57172 100644 --- a/client/src/components/SidePanel/Files/PanelFileCell.tsx +++ b/client/src/components/SidePanel/Files/PanelFileCell.tsx @@ -6,7 +6,6 @@ import { getFileType } from '~/utils'; export default function PanelFileCell({ row }: { row: Row }) { const file = row.original; - return (
{file?.type.startsWith('image') === true ? ( diff --git a/client/src/components/SidePanel/Files/PanelTable.tsx b/client/src/components/SidePanel/Files/PanelTable.tsx index 51d0c864c1..87a965a968 100644 --- a/client/src/components/SidePanel/Files/PanelTable.tsx +++ b/client/src/components/SidePanel/Files/PanelTable.tsx @@ -159,6 +159,7 @@ export default function DataTable({ columns, data }: DataTablePro filename: fileData.filename, source: fileData.source, size: fileData.bytes, + metadata: fileData.metadata, }); }, [addFile, fileMap, conversation, localize, showToast, fileConfig.endpoints], diff --git a/client/src/data-provider/Files/mutations.ts b/client/src/data-provider/Files/mutations.ts index 9f51a5e717..8ac2a5e49e 100644 --- a/client/src/data-provider/Files/mutations.ts +++ b/client/src/data-provider/Files/mutations.ts @@ -63,8 +63,9 @@ export const useUploadFileMutation = ( const update = {}; const prevResources = agent.tool_resources ?? {}; - const prevResource: t.ExecuteCodeResource | t.AgentFileSearchResource = agent - .tool_resources?.[tool_resource] ?? { + const prevResource: t.ExecuteCodeResource | t.AgentFileResource = agent.tool_resources?.[ + tool_resource + ] ?? { file_ids: [], }; if (!prevResource.file_ids) { diff --git a/client/src/locales/en/translation.json b/client/src/locales/en/translation.json index d471d6a00a..e56024e169 100644 --- a/client/src/locales/en/translation.json +++ b/client/src/locales/en/translation.json @@ -11,6 +11,9 @@ "com_agents_create_error": "There was an error creating your agent.", "com_agents_description_placeholder": "Optional: Describe your Agent here", "com_agents_enable_file_search": "Enable File Search", + "com_agents_file_context": "File Context (OCR)", + "com_agents_file_context_disabled": "Agent must be created before uploading files for File Context.", + "com_agents_file_context_info": "Files uploaded as \"Context\" are processed using OCR to extract text, which is then added to the Agent's instructions. Ideal for documents, images with text, or PDFs where you need the full text content of a file", "com_agents_file_search_disabled": "Agent must be created before uploading files for File Search.", "com_agents_file_search_info": "When enabled, the agent will be informed of the exact filenames listed below, allowing it to retrieve relevant context from these files.", "com_agents_instructions_placeholder": "The system instructions that the agent uses", @@ -811,10 +814,12 @@ "com_ui_upload_code_files": "Upload for Code Interpreter", "com_ui_upload_delay": "Uploading \"{{0}}\" is taking more time than anticipated. Please wait while the file finishes indexing for retrieval.", "com_ui_upload_error": "There was an error uploading your file", + "com_ui_upload_file_context": "Upload File Context", "com_ui_upload_file_search": "Upload for File Search", "com_ui_upload_files": "Upload files", "com_ui_upload_image": "Upload an image", "com_ui_upload_image_input": "Upload Image", + "com_ui_upload_ocr_text": "Upload as Text", "com_ui_upload_invalid": "Invalid file for upload. Must be an image not exceeding the limit", "com_ui_upload_invalid_var": "Invalid file for upload. Must be an image not exceeding {{0}} MB", "com_ui_upload_success": "Successfully uploaded file", @@ -835,4 +840,4 @@ "com_ui_zoom": "Zoom", "com_user_message": "You", "com_warning_resubmit_unsupported": "Resubmitting the AI message is not supported for this endpoint." 
-} \ No newline at end of file +} diff --git a/client/src/utils/forms.tsx b/client/src/utils/forms.tsx index 04e52c9893..f8ccc7cb42 100644 --- a/client/src/utils/forms.tsx +++ b/client/src/utils/forms.tsx @@ -58,6 +58,9 @@ export const processAgentOption = ({ label: _agent?.name ?? '', value: _agent?.id ?? '', icon: isGlobal ? : null, + context_files: _agent?.tool_resources?.ocr?.file_ids + ? ([] as Array<[string, ExtendedFile]>) + : undefined, knowledge_files: _agent?.tool_resources?.file_search?.file_ids ? ([] as Array<[string, ExtendedFile]>) : undefined, @@ -83,7 +86,7 @@ export const processAgentOption = ({ const source = tool_resource === EToolResources.file_search ? FileSources.vectordb - : file?.source ?? FileSources.local; + : (file?.source ?? FileSources.local); if (file) { list?.push([ @@ -97,6 +100,7 @@ export const processAgentOption = ({ height: file.height, size: file.bytes, preview: file.filepath, + metadata: file.metadata, progress: 1, source, }, @@ -117,6 +121,16 @@ export const processAgentOption = ({ } }; + if (agent.context_files && _agent?.tool_resources?.ocr?.file_ids) { + _agent.tool_resources.ocr.file_ids.forEach((file_id) => + handleFile({ + file_id, + list: agent.context_files, + tool_resource: EToolResources.ocr, + }), + ); + } + if (agent.knowledge_files && _agent?.tool_resources?.file_search?.file_ids) { _agent.tool_resources.file_search.file_ids.forEach((file_id) => handleFile({ diff --git a/package-lock.json b/package-lock.json index e9d9c1ef16..d7e4f93476 100644 --- a/package-lock.json +++ b/package-lock.json @@ -41014,7 +41014,7 @@ }, "packages/data-provider": { "name": "librechat-data-provider", - "version": "0.7.7", + "version": "0.7.71", "license": "ISC", "dependencies": { "axios": "^1.8.2", diff --git a/packages/data-provider/package.json b/packages/data-provider/package.json index 2e151fd072..5ebf9877c0 100644 --- a/packages/data-provider/package.json +++ b/packages/data-provider/package.json @@ -1,6 +1,6 @@ { "name": "librechat-data-provider", - "version": "0.7.7", + "version": "0.7.71", "description": "data services for librechat apps", "main": "dist/index.js", "module": "dist/index.es.js", diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index d4c400c827..1cc0d5a785 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -168,6 +168,7 @@ export enum AgentCapabilities { artifacts = 'artifacts', actions = 'actions', tools = 'tools', + ocr = 'ocr', } export const defaultAssistantsVersion = { @@ -242,6 +243,7 @@ export const agentsEndpointSChema = baseEndpointSchema.merge( AgentCapabilities.artifacts, AgentCapabilities.actions, AgentCapabilities.tools, + AgentCapabilities.ocr, ]), }), ); @@ -534,9 +536,22 @@ export type TStartupConfig = { bundlerURL?: string; }; +export enum OCRStrategy { + MISTRAL_OCR = 'mistral_ocr', + CUSTOM_OCR = 'custom_ocr', +} + +export const ocrSchema = z.object({ + mistralModel: z.string().optional(), + apiKey: z.string().optional().default('OCR_API_KEY'), + baseURL: z.string().optional().default('OCR_BASEURL'), + strategy: z.nativeEnum(OCRStrategy).default(OCRStrategy.MISTRAL_OCR), +}); + export const configSchema = z.object({ version: z.string(), cache: z.boolean().default(true), + ocr: ocrSchema.optional(), secureImageLinks: z.boolean().optional(), imageOutputType: z.nativeEnum(EImageOutputType).default(EImageOutputType.PNG), includedTools: z.array(z.string()).optional(), @@ -1175,7 +1190,7 @@ export enum Constants { /** Key for the app's 
version. */ VERSION = 'v0.7.7', /** Key for the Custom Config's version (librechat.yaml). */ - CONFIG_VERSION = '1.2.1', + CONFIG_VERSION = '1.2.2', /** Standard value for the first message's `parentMessageId` value, to indicate no parent exists. */ NO_PARENT = '00000000-0000-0000-0000-000000000000', /** Standard value for the initial conversationId before a request is sent */ diff --git a/packages/data-provider/src/index.ts b/packages/data-provider/src/index.ts index 90b396001b..028ed07f19 100644 --- a/packages/data-provider/src/index.ts +++ b/packages/data-provider/src/index.ts @@ -7,6 +7,7 @@ export * from './file-config'; export * from './artifacts'; /* schema helpers */ export * from './parsers'; +export * from './ocr'; export * from './zod'; /* custom/dynamic configurations */ export * from './generate'; diff --git a/packages/data-provider/src/ocr.ts b/packages/data-provider/src/ocr.ts new file mode 100644 index 0000000000..cfde43025b --- /dev/null +++ b/packages/data-provider/src/ocr.ts @@ -0,0 +1,14 @@ +import type { TCustomConfig } from '../src/config'; +import { OCRStrategy } from '../src/config'; + +export function loadOCRConfig(config: TCustomConfig['ocr']): TCustomConfig['ocr'] { + const baseURL = config?.baseURL ?? ''; + const apiKey = config?.apiKey ?? ''; + const mistralModel = config?.mistralModel ?? ''; + return { + apiKey, + baseURL, + mistralModel, + strategy: config?.strategy ?? OCRStrategy.MISTRAL_OCR, + }; +} diff --git a/packages/data-provider/src/schemas.ts b/packages/data-provider/src/schemas.ts index 533d6ffc37..5be5b44453 100644 --- a/packages/data-provider/src/schemas.ts +++ b/packages/data-provider/src/schemas.ts @@ -1152,7 +1152,6 @@ export const compactAgentsSchema = tConversationSchema iconURL: true, greeting: true, agent_id: true, - resendFiles: true, instructions: true, additional_instructions: true, }) diff --git a/packages/data-provider/src/types/assistants.ts b/packages/data-provider/src/types/assistants.ts index a351752298..7092f98ae6 100644 --- a/packages/data-provider/src/types/assistants.ts +++ b/packages/data-provider/src/types/assistants.ts @@ -27,6 +27,7 @@ export enum EToolResources { code_interpreter = 'code_interpreter', execute_code = 'execute_code', file_search = 'file_search', + ocr = 'ocr', } export type Tool = { @@ -163,7 +164,8 @@ export type AgentModelParameters = { export interface AgentToolResources { execute_code?: ExecuteCodeResource; - file_search?: AgentFileSearchResource; + file_search?: AgentFileResource; + ocr?: Omit; } export interface ExecuteCodeResource { /** @@ -177,7 +179,7 @@ export interface ExecuteCodeResource { files?: Array; } -export interface AgentFileSearchResource { +export interface AgentFileResource { /** * The ID of the vector store attached to this agent. There * can be a maximum of 1 vector store attached to the agent. 
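For orientation, the new `ocr` key added to `configSchema` earlier in this patch (see `ocrSchema` in packages/data-provider/src/config.ts) implies a `librechat.yaml` block roughly like the sketch below. This is a hedged illustration only: the `mistral-ocr-latest` model name is an assumption not taken from this patch, and the `apiKey`/`baseURL` values name environment variables (matching the schema defaults `OCR_API_KEY` and `OCR_BASEURL`) rather than holding secrets; `strategy` must be one of the `OCRStrategy` enum values (`mistral_ocr` or `custom_ocr`).

# Sketch of an ocr block implied by ocrSchema (values are illustrative, not from this patch)
ocr:
  mistralModel: "mistral-ocr-latest"   # assumed model name
  apiKey: "OCR_API_KEY"                # name of the env var to read, per the schema default
  baseURL: "OCR_BASEURL"               # name of the env var to read, per the schema default
  strategy: "mistral_ocr"              # OCRStrategy.MISTRAL_OCR, the schema default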
diff --git a/packages/data-provider/src/types/files.ts b/packages/data-provider/src/types/files.ts index 5985096f4c..78ef7781e8 100644 --- a/packages/data-provider/src/types/files.ts +++ b/packages/data-provider/src/types/files.ts @@ -8,6 +8,8 @@ export enum FileSources { s3 = 's3', vectordb = 'vectordb', execute_code = 'execute_code', + mistral_ocr = 'mistral_ocr', + text = 'text', } export const checkOpenAIStorage = (source: string) => diff --git a/packages/data-schemas/src/schema/file.ts b/packages/data-schemas/src/schema/file.ts index b9fddd82d5..6d3b22a5a5 100644 --- a/packages/data-schemas/src/schema/file.ts +++ b/packages/data-schemas/src/schema/file.ts @@ -8,6 +8,7 @@ export interface IMongoFile extends Document { file_id: string; temp_file_id?: string; bytes: number; + text?: string; filename: string; filepath: string; object: 'file'; @@ -72,6 +73,9 @@ const file: Schema = new Schema( type: String, required: true, }, + text: { + type: String, + }, context: { type: String, }, From cbd5bd240561d27c97eba2c29d9b4f3bf8ece58f Mon Sep 17 00:00:00 2001 From: Marco Beretta <81851188+berry-13@users.noreply.github.com> Date: Tue, 11 Mar 2025 18:18:27 +0100 Subject: [PATCH 02/12] =?UTF-8?q?=F0=9F=8E=A8=20a11y:=20Update=20Model=20S?= =?UTF-8?q?pec=20Description=20Text=20(#6294)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- client/src/components/Chat/Menus/Models/ModelSpec.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/src/components/Chat/Menus/Models/ModelSpec.tsx b/client/src/components/Chat/Menus/Models/ModelSpec.tsx index 44cf51a976..617680946a 100644 --- a/client/src/components/Chat/Menus/Models/ModelSpec.tsx +++ b/client/src/components/Chat/Menus/Models/ModelSpec.tsx @@ -75,7 +75,7 @@ const MenuItem: FC = ({ {showIconInMenu && }
{title} -
{description}
+
{description}
From cf03731cc8c87e2130a504b08afb2a7109684f52 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Tue, 11 Mar 2025 14:44:54 -0400 Subject: [PATCH 03/12] =?UTF-8?q?=F0=9F=94=A7=20fix:=20Axios=20Proxy=20Usa?= =?UTF-8?q?ge=20And=20Bump=20`mongoose`=20(#6298)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: bump mongoose to fix nested schema errors * fix: Enhance Axios instance creation with improved proxy handling and error logging * fix: Refactor Axios instance creation and remove proxy handling from file upload functions * fix: Update proxy configuration in Axios instance creation and add unit tests --- api/config/index.js | 32 +++- api/config/index.spec.js | 126 ++++++++++++++++ api/package.json | 2 +- api/server/services/Files/Code/crud.js | 18 +-- package-lock.json | 201 +++++++++++++++---------- packages/data-schemas/package.json | 2 +- 6 files changed, 279 insertions(+), 102 deletions(-) create mode 100644 api/config/index.spec.js diff --git a/api/config/index.js b/api/config/index.js index dd765efb8e..8f23e404c8 100644 --- a/api/config/index.js +++ b/api/config/index.js @@ -48,15 +48,37 @@ const sendEvent = (res, event) => { res.write(`event: message\ndata: ${JSON.stringify(event)}\n\n`); }; +/** + * Creates and configures an Axios instance with optional proxy settings. + * + * @typedef {import('axios').AxiosInstance} AxiosInstance + * @typedef {import('axios').AxiosProxyConfig} AxiosProxyConfig + * + * @returns {AxiosInstance} A configured Axios instance + * @throws {Error} If there's an issue creating the Axios instance or parsing the proxy URL + */ function createAxiosInstance() { const instance = axios.create(); if (process.env.proxy) { - const url = new URL(process.env.proxy); - instance.defaults.proxy = { - host: url.hostname, - protocol: url.protocol.replace(':', ''), - }; + try { + const url = new URL(process.env.proxy); + + /** @type {AxiosProxyConfig} */ + const proxyConfig = { + host: url.hostname.replace(/^\[|\]$/g, ''), + protocol: url.protocol.replace(':', ''), + }; + + if (url.port) { + proxyConfig.port = parseInt(url.port, 10); + } + + instance.defaults.proxy = proxyConfig; + } catch (error) { + console.error('Error parsing proxy URL:', error); + throw new Error(`Invalid proxy URL: ${process.env.proxy}`); + } } return instance; diff --git a/api/config/index.spec.js b/api/config/index.spec.js new file mode 100644 index 0000000000..36ed8302f3 --- /dev/null +++ b/api/config/index.spec.js @@ -0,0 +1,126 @@ +const axios = require('axios'); +const { createAxiosInstance } = require('./index'); + +// Mock axios +jest.mock('axios', () => ({ + interceptors: { + request: { use: jest.fn(), eject: jest.fn() }, + response: { use: jest.fn(), eject: jest.fn() }, + }, + create: jest.fn().mockReturnValue({ + defaults: { + proxy: null, + }, + get: jest.fn().mockResolvedValue({ data: {} }), + post: jest.fn().mockResolvedValue({ data: {} }), + put: jest.fn().mockResolvedValue({ data: {} }), + delete: jest.fn().mockResolvedValue({ data: {} }), + }), + get: jest.fn().mockResolvedValue({ data: {} }), + post: jest.fn().mockResolvedValue({ data: {} }), + put: jest.fn().mockResolvedValue({ data: {} }), + delete: jest.fn().mockResolvedValue({ data: {} }), + reset: jest.fn().mockImplementation(function () { + this.get.mockClear(); + this.post.mockClear(); + this.put.mockClear(); + this.delete.mockClear(); + this.create.mockClear(); + }), +})); + +describe('createAxiosInstance', () => { + const originalEnv = process.env; + + beforeEach(() => { + // 
Reset mocks + jest.clearAllMocks(); + // Create a clean copy of process.env + process.env = { ...originalEnv }; + // Default: no proxy + delete process.env.proxy; + }); + + afterAll(() => { + // Restore original process.env + process.env = originalEnv; + }); + + test('creates an axios instance without proxy when no proxy env is set', () => { + const instance = createAxiosInstance(); + + expect(axios.create).toHaveBeenCalledTimes(1); + expect(instance.defaults.proxy).toBeNull(); + }); + + test('configures proxy correctly with hostname and protocol', () => { + process.env.proxy = 'http://example.com'; + + const instance = createAxiosInstance(); + + expect(axios.create).toHaveBeenCalledTimes(1); + expect(instance.defaults.proxy).toEqual({ + host: 'example.com', + protocol: 'http', + }); + }); + + test('configures proxy correctly with hostname, protocol and port', () => { + process.env.proxy = 'https://proxy.example.com:8080'; + + const instance = createAxiosInstance(); + + expect(axios.create).toHaveBeenCalledTimes(1); + expect(instance.defaults.proxy).toEqual({ + host: 'proxy.example.com', + protocol: 'https', + port: 8080, + }); + }); + + test('handles proxy URLs with authentication', () => { + process.env.proxy = 'http://user:pass@proxy.example.com:3128'; + + const instance = createAxiosInstance(); + + expect(axios.create).toHaveBeenCalledTimes(1); + expect(instance.defaults.proxy).toEqual({ + host: 'proxy.example.com', + protocol: 'http', + port: 3128, + // Note: The current implementation doesn't handle auth - if needed, add this functionality + }); + }); + + test('throws error when proxy URL is invalid', () => { + process.env.proxy = 'invalid-url'; + + expect(() => createAxiosInstance()).toThrow('Invalid proxy URL'); + expect(axios.create).toHaveBeenCalledTimes(1); + }); + + // If you want to test the actual URL parsing more thoroughly + test('handles edge case proxy URLs correctly', () => { + // IPv6 address + process.env.proxy = 'http://[::1]:8080'; + + let instance = createAxiosInstance(); + + expect(instance.defaults.proxy).toEqual({ + host: '::1', + protocol: 'http', + port: 8080, + }); + + // URL with path (which should be ignored for proxy config) + process.env.proxy = 'http://proxy.example.com:8080/some/path'; + + instance = createAxiosInstance(); + + expect(instance.defaults.proxy).toEqual({ + host: 'proxy.example.com', + protocol: 'http', + port: 8080, + }); + }); +}); diff --git a/api/package.json b/api/package.json index cfc9977aaf..9197691a72 100644 --- a/api/package.json +++ b/api/package.json @@ -82,7 +82,7 @@ "memorystore": "^1.6.7", "mime": "^3.0.0", "module-alias": "^2.2.3", - "mongoose": "^8.9.5", + "mongoose": "^8.12.1", "multer": "^1.4.5-lts.1", "nanoid": "^3.3.7", "nodemailer": "^6.9.15", diff --git a/api/server/services/Files/Code/crud.js b/api/server/services/Files/Code/crud.js index a467f6a29a..1360cccadb 100644 --- a/api/server/services/Files/Code/crud.js +++ b/api/server/services/Files/Code/crud.js @@ -1,8 +1,10 @@ -const axios = require('axios'); const FormData = require('form-data'); const { getCodeBaseURL } = require('@librechat/agents'); +const { createAxiosInstance } = require('~/config'); const { logAxiosError } = require('~/utils'); +const axios = createAxiosInstance(); + const MAX_FILE_SIZE = 150 * 1024 * 1024; /** @@ -27,13 +29,6 @@ async function getCodeOutputDownloadStream(fileIdentifier, apiKey) { timeout: 15000, }; - if (process.env.PROXY) { - options.proxy = { - host: process.env.PROXY, - protocol: process.env.PROXY.startsWith('https') ? 
'https' : 'http', - }; - } - const response = await axios(options); return response; } catch (error) { @@ -79,13 +74,6 @@ async function uploadCodeEnvFile({ req, stream, filename, apiKey, entity_id = '' maxBodyLength: MAX_FILE_SIZE, }; - if (process.env.PROXY) { - options.proxy = { - host: process.env.PROXY, - protocol: process.env.PROXY.startsWith('https') ? 'https' : 'http', - }; - } - const response = await axios.post(`${baseURL}/upload`, form, options); /** @type {{ message: string; session_id: string; files: Array<{ fileId: string; filename: string }> }} */ diff --git a/package-lock.json b/package-lock.json index d7e4f93476..db80801444 100644 --- a/package-lock.json +++ b/package-lock.json @@ -98,7 +98,7 @@ "memorystore": "^1.6.7", "mime": "^3.0.0", "module-alias": "^2.2.3", - "mongoose": "^8.9.5", + "mongoose": "^8.12.1", "multer": "^1.4.5-lts.1", "nanoid": "^3.3.7", "nodemailer": "^6.9.15", @@ -677,7 +677,6 @@ "version": "11.0.5", "resolved": "https://registry.npmjs.org/@types/whatwg-url/-/whatwg-url-11.0.5.tgz", "integrity": "sha512-coYR071JRaHa+xoEvvYqvnIHaVqaYrLPbsufM9BF63HkwI5Lgmy2QR8Q5K/lYDYo5AK82wOvSOS0UsLTpTG7uQ==", - "devOptional": true, "dependencies": { "@types/webidl-conversions": "*" } @@ -691,6 +690,14 @@ "node": ">= 14" } }, + "api/node_modules/bson": { + "version": "6.10.3", + "resolved": "https://registry.npmjs.org/bson/-/bson-6.10.3.tgz", + "integrity": "sha512-MTxGsqgYTwfshYWTRdmZRC+M7FnG1b4y7RO7p2k3X24Wq0yv1m77Wsj0BzlPzd/IowgESfsruQCUToa7vbOpPQ==", + "engines": { + "node": ">=16.20.1" + } + }, "api/node_modules/cookie-parser": { "version": "1.4.7", "resolved": "https://registry.npmjs.org/cookie-parser/-/cookie-parser-1.4.7.tgz", @@ -880,13 +887,12 @@ } }, "api/node_modules/mongodb": { - "version": "6.10.0", - "resolved": "https://registry.npmjs.org/mongodb/-/mongodb-6.10.0.tgz", - "integrity": "sha512-gP9vduuYWb9ZkDM546M+MP2qKVk5ZG2wPF63OvSRuUbqCR+11ZCAE1mOfllhlAG0wcoJY5yDL/rV3OmYEwXIzg==", - "devOptional": true, + "version": "6.14.2", + "resolved": "https://registry.npmjs.org/mongodb/-/mongodb-6.14.2.tgz", + "integrity": "sha512-kMEHNo0F3P6QKDq17zcDuPeaywK/YaJVCEQRzPF3TOM/Bl9MFg64YE5Tu7ifj37qZJMhwU1tl2Ioivws5gRG5Q==", "dependencies": { - "@mongodb-js/saslprep": "^1.1.5", - "bson": "^6.7.0", + "@mongodb-js/saslprep": "^1.1.9", + "bson": "^6.10.3", "mongodb-connection-string-url": "^3.0.0" }, "engines": { @@ -894,7 +900,7 @@ }, "peerDependencies": { "@aws-sdk/credential-providers": "^3.188.0", - "@mongodb-js/zstd": "^1.1.0", + "@mongodb-js/zstd": "^1.1.0 || ^2.0.0", "gcp-metadata": "^5.2.0", "kerberos": "^2.0.1", "mongodb-client-encryption": ">=6.0.0 <7", @@ -929,7 +935,6 @@ "version": "3.0.1", "resolved": "https://registry.npmjs.org/mongodb-connection-string-url/-/mongodb-connection-string-url-3.0.1.tgz", "integrity": "sha512-XqMGwRX0Lgn05TDB4PyG2h2kKO/FfWJyCzYQbIhXUxz7ETt0I/FqHjUeqj37irJ+Dl1ZtU82uYyj14u2XsZKfg==", - "devOptional": true, "dependencies": { "@types/whatwg-url": "^11.0.2", "whatwg-url": "^13.0.0" @@ -939,7 +944,6 @@ "version": "4.1.1", "resolved": "https://registry.npmjs.org/tr46/-/tr46-4.1.1.tgz", "integrity": "sha512-2lv/66T7e5yNyhAAC4NaKe5nVavzuGJQVVtRYLyQ2OI8tsJ61PMLlelehb0wi2Hx6+hT/OJUWZcw8MjlSRnxvw==", - "devOptional": true, "dependencies": { "punycode": "^2.3.0" }, @@ -951,7 +955,6 @@ "version": "7.0.0", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-7.0.0.tgz", "integrity": "sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==", - "devOptional": true, "engines": { 
"node": ">=12" } @@ -960,7 +963,6 @@ "version": "13.0.0", "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-13.0.0.tgz", "integrity": "sha512-9WWbymnqj57+XEuqADHrCJ2eSXzn8WXIW/YSGaZtb2WKAInQ6CHfaUUcTyyver0p8BDg5StLQq8h1vtZuwmOig==", - "devOptional": true, "dependencies": { "tr46": "^4.1.1", "webidl-conversions": "^7.0.0" @@ -1008,6 +1010,27 @@ "node": ">=16.20.1" } }, + "api/node_modules/mongoose": { + "version": "8.12.1", + "resolved": "https://registry.npmjs.org/mongoose/-/mongoose-8.12.1.tgz", + "integrity": "sha512-UW22y8QFVYmrb36hm8cGncfn4ARc/XsYWQwRTaj0gxtQk1rDuhzDO1eBantS+hTTatfAIS96LlRCJrcNHvW5+Q==", + "dependencies": { + "bson": "^6.10.3", + "kareem": "2.6.3", + "mongodb": "~6.14.0", + "mpath": "0.9.0", + "mquery": "5.0.0", + "ms": "2.1.3", + "sift": "17.1.3" + }, + "engines": { + "node": ">=16.20.1" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/mongoose" + } + }, "api/node_modules/node-fetch": { "version": "2.6.7", "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz", @@ -22564,6 +22587,8 @@ "version": "6.10.1", "resolved": "https://registry.npmjs.org/bson/-/bson-6.10.1.tgz", "integrity": "sha512-P92xmHDQjSKPLHqFxefqMxASNq/aWJMEZugpCjf+AF/pgcUpMMQCg7t7+ewko0/u8AapvF3luf/FoehddEK+sA==", + "optional": true, + "peer": true, "engines": { "node": ">=16.20.1" } @@ -31839,6 +31864,8 @@ "version": "6.12.0", "resolved": "https://registry.npmjs.org/mongodb/-/mongodb-6.12.0.tgz", "integrity": "sha512-RM7AHlvYfS7jv7+BXund/kR64DryVI+cHbVAy9P61fnb1RcWZqOW1/Wj2YhqMCx+MuYhqTRGv7AwHBzmsCKBfA==", + "optional": true, + "peer": true, "dependencies": { "@mongodb-js/saslprep": "^1.1.9", "bson": "^6.10.1", @@ -31893,6 +31920,8 @@ "version": "11.0.5", "resolved": "https://registry.npmjs.org/@types/whatwg-url/-/whatwg-url-11.0.5.tgz", "integrity": "sha512-coYR071JRaHa+xoEvvYqvnIHaVqaYrLPbsufM9BF63HkwI5Lgmy2QR8Q5K/lYDYo5AK82wOvSOS0UsLTpTG7uQ==", + "optional": true, + "peer": true, "dependencies": { "@types/webidl-conversions": "*" } @@ -31901,6 +31930,8 @@ "version": "3.0.2", "resolved": "https://registry.npmjs.org/mongodb-connection-string-url/-/mongodb-connection-string-url-3.0.2.tgz", "integrity": "sha512-rMO7CGo/9BFwyZABcKAWL8UJwH/Kc2x0g72uhDWzG48URRax5TCIcJ7Rc3RZqffZzO/Gwff/jyKwCU9TN8gehA==", + "optional": true, + "peer": true, "dependencies": { "@types/whatwg-url": "^11.0.2", "whatwg-url": "^14.1.0 || ^13.0.0" @@ -31910,6 +31941,8 @@ "version": "5.0.0", "resolved": "https://registry.npmjs.org/tr46/-/tr46-5.0.0.tgz", "integrity": "sha512-tk2G5R2KRwBd+ZN0zaEXpmzdKyOYksXwywulIX95MBODjSzMIuQnQ3m8JxgbhnL1LeVo7lqQKsYa1O3Htl7K5g==", + "optional": true, + "peer": true, "dependencies": { "punycode": "^2.3.1" }, @@ -31921,6 +31954,8 @@ "version": "14.1.0", "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-14.1.0.tgz", "integrity": "sha512-jlf/foYIKywAt3x/XWKZ/3rz8OSJPiWktjmk891alJUEjiVxKX9LEO92qH3hv4aJ0mN3MWPvGMCy8jQi95xK4w==", + "optional": true, + "peer": true, "dependencies": { "tr46": "^5.0.0", "webidl-conversions": "^7.0.0" @@ -31929,27 +31964,6 @@ "node": ">=18" } }, - "node_modules/mongoose": { - "version": "8.9.5", - "resolved": "https://registry.npmjs.org/mongoose/-/mongoose-8.9.5.tgz", - "integrity": "sha512-SPhOrgBm0nKV3b+IIHGqpUTOmgVL5Z3OO9AwkFEmvOZznXTvplbomstCnPOGAyungtRXE5pJTgKpKcZTdjeESg==", - "dependencies": { - "bson": "^6.10.1", - "kareem": "2.6.3", - "mongodb": "~6.12.0", - "mpath": "0.9.0", - "mquery": "5.0.0", - "ms": "2.1.3", - "sift": "17.1.3" - }, - "engines": { - "node": ">=16.20.1" - }, - 
"funding": { - "type": "opencollective", - "url": "https://opencollective.com/mongoose" - } - }, "node_modules/moo-color": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/moo-color/-/moo-color-1.0.3.tgz", @@ -41152,9 +41166,9 @@ "packages/data-schemas": { "name": "@librechat/data-schemas", "version": "0.0.2", - "license": "ISC", + "license": "MIT", "dependencies": { - "mongoose": "^8.9.5" + "mongoose": "^8.12.1" }, "devDependencies": { "@rollup/plugin-alias": "^5.1.0", @@ -41182,6 +41196,14 @@ "keyv": "^4.5.4" } }, + "packages/data-schemas/node_modules/@types/whatwg-url": { + "version": "11.0.5", + "resolved": "https://registry.npmjs.org/@types/whatwg-url/-/whatwg-url-11.0.5.tgz", + "integrity": "sha512-coYR071JRaHa+xoEvvYqvnIHaVqaYrLPbsufM9BF63HkwI5Lgmy2QR8Q5K/lYDYo5AK82wOvSOS0UsLTpTG7uQ==", + "dependencies": { + "@types/webidl-conversions": "*" + } + }, "packages/data-schemas/node_modules/brace-expansion": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", @@ -41193,12 +41215,11 @@ } }, "packages/data-schemas/node_modules/bson": { - "version": "5.5.1", - "resolved": "https://registry.npmjs.org/bson/-/bson-5.5.1.tgz", - "integrity": "sha512-ix0EwukN2EpC0SRWIj/7B5+A6uQMQy6KMREI9qQqvgpkV2frH63T0UDVd1SYedL6dNCmDBYB3QtXi4ISk9YT+g==", - "license": "Apache-2.0", + "version": "6.10.3", + "resolved": "https://registry.npmjs.org/bson/-/bson-6.10.3.tgz", + "integrity": "sha512-MTxGsqgYTwfshYWTRdmZRC+M7FnG1b4y7RO7p2k3X24Wq0yv1m77Wsj0BzlPzd/IowgESfsruQCUToa7vbOpPQ==", "engines": { - "node": ">=14.20.1" + "node": ">=16.20.1" } }, "packages/data-schemas/node_modules/glob": { @@ -41238,15 +41259,6 @@ "@pkgjs/parseargs": "^0.11.0" } }, - "packages/data-schemas/node_modules/kareem": { - "version": "2.5.1", - "resolved": "https://registry.npmjs.org/kareem/-/kareem-2.5.1.tgz", - "integrity": "sha512-7jFxRVm+jD+rkq3kY0iZDJfsO2/t4BBPeEb2qKn2lR/9KhuksYk5hxzfRYWMPV8P/x2d0kHD306YyWLzjjH+uA==", - "license": "Apache-2.0", - "engines": { - "node": ">=12.0.0" - } - }, "packages/data-schemas/node_modules/minimatch": { "version": "9.0.5", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", @@ -41264,27 +41276,25 @@ } }, "packages/data-schemas/node_modules/mongodb": { - "version": "5.9.2", - "resolved": "https://registry.npmjs.org/mongodb/-/mongodb-5.9.2.tgz", - "integrity": "sha512-H60HecKO4Bc+7dhOv4sJlgvenK4fQNqqUIlXxZYQNbfEWSALGAwGoyJd/0Qwk4TttFXUOHJ2ZJQe/52ScaUwtQ==", - "license": "Apache-2.0", + "version": "6.14.2", + "resolved": "https://registry.npmjs.org/mongodb/-/mongodb-6.14.2.tgz", + "integrity": "sha512-kMEHNo0F3P6QKDq17zcDuPeaywK/YaJVCEQRzPF3TOM/Bl9MFg64YE5Tu7ifj37qZJMhwU1tl2Ioivws5gRG5Q==", "dependencies": { - "bson": "^5.5.0", - "mongodb-connection-string-url": "^2.6.0", - "socks": "^2.7.1" + "@mongodb-js/saslprep": "^1.1.9", + "bson": "^6.10.3", + "mongodb-connection-string-url": "^3.0.0" }, "engines": { - "node": ">=14.20.1" - }, - "optionalDependencies": { - "@mongodb-js/saslprep": "^1.1.0" + "node": ">=16.20.1" }, "peerDependencies": { "@aws-sdk/credential-providers": "^3.188.0", - "@mongodb-js/zstd": "^1.0.0", - "kerberos": "^1.0.0 || ^2.0.0", - "mongodb-client-encryption": ">=2.3.0 <3", - "snappy": "^7.2.2" + "@mongodb-js/zstd": "^1.1.0 || ^2.0.0", + "gcp-metadata": "^5.2.0", + "kerberos": "^2.0.1", + "mongodb-client-encryption": ">=6.0.0 <7", + "snappy": "^7.2.2", + "socks": "^2.7.1" }, "peerDependenciesMeta": { "@aws-sdk/credential-providers": { @@ -41293,6 +41303,9 @@ "@mongodb-js/zstd": { "optional": 
true }, + "gcp-metadata": { + "optional": true + }, "kerberos": { "optional": true }, @@ -41301,25 +41314,36 @@ }, "snappy": { "optional": true + }, + "socks": { + "optional": true } } }, - "packages/data-schemas/node_modules/mongoose": { - "version": "7.8.6", - "resolved": "https://registry.npmjs.org/mongoose/-/mongoose-7.8.6.tgz", - "integrity": "sha512-1oVPRHvcmPVwk/zeSTEzayzQEVeYQM1D5zrkLsttfNNB7pPRUmkKeFu6gpbvyEswOuZLrWJjqB8kSTY+k2AZOA==", - "license": "MIT", + "packages/data-schemas/node_modules/mongodb-connection-string-url": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/mongodb-connection-string-url/-/mongodb-connection-string-url-3.0.2.tgz", + "integrity": "sha512-rMO7CGo/9BFwyZABcKAWL8UJwH/Kc2x0g72uhDWzG48URRax5TCIcJ7Rc3RZqffZzO/Gwff/jyKwCU9TN8gehA==", "dependencies": { - "bson": "^5.5.0", - "kareem": "2.5.1", - "mongodb": "5.9.2", + "@types/whatwg-url": "^11.0.2", + "whatwg-url": "^14.1.0 || ^13.0.0" + } + }, + "packages/data-schemas/node_modules/mongoose": { + "version": "8.12.1", + "resolved": "https://registry.npmjs.org/mongoose/-/mongoose-8.12.1.tgz", + "integrity": "sha512-UW22y8QFVYmrb36hm8cGncfn4ARc/XsYWQwRTaj0gxtQk1rDuhzDO1eBantS+hTTatfAIS96LlRCJrcNHvW5+Q==", + "dependencies": { + "bson": "^6.10.3", + "kareem": "2.6.3", + "mongodb": "~6.14.0", "mpath": "0.9.0", "mquery": "5.0.0", "ms": "2.1.3", - "sift": "16.0.1" + "sift": "17.1.3" }, "engines": { - "node": ">=14.20.1" + "node": ">=16.20.1" }, "funding": { "type": "opencollective", @@ -41342,11 +41366,28 @@ "url": "https://github.com/sponsors/isaacs" } }, - "packages/data-schemas/node_modules/sift": { - "version": "16.0.1", - "resolved": "https://registry.npmjs.org/sift/-/sift-16.0.1.tgz", - "integrity": "sha512-Wv6BjQ5zbhW7VFefWusVP33T/EM0vYikCaQ2qR8yULbsilAT8/wQaXvuQ3ptGLpoKx+lihJE3y2UTgKDyyNHZQ==", - "license": "MIT" + "packages/data-schemas/node_modules/tr46": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-5.0.0.tgz", + "integrity": "sha512-tk2G5R2KRwBd+ZN0zaEXpmzdKyOYksXwywulIX95MBODjSzMIuQnQ3m8JxgbhnL1LeVo7lqQKsYa1O3Htl7K5g==", + "dependencies": { + "punycode": "^2.3.1" + }, + "engines": { + "node": ">=18" + } + }, + "packages/data-schemas/node_modules/whatwg-url": { + "version": "14.1.1", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-14.1.1.tgz", + "integrity": "sha512-mDGf9diDad/giZ/Sm9Xi2YcyzaFpbdLpJPr+E9fSkyQ7KpQD4SdFcugkRQYzhmfI4KeV4Qpnn2sKPdo+kmsgRQ==", + "dependencies": { + "tr46": "^5.0.0", + "webidl-conversions": "^7.0.0" + }, + "engines": { + "node": ">=18" + } }, "packages/mcp": { "name": "librechat-mcp", diff --git a/packages/data-schemas/package.json b/packages/data-schemas/package.json index 82f398e31b..1b62a372db 100644 --- a/packages/data-schemas/package.json +++ b/packages/data-schemas/package.json @@ -60,7 +60,7 @@ "access": "public" }, "dependencies": { - "mongoose": "^8.9.5" + "mongoose": "^8.12.1" }, "peerDependencies": { "keyv": "^4.5.4" From efed1c461de3c3cf143190b7ee37d5fe57e89cdb Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Wed, 12 Mar 2025 12:03:16 -0400 Subject: [PATCH 04/12] =?UTF-8?q?=F0=9F=A4=96=20feat:=20Support=20OpenAI?= =?UTF-8?q?=20Web=20Search=20models=20(#6313)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: reorder vision model entries for cheaper models first * fix: add endpoint property to bedrock client initialization * fix: exclude unsupported parameters for OpenAI Web Search models * fix: enhance options to exclude unsupported parameters for Web 
Search models --- api/app/clients/OpenAIClient.js | 23 ++++++++++++++++ .../services/Endpoints/bedrock/initialize.js | 3 ++- .../services/Endpoints/openAI/initialize.js | 9 +++---- api/server/services/Endpoints/openAI/llm.js | 26 +++++++++++++++++-- packages/data-provider/src/config.ts | 18 ++++++------- 5 files changed, 61 insertions(+), 18 deletions(-) diff --git a/api/app/clients/OpenAIClient.js b/api/app/clients/OpenAIClient.js index 9a89e34879..a1ab496b5d 100644 --- a/api/app/clients/OpenAIClient.js +++ b/api/app/clients/OpenAIClient.js @@ -1272,6 +1272,29 @@ ${convo} }); } + /** Note: OpenAI Web Search models do not support any known parameters besdies `max_tokens` */ + if (modelOptions.model && /gpt-4o.*search/.test(modelOptions.model)) { + const searchExcludeParams = [ + 'frequency_penalty', + 'presence_penalty', + 'temperature', + 'top_p', + 'top_k', + 'stop', + 'logit_bias', + 'seed', + 'response_format', + 'n', + 'logprobs', + 'user', + ]; + + this.options.dropParams = this.options.dropParams || []; + this.options.dropParams = [ + ...new Set([...this.options.dropParams, ...searchExcludeParams]), + ]; + } + if (this.options.dropParams && Array.isArray(this.options.dropParams)) { this.options.dropParams.forEach((param) => { delete modelOptions[param]; diff --git a/api/server/services/Endpoints/bedrock/initialize.js b/api/server/services/Endpoints/bedrock/initialize.js index 3ffa03393d..51d0040566 100644 --- a/api/server/services/Endpoints/bedrock/initialize.js +++ b/api/server/services/Endpoints/bedrock/initialize.js @@ -23,8 +23,9 @@ const initializeClient = async ({ req, res, endpointOption }) => { const agent = { id: EModelEndpoint.bedrock, name: endpointOption.name, - instructions: endpointOption.promptPrefix, provider: EModelEndpoint.bedrock, + endpoint: EModelEndpoint.bedrock, + instructions: endpointOption.promptPrefix, model: endpointOption.model_parameters.model, model_parameters: endpointOption.model_parameters, }; diff --git a/api/server/services/Endpoints/openAI/initialize.js b/api/server/services/Endpoints/openAI/initialize.js index 5614804b68..4d358cef1a 100644 --- a/api/server/services/Endpoints/openAI/initialize.js +++ b/api/server/services/Endpoints/openAI/initialize.js @@ -135,12 +135,9 @@ const initializeClient = async ({ } if (optionsOnly) { - clientOptions = Object.assign( - { - modelOptions: endpointOption.model_parameters, - }, - clientOptions, - ); + const modelOptions = endpointOption.model_parameters; + modelOptions.model = modelName; + clientOptions = Object.assign({ modelOptions }, clientOptions); clientOptions.modelOptions.user = req.user.id; const options = getLLMConfig(apiKey, clientOptions); if (!clientOptions.streamRate) { diff --git a/api/server/services/Endpoints/openAI/llm.js b/api/server/services/Endpoints/openAI/llm.js index af19ece486..a8aeeb5b9d 100644 --- a/api/server/services/Endpoints/openAI/llm.js +++ b/api/server/services/Endpoints/openAI/llm.js @@ -28,7 +28,7 @@ const { isEnabled } = require('~/server/utils'); * @returns {Object} Configuration options for creating an LLM instance. 
*/ function getLLMConfig(apiKey, options = {}, endpoint = null) { - const { + let { modelOptions = {}, reverseProxyUrl, defaultQuery, @@ -50,10 +50,32 @@ function getLLMConfig(apiKey, options = {}, endpoint = null) { if (addParams && typeof addParams === 'object') { Object.assign(llmConfig, addParams); } + /** Note: OpenAI Web Search models do not support any known parameters besdies `max_tokens` */ + if (modelOptions.model && /gpt-4o.*search/.test(modelOptions.model)) { + const searchExcludeParams = [ + 'frequency_penalty', + 'presence_penalty', + 'temperature', + 'top_p', + 'top_k', + 'stop', + 'logit_bias', + 'seed', + 'response_format', + 'n', + 'logprobs', + 'user', + ]; + + dropParams = dropParams || []; + dropParams = [...new Set([...dropParams, ...searchExcludeParams])]; + } if (dropParams && Array.isArray(dropParams)) { dropParams.forEach((param) => { - delete llmConfig[param]; + if (llmConfig[param]) { + llmConfig[param] = undefined; + } }); } diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index 1cc0d5a785..af850b4180 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -827,28 +827,28 @@ export const supportsBalanceCheck = { }; export const visionModels = [ - 'grok-3', - 'grok-2-vision', 'grok-vision', - 'gpt-4.5', - 'gpt-4o', + 'grok-2-vision', + 'grok-3', 'gpt-4o-mini', - 'o1', + 'gpt-4o', 'gpt-4-turbo', 'gpt-4-vision', + 'o1', + 'gpt-4.5', 'llava', 'llava-13b', 'gemini-pro-vision', 'claude-3', - 'gemini-2.0', - 'gemini-1.5', 'gemini-exp', + 'gemini-1.5', + 'gemini-2.0', 'moondream', 'llama3.2-vision', - 'llama-3.2-90b-vision', 'llama-3.2-11b-vision', - 'llama-3-2-90b-vision', 'llama-3-2-11b-vision', + 'llama-3.2-90b-vision', + 'llama-3-2-90b-vision', ]; export enum VisionModes { generative = 'generative', From bc690cc320595515dc633cf39d34403332935311 Mon Sep 17 00:00:00 2001 From: Kunal <66168931+thecodingwizardx@users.noreply.github.com> Date: Sat, 15 Mar 2025 00:05:46 +0530 Subject: [PATCH 05/12] =?UTF-8?q?=F0=9F=94=A7=20fix:=20comment=20out=20MCP?= =?UTF-8?q?=20servers=20to=20resolve=20service=20run=20issues=20(#6316)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Coding Wizard --- librechat.example.yaml | 52 +++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/librechat.example.yaml b/librechat.example.yaml index 12c6ed9635..bc42ff89fc 100644 --- a/librechat.example.yaml +++ b/librechat.example.yaml @@ -109,32 +109,32 @@ actions: - "google.com" # Example MCP Servers Object Structure -mcpServers: - everything: - # type: sse # type can optionally be omitted - url: http://localhost:3001/sse - timeout: 60000 # 1 minute timeout for this server, this is the default timeout for MCP servers. - puppeteer: - type: stdio - command: npx - args: - - -y - - "@modelcontextprotocol/server-puppeteer" - timeout: 300000 # 5 minutes timeout for this server - filesystem: - # type: stdio - command: npx - args: - - -y - - "@modelcontextprotocol/server-filesystem" - - /home/user/LibreChat/ - iconPath: /home/user/LibreChat/client/public/assets/logo.svg - mcp-obsidian: - command: npx - args: - - -y - - "mcp-obsidian" - - /path/to/obsidian/vault +# mcpServers: +# everything: +# # type: sse # type can optionally be omitted +# url: http://localhost:3001/sse +# timeout: 60000 # 1 minute timeout for this server, this is the default timeout for MCP servers. 
+# puppeteer: +# type: stdio +# command: npx +# args: +# - -y +# - "@modelcontextprotocol/server-puppeteer" +# timeout: 300000 # 5 minutes timeout for this server +# filesystem: +# # type: stdio +# command: npx +# args: +# - -y +# - "@modelcontextprotocol/server-filesystem" +# - /home/user/LibreChat/ +# iconPath: /home/user/LibreChat/client/public/assets/logo.svg +# mcp-obsidian: +# command: npx +# args: +# - -y +# - "mcp-obsidian" +# - /path/to/obsidian/vault # Definition of custom endpoints endpoints: From d6a17784dcb7cdbc6d40fa31bb60fe8dcba8dfd8 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Mon, 17 Mar 2025 16:43:44 -0400 Subject: [PATCH 06/12] =?UTF-8?q?=F0=9F=94=97=20feat:=20Agent=20Chain=20(M?= =?UTF-8?q?ixture-of-Agents)=20(#6374)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * wip: first pass, dropdown for selecting sequential agents * refactor: Improve agent selection logic and enhance performance in SequentialAgents component * wip: seq. agents working ideas * wip: sequential agents style change * refactor: move agent form options/submission outside of AgentConfig * refactor: prevent repeating code * refactor: simplify current agent display in SequentialAgents component * feat: persist form value handling in AgentSelect component for agent_ids * feat: first pass, sequential agnets agent update * feat: enhance message display with agent updates and empty text handling * chore: update Icon component to use EModelEndpoint for agent endpoints * feat: update content type checks in BaseClient to use constants for better readability * feat: adjust max context tokens calculation to use 90% of the model's max tokens * feat: first pass, agent run message pruning * chore: increase max listeners for abort controller to prevent memory leaks * feat: enhance runAgent function to include current index count map for improved token tracking * chore: update @librechat/agents dependency to version 2.2.5 * feat: update icons and style of SequentialAgents component for improved UI consistency * feat: add AdvancedButton and AdvancedPanel components for enhanced agent settings navigation, update styling for agent form * chore: adjust minimum height of AdvancedPanel component for better layout consistency * chore: update @librechat/agents dependency to version 2.2.6 * feat: enhance message formatting by incorporating tool set into agent message processing, in order to allow better mix/matching of agents (as tool calls for tools not found in set will be stringified) * refactor: reorder components in AgentConfig for improved readability and maintainability * refactor: enhance layout of AgentUpdate component for improved visual structure * feat: add DeepSeek provider to Bedrock settings and schemas * feat: enhance link styling in mobile.css for better visibility and accessibility * fix: update banner model import in update banner script; export Banner model * refactor: `duplicateAgentHandler` to include tool_resources only for OCR context files * feat: add 'qwen-vl' to visionModels for enhanced model support * fix: change image format from JPEG to PNG in DALLE3 response * feat: reorganize Advanced components and add localizations * refactor: simplify JSX structure in AgentChain component to defer container styling to parent * feat: add FormInput component for reusable input handling * feat: make agent recursion limit configurable from builder * feat: add support for agent capabilities chain in AdvancedPanel and update data-provider version * feat: add 
maxRecursionLimit configuration for agents and update related documentation * fix: update CONFIG_VERSION to 1.2.3 in data provider configuration * feat: replace recursion limit input with MaxAgentSteps component and enhance input handling * feat: enhance AgentChain component with hover card for additional information and update related labels * fix: pass request and response objects to `createActionTool` when using assistant actions to prevent auth error * feat: update AgentChain component layout to include agent count display * feat: increase default max listeners and implement capability check function for agent chain * fix: update link styles in mobile.css for better visibility in dark mode * chore: temp. remove agents package while bumping shared packages * chore: update @langchain/google-genai package to version 0.1.11 * chore: update @langchain/google-vertexai package to version 0.2.2 * chore: add @librechat/agents package at version 2.2.8 * feat: add deepseek.r1 model with token rate and context values for bedrock --- api/app/clients/BaseClient.js | 11 +- api/app/clients/prompts/formatMessages.js | 2 +- api/app/clients/tools/structured/DALLE3.js | 2 +- api/models/Banner.js | 2 +- api/models/tx.js | 1 + api/models/tx.spec.js | 2 +- api/package.json | 6 +- api/server/controllers/agents/client.js | 113 +- api/server/controllers/agents/v1.js | 14 +- api/server/services/ActionService.js | 4 +- .../services/Config/getEndpointsConfig.js | 13 +- .../services/Endpoints/agents/initialize.js | 5 +- .../services/Endpoints/bedrock/initialize.js | 1 + api/server/services/Files/process.js | 13 +- api/server/services/ToolService.js | 7 +- api/server/utils/handleText.js | 1 + api/utils/tokens.js | 1 + api/utils/tokens.spec.js | 3 + client/src/common/agents-types.ts | 1 + client/src/common/types.ts | 2 + .../Chat/Messages/Content/ContentParts.tsx | 1 + .../Chat/Messages/Content/Markdown.tsx | 7 +- .../Chat/Messages/Content/MessageContent.tsx | 4 +- .../components/Chat/Messages/Content/Part.tsx | 250 +-- .../Messages/Content/Parts/AgentUpdate.tsx | 39 + .../Chat/Messages/Content/Parts/EmptyText.tsx | 17 + .../Chat/Messages/Content/Parts/Text.tsx | 4 +- .../SidePanel/Agents/AdminSettings.tsx | 2 +- .../Agents/Advanced/AdvancedButton.tsx | 27 + .../Agents/Advanced/AdvancedPanel.tsx | 55 + .../SidePanel/Agents/Advanced/AgentChain.tsx | 179 ++ .../Agents/Advanced/MaxAgentSteps.tsx | 52 + .../SidePanel/Agents/AgentConfig.tsx | 119 +- .../SidePanel/Agents/AgentFooter.tsx | 86 + .../SidePanel/Agents/AgentPanel.tsx | 18 + .../SidePanel/Agents/AgentPanelSkeleton.tsx | 2 +- .../SidePanel/Agents/AgentSelect.tsx | 19 + .../SidePanel/Agents/ModelPanel.tsx | 22 +- .../Agents/Sequential/HideSequential.tsx | 74 - .../Agents/Sequential/SequentialAgents.tsx | 153 -- .../SidePanel/Parameters/settings.ts | 2 + client/src/components/ui/FormInput.tsx | 62 + client/src/components/ui/index.ts | 1 + client/src/hooks/SSE/useStepHandler.ts | 39 +- client/src/locales/en/translation.json | 7 + client/src/mobile.css | 10 + config/update-banner.js | 2 +- librechat.example.yaml | 4 +- package-lock.json | 1578 +++++++---------- packages/data-provider/package.json | 2 +- packages/data-provider/src/config.ts | 6 +- packages/data-provider/src/schemas.ts | 2 + packages/data-provider/src/types/agents.ts | 17 +- .../data-provider/src/types/assistants.ts | 12 +- packages/data-provider/src/types/runs.ts | 1 + packages/data-schemas/package.json | 2 +- packages/data-schemas/src/schema/agent.ts | 4 + 57 files changed, 1626 insertions(+), 1459 
deletions(-) create mode 100644 client/src/components/Chat/Messages/Content/Parts/AgentUpdate.tsx create mode 100644 client/src/components/Chat/Messages/Content/Parts/EmptyText.tsx create mode 100644 client/src/components/SidePanel/Agents/Advanced/AdvancedButton.tsx create mode 100644 client/src/components/SidePanel/Agents/Advanced/AdvancedPanel.tsx create mode 100644 client/src/components/SidePanel/Agents/Advanced/AgentChain.tsx create mode 100644 client/src/components/SidePanel/Agents/Advanced/MaxAgentSteps.tsx create mode 100644 client/src/components/SidePanel/Agents/AgentFooter.tsx delete mode 100644 client/src/components/SidePanel/Agents/Sequential/HideSequential.tsx delete mode 100644 client/src/components/SidePanel/Agents/Sequential/SequentialAgents.tsx create mode 100644 client/src/components/ui/FormInput.tsx diff --git a/api/app/clients/BaseClient.js b/api/app/clients/BaseClient.js index 77e14c07d0..e24bffa18b 100644 --- a/api/app/clients/BaseClient.js +++ b/api/app/clients/BaseClient.js @@ -5,6 +5,7 @@ const { isAgentsEndpoint, isParamEndpoint, EModelEndpoint, + ContentTypes, excludedKeys, ErrorTypes, Constants, @@ -1021,11 +1022,17 @@ class BaseClient { const processValue = (value) => { if (Array.isArray(value)) { for (let item of value) { - if (!item || !item.type || item.type === 'image_url') { + if ( + !item || + !item.type || + item.type === ContentTypes.THINK || + item.type === ContentTypes.ERROR || + item.type === ContentTypes.IMAGE_URL + ) { continue; } - if (item.type === 'tool_call' && item.tool_call != null) { + if (item.type === ContentTypes.TOOL_CALL && item.tool_call != null) { const toolName = item.tool_call?.name || ''; if (toolName != null && toolName && typeof toolName === 'string') { numTokens += this.getTokenCount(toolName); diff --git a/api/app/clients/prompts/formatMessages.js b/api/app/clients/prompts/formatMessages.js index 4e8d3bd5a5..9fa0d40497 100644 --- a/api/app/clients/prompts/formatMessages.js +++ b/api/app/clients/prompts/formatMessages.js @@ -211,7 +211,7 @@ const formatAgentMessages = (payload) => { } else if (part.type === ContentTypes.THINK) { hasReasoning = true; continue; - } else if (part.type === ContentTypes.ERROR) { + } else if (part.type === ContentTypes.ERROR || part.type === ContentTypes.AGENT_UPDATE) { continue; } else { currentContent.push(part); diff --git a/api/app/clients/tools/structured/DALLE3.js b/api/app/clients/tools/structured/DALLE3.js index 81200e3a61..fc0f1851f6 100644 --- a/api/app/clients/tools/structured/DALLE3.js +++ b/api/app/clients/tools/structured/DALLE3.js @@ -172,7 +172,7 @@ Error Message: ${error.message}`); { type: ContentTypes.IMAGE_URL, image_url: { - url: `data:image/jpeg;base64,${base64}`, + url: `data:image/png;base64,${base64}`, }, }, ]; diff --git a/api/models/Banner.js b/api/models/Banner.js index 0f20faeba8..399a8e72ee 100644 --- a/api/models/Banner.js +++ b/api/models/Banner.js @@ -28,4 +28,4 @@ const getBanner = async (user) => { } }; -module.exports = { getBanner }; +module.exports = { Banner, getBanner }; diff --git a/api/models/tx.js b/api/models/tx.js index b534e7edc9..67301d0c49 100644 --- a/api/models/tx.js +++ b/api/models/tx.js @@ -61,6 +61,7 @@ const bedrockValues = { 'amazon.nova-micro-v1:0': { prompt: 0.035, completion: 0.14 }, 'amazon.nova-lite-v1:0': { prompt: 0.06, completion: 0.24 }, 'amazon.nova-pro-v1:0': { prompt: 0.8, completion: 3.2 }, + 'deepseek.r1': { prompt: 1.35, completion: 5.4 }, }; /** diff --git a/api/models/tx.spec.js b/api/models/tx.spec.js index b04eacc9f3..f612e222bb 
100644 --- a/api/models/tx.spec.js +++ b/api/models/tx.spec.js @@ -288,7 +288,7 @@ describe('AWS Bedrock Model Tests', () => { }); describe('Deepseek Model Tests', () => { - const deepseekModels = ['deepseek-chat', 'deepseek-coder', 'deepseek-reasoner']; + const deepseekModels = ['deepseek-chat', 'deepseek-coder', 'deepseek-reasoner', 'deepseek.r1']; it('should return the correct prompt multipliers for all models', () => { const results = deepseekModels.map((model) => { diff --git a/api/package.json b/api/package.json index 9197691a72..ab2a82dbd8 100644 --- a/api/package.json +++ b/api/package.json @@ -42,10 +42,10 @@ "@keyv/redis": "^2.8.1", "@langchain/community": "^0.3.34", "@langchain/core": "^0.3.40", - "@langchain/google-genai": "^0.1.9", - "@langchain/google-vertexai": "^0.2.0", + "@langchain/google-genai": "^0.1.11", + "@langchain/google-vertexai": "^0.2.2", "@langchain/textsplitters": "^0.1.0", - "@librechat/agents": "^2.2.0", + "@librechat/agents": "^2.2.8", "@librechat/data-schemas": "*", "@waylaidwanderer/fetch-event-source": "^3.0.1", "axios": "^1.8.2", diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js index d591fe3247..4b995bb06a 100644 --- a/api/server/controllers/agents/client.js +++ b/api/server/controllers/agents/client.js @@ -7,7 +7,16 @@ // validateVisionModel, // mapModelToAzureConfig, // } = require('librechat-data-provider'); -const { Callback, createMetadataAggregator } = require('@librechat/agents'); +require('events').EventEmitter.defaultMaxListeners = 100; +const { + Callback, + GraphEvents, + formatMessage, + formatAgentMessages, + formatContentStrings, + getTokenCountForMessage, + createMetadataAggregator, +} = require('@librechat/agents'); const { Constants, VisionModes, @@ -17,24 +26,19 @@ const { KnownEndpoints, anthropicSchema, isAgentsEndpoint, + AgentCapabilities, bedrockInputSchema, removeNullishValues, } = require('librechat-data-provider'); -const { - formatMessage, - addCacheControl, - formatAgentMessages, - formatContentStrings, - createContextHandlers, -} = require('~/app/clients/prompts'); +const { getCustomEndpointConfig, checkCapability } = require('~/server/services/Config'); +const { addCacheControl, createContextHandlers } = require('~/app/clients/prompts'); const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens'); const { getBufferString, HumanMessage } = require('@langchain/core/messages'); const { encodeAndFormat } = require('~/server/services/Files/images/encode'); -const { getCustomEndpointConfig } = require('~/server/services/Config'); const Tokenizer = require('~/server/services/Tokenizer'); const BaseClient = require('~/app/clients/BaseClient'); +const { logger, sendEvent } = require('~/config'); const { createRun } = require('./run'); -const { logger } = require('~/config'); /** @typedef {import('@librechat/agents').MessageContentComplex} MessageContentComplex */ /** @typedef {import('@langchain/core/runnables').RunnableConfig} RunnableConfig */ @@ -99,6 +103,8 @@ class AgentClient extends BaseClient { this.outputTokensKey = 'output_tokens'; /** @type {UsageMetadata} */ this.usage; + /** @type {Record} */ + this.indexTokenCountMap = {}; } /** @@ -377,6 +383,10 @@ class AgentClient extends BaseClient { })); } + for (let i = 0; i < messages.length; i++) { + this.indexTokenCountMap[i] = messages[i].tokenCount; + } + const result = { tokenCountMap, prompt: payload, @@ -622,6 +632,9 @@ class AgentClient extends BaseClient { // }); // } + /** @type 
{TCustomConfig['endpoints']['agents']} */ + const agentsEConfig = this.options.req.app.locals[EModelEndpoint.agents]; + /** @type {Partial & { version: 'v1' | 'v2'; run_id?: string; streamMode: string }} */ const config = { configurable: { @@ -629,19 +642,30 @@ class AgentClient extends BaseClient { last_agent_index: this.agentConfigs?.size ?? 0, hide_sequential_outputs: this.options.agent.hide_sequential_outputs, }, - recursionLimit: this.options.req.app.locals[EModelEndpoint.agents]?.recursionLimit, + recursionLimit: agentsEConfig?.recursionLimit, signal: abortController.signal, streamMode: 'values', version: 'v2', }; - const initialMessages = formatAgentMessages(payload); + const toolSet = new Set((this.options.agent.tools ?? []).map((tool) => tool && tool.name)); + let { messages: initialMessages, indexTokenCountMap } = formatAgentMessages( + payload, + this.indexTokenCountMap, + toolSet, + ); if (legacyContentEndpoints.has(this.options.agent.endpoint)) { - formatContentStrings(initialMessages); + initialMessages = formatContentStrings(initialMessages); } /** @type {ReturnType} */ let run; + const countTokens = ((text) => this.getTokenCount(text)).bind(this); + + /** @type {(message: BaseMessage) => number} */ + const tokenCounter = (message) => { + return getTokenCountForMessage(message, countTokens); + }; /** * @@ -649,12 +673,23 @@ class AgentClient extends BaseClient { * @param {BaseMessage[]} messages * @param {number} [i] * @param {TMessageContentParts[]} [contentData] + * @param {Record} [currentIndexCountMap] */ - const runAgent = async (agent, _messages, i = 0, contentData = []) => { + const runAgent = async (agent, _messages, i = 0, contentData = [], _currentIndexCountMap) => { config.configurable.model = agent.model_parameters.model; + const currentIndexCountMap = _currentIndexCountMap ?? 
indexTokenCountMap; if (i > 0) { this.model = agent.model_parameters.model; } + if (agent.recursion_limit && typeof agent.recursion_limit === 'number') { + config.recursionLimit = agent.recursion_limit; + } + if ( + agentsEConfig?.maxRecursionLimit && + config.recursionLimit > agentsEConfig?.maxRecursionLimit + ) { + config.recursionLimit = agentsEConfig?.maxRecursionLimit; + } config.configurable.agent_id = agent.id; config.configurable.name = agent.name; config.configurable.agent_index = i; @@ -717,11 +752,29 @@ class AgentClient extends BaseClient { } if (contentData.length) { + const agentUpdate = { + type: ContentTypes.AGENT_UPDATE, + [ContentTypes.AGENT_UPDATE]: { + index: contentData.length, + runId: this.responseMessageId, + agentId: agent.id, + }, + }; + const streamData = { + event: GraphEvents.ON_AGENT_UPDATE, + data: agentUpdate, + }; + this.options.aggregateContent(streamData); + sendEvent(this.options.res, streamData); + contentData.push(agentUpdate); run.Graph.contentData = contentData; } await run.processStream({ messages }, config, { keepContent: i !== 0, + tokenCounter, + indexTokenCountMap: currentIndexCountMap, + maxContextTokens: agent.maxContextTokens, callbacks: { [Callback.TOOL_ERROR]: (graph, error, toolId) => { logger.error( @@ -735,9 +788,13 @@ class AgentClient extends BaseClient { }; await runAgent(this.options.agent, initialMessages); - let finalContentStart = 0; - if (this.agentConfigs && this.agentConfigs.size > 0) { + if ( + this.agentConfigs && + this.agentConfigs.size > 0 && + (await checkCapability(this.options.req, AgentCapabilities.chain)) + ) { + const windowSize = 5; let latestMessage = initialMessages.pop().content; if (typeof latestMessage !== 'string') { latestMessage = latestMessage[0].text; @@ -745,7 +802,16 @@ class AgentClient extends BaseClient { let i = 1; let runMessages = []; - const lastFiveMessages = initialMessages.slice(-5); + const windowIndexCountMap = {}; + const windowMessages = initialMessages.slice(-windowSize); + let currentIndex = 4; + for (let i = initialMessages.length - 1; i >= 0; i--) { + windowIndexCountMap[currentIndex] = indexTokenCountMap[i]; + currentIndex--; + if (currentIndex < 0) { + break; + } + } for (const [agentId, agent] of this.agentConfigs) { if (abortController.signal.aborted === true) { break; @@ -780,7 +846,9 @@ class AgentClient extends BaseClient { } try { const contextMessages = []; - for (const message of lastFiveMessages) { + const runIndexCountMap = {}; + for (let i = 0; i < windowMessages.length; i++) { + const message = windowMessages[i]; const messageType = message._getType(); if ( (!agent.tools || agent.tools.length === 0) && @@ -788,11 +856,13 @@ class AgentClient extends BaseClient { ) { continue; } - + runIndexCountMap[contextMessages.length] = windowIndexCountMap[i]; contextMessages.push(message); } - const currentMessages = [...contextMessages, new HumanMessage(bufferString)]; - await runAgent(agent, currentMessages, i, contentData); + const bufferMessage = new HumanMessage(bufferString); + runIndexCountMap[contextMessages.length] = tokenCounter(bufferMessage); + const currentMessages = [...contextMessages, bufferMessage]; + await runAgent(agent, currentMessages, i, contentData, runIndexCountMap); } catch (err) { logger.error( `[api/server/controllers/agents/client.js #chatCompletion] Error running agent ${agentId} (${i})`, @@ -803,6 +873,7 @@ class AgentClient extends BaseClient { } } + /** Note: not implemented */ if (config.configurable.hide_sequential_outputs !== true) { 
finalContentStart = 0; } diff --git a/api/server/controllers/agents/v1.js b/api/server/controllers/agents/v1.js index 08327ec61c..731dee69a2 100644 --- a/api/server/controllers/agents/v1.js +++ b/api/server/controllers/agents/v1.js @@ -1,10 +1,11 @@ const fs = require('fs').promises; const { nanoid } = require('nanoid'); const { - FileContext, - Constants, Tools, + Constants, + FileContext, SystemRoles, + EToolResources, actionDelimiter, } = require('librechat-data-provider'); const { @@ -203,14 +204,21 @@ const duplicateAgentHandler = async (req, res) => { } const { - _id: __id, id: _id, + _id: __id, author: _author, createdAt: _createdAt, updatedAt: _updatedAt, + tool_resources: _tool_resources = {}, ...cloneData } = agent; + if (_tool_resources?.[EToolResources.ocr]) { + cloneData.tool_resources = { + [EToolResources.ocr]: _tool_resources[EToolResources.ocr], + }; + } + const newAgentId = `agent_${nanoid()}`; const newAgentData = Object.assign(cloneData, { id: newAgentId, diff --git a/api/server/services/ActionService.js b/api/server/services/ActionService.js index 660e7aeb0d..c332cdfcf1 100644 --- a/api/server/services/ActionService.js +++ b/api/server/services/ActionService.js @@ -161,9 +161,9 @@ async function createActionTool({ if (metadata.auth && metadata.auth.type !== AuthTypeEnum.None) { try { - const action_id = action.action_id; - const identifier = `${req.user.id}:${action.action_id}`; if (metadata.auth.type === AuthTypeEnum.OAuth && metadata.auth.authorization_url) { + const action_id = action.action_id; + const identifier = `${req.user.id}:${action.action_id}`; const requestLogin = async () => { const { args: _args, stepId, ...toolCall } = config.toolCall ?? {}; if (!stepId) { diff --git a/api/server/services/Config/getEndpointsConfig.js b/api/server/services/Config/getEndpointsConfig.js index 4f8bde68ad..016f5f7445 100644 --- a/api/server/services/Config/getEndpointsConfig.js +++ b/api/server/services/Config/getEndpointsConfig.js @@ -72,4 +72,15 @@ async function getEndpointsConfig(req) { return endpointsConfig; } -module.exports = { getEndpointsConfig }; +/** + * @param {ServerRequest} req + * @param {import('librechat-data-provider').AgentCapabilities} capability + * @returns {Promise} + */ +const checkCapability = async (req, capability) => { + const endpointsConfig = await getEndpointsConfig(req); + const capabilities = endpointsConfig?.[EModelEndpoint.agents]?.capabilities ?? []; + return capabilities.includes(capability); +}; + +module.exports = { getEndpointsConfig, checkCapability }; diff --git a/api/server/services/Endpoints/agents/initialize.js b/api/server/services/Endpoints/agents/initialize.js index 11c8dc6fc4..cb539d3bc7 100644 --- a/api/server/services/Endpoints/agents/initialize.js +++ b/api/server/services/Endpoints/agents/initialize.js @@ -204,8 +204,7 @@ const initializeAgentOptions = async ({ toolContextMap, maxContextTokens: agent.max_context_tokens ?? - getModelMaxTokens(tokensModel, providerEndpointMap[provider]) ?? - 4000, + (getModelMaxTokens(tokensModel, providerEndpointMap[provider]) ?? 
4000) * 0.9, }; }; @@ -275,11 +274,13 @@ const initializeClient = async ({ req, res, endpointOption }) => { const client = new AgentClient({ req, + res, sender, contentParts, agentConfigs, eventHandlers, collectedUsage, + aggregateContent, artifactPromises, agent: primaryConfig, spec: endpointOption.spec, diff --git a/api/server/services/Endpoints/bedrock/initialize.js b/api/server/services/Endpoints/bedrock/initialize.js index 51d0040566..4d9ba361cf 100644 --- a/api/server/services/Endpoints/bedrock/initialize.js +++ b/api/server/services/Endpoints/bedrock/initialize.js @@ -55,6 +55,7 @@ const initializeClient = async ({ req, res, endpointOption }) => { const client = new AgentClient({ req, + res, agent, sender, // tools, diff --git a/api/server/services/Files/process.js b/api/server/services/Files/process.js index 1bfadc4b23..78a4976e2f 100644 --- a/api/server/services/Files/process.js +++ b/api/server/services/Files/process.js @@ -29,7 +29,7 @@ const { addAgentResourceFile, removeAgentResourceFiles } = require('~/models/Age const { getOpenAIClient } = require('~/server/controllers/assistants/helpers'); const { createFile, updateFileUsage, deleteFiles } = require('~/models/File'); const { loadAuthValues } = require('~/server/services/Tools/credentials'); -const { getEndpointsConfig } = require('~/server/services/Config'); +const { checkCapability } = require('~/server/services/Config'); const { LB_QueueAsyncCall } = require('~/server/utils/queue'); const { getStrategyFunctions } = require('./strategies'); const { determineFileType } = require('~/server/utils'); @@ -457,17 +457,6 @@ const processFileUpload = async ({ req, res, metadata }) => { res.status(200).json({ message: 'File uploaded and processed successfully', ...result }); }; -/** - * @param {ServerRequest} req - * @param {AgentCapabilities} capability - * @returns {Promise} - */ -const checkCapability = async (req, capability) => { - const endpointsConfig = await getEndpointsConfig(req); - const capabilities = endpointsConfig?.[EModelEndpoint.agents]?.capabilities ?? []; - return capabilities.includes(capability); -}; - /** * Applies the current strategy for file uploads. * Saves file metadata to the database with an expiry TTL. 
diff --git a/api/server/services/ToolService.js b/api/server/services/ToolService.js index f3e4efb6e3..969ca8d8ff 100644 --- a/api/server/services/ToolService.js +++ b/api/server/services/ToolService.js @@ -362,7 +362,12 @@ async function processRequiredActions(client, requiredActions) { continue; } - tool = await createActionTool({ action: actionSet, requestBuilder }); + tool = await createActionTool({ + req: client.req, + res: client.res, + action: actionSet, + requestBuilder, + }); if (!tool) { logger.warn( `Invalid action: user: ${client.req.user.id} | thread_id: ${requiredActions[0].thread_id} | run_id: ${requiredActions[0].run_id} | toolName: ${currentAction.tool}`, diff --git a/api/server/utils/handleText.js b/api/server/utils/handleText.js index 1b9cda28d7..f593d6c866 100644 --- a/api/server/utils/handleText.js +++ b/api/server/utils/handleText.js @@ -204,6 +204,7 @@ function generateConfig(key, baseURL, endpoint) { AgentCapabilities.actions, AgentCapabilities.tools, AgentCapabilities.ocr, + AgentCapabilities.chain, ]; } diff --git a/api/utils/tokens.js b/api/utils/tokens.js index 8edfb0a31c..58aaf7051b 100644 --- a/api/utils/tokens.js +++ b/api/utils/tokens.js @@ -92,6 +92,7 @@ const anthropicModels = { const deepseekModels = { 'deepseek-reasoner': 63000, // -1000 from max (API) deepseek: 63000, // -1000 from max (API) + 'deepseek.r1': 127500, }; const metaModels = { diff --git a/api/utils/tokens.spec.js b/api/utils/tokens.spec.js index d4dbb30498..e5ae21b646 100644 --- a/api/utils/tokens.spec.js +++ b/api/utils/tokens.spec.js @@ -423,6 +423,9 @@ describe('Meta Models Tests', () => { expect(getModelMaxTokens('deepseek-reasoner')).toBe( maxTokensMap[EModelEndpoint.openAI]['deepseek-reasoner'], ); + expect(getModelMaxTokens('deepseek.r1')).toBe( + maxTokensMap[EModelEndpoint.openAI]['deepseek.r1'], + ); }); }); diff --git a/client/src/common/agents-types.ts b/client/src/common/agents-types.ts index f4b8aac9fe..982cbfdb17 100644 --- a/client/src/common/agents-types.ts +++ b/client/src/common/agents-types.ts @@ -28,4 +28,5 @@ export type AgentForm = { provider?: AgentProvider | OptionWithIcon; agent_ids?: string[]; [AgentCapabilities.artifacts]?: ArtifactModes | string; + recursion_limit?: number; } & TAgentCapabilities; diff --git a/client/src/common/types.ts b/client/src/common/types.ts index f0b2b8a238..975f468930 100644 --- a/client/src/common/types.ts +++ b/client/src/common/types.ts @@ -131,6 +131,7 @@ export interface DataColumnMeta { } export enum Panel { + advanced = 'advanced', builder = 'builder', actions = 'actions', model = 'model', @@ -181,6 +182,7 @@ export type AgentPanelProps = { activePanel?: string; action?: t.Action; actions?: t.Action[]; + createMutation: UseMutationResult; setActivePanel: React.Dispatch>; setAction: React.Dispatch>; endpointsConfig?: t.TEndpointsConfig; diff --git a/client/src/components/Chat/Messages/Content/ContentParts.tsx b/client/src/components/Chat/Messages/Content/ContentParts.tsx index ddf08976eb..3805e0bb41 100644 --- a/client/src/components/Chat/Messages/Content/ContentParts.tsx +++ b/client/src/components/Chat/Messages/Content/ContentParts.tsx @@ -139,6 +139,7 @@ const ContentParts = memo( isSubmitting={isSubmitting} key={`part-${messageId}-${idx}`} isCreatedByUser={isCreatedByUser} + isLast={idx === content.length - 1} showCursor={idx === content.length - 1 && isLast} /> diff --git a/client/src/components/Chat/Messages/Content/Markdown.tsx b/client/src/components/Chat/Messages/Content/Markdown.tsx index e01de091c7..ee134b0e53 100644 
--- a/client/src/components/Chat/Messages/Content/Markdown.tsx +++ b/client/src/components/Chat/Messages/Content/Markdown.tsx @@ -166,15 +166,12 @@ export const p: React.ElementType = memo(({ children }: TParagraphProps) => { return

{children}

; }); -const cursor = ' '; - type TContentProps = { content: string; - showCursor?: boolean; isLatestMessage: boolean; }; -const Markdown = memo(({ content = '', showCursor, isLatestMessage }: TContentProps) => { +const Markdown = memo(({ content = '', isLatestMessage }: TContentProps) => { const LaTeXParsing = useRecoilValue(store.LaTeXParsing); const isInitializing = content === ''; @@ -240,7 +237,7 @@ const Markdown = memo(({ content = '', showCursor, isLatestMessage }: TContentPr } } > - {isLatestMessage && (showCursor ?? false) ? currentContent + cursor : currentContent} + {currentContent} diff --git a/client/src/components/Chat/Messages/Content/MessageContent.tsx b/client/src/components/Chat/Messages/Content/MessageContent.tsx index 1547a01d80..f70a15b779 100644 --- a/client/src/components/Chat/Messages/Content/MessageContent.tsx +++ b/client/src/components/Chat/Messages/Content/MessageContent.tsx @@ -83,9 +83,7 @@ const DisplayMessage = ({ text, isCreatedByUser, message, showCursor }: TDisplay let content: React.ReactElement; if (!isCreatedByUser) { - content = ( - - ); + content = ; } else if (enableUserMsgMarkdown) { content = ; } else { diff --git a/client/src/components/Chat/Messages/Content/Part.tsx b/client/src/components/Chat/Messages/Content/Part.tsx index 2430bee6f9..1351efd59c 100644 --- a/client/src/components/Chat/Messages/Content/Part.tsx +++ b/client/src/components/Chat/Messages/Content/Part.tsx @@ -8,9 +8,11 @@ import { import { memo } from 'react'; import type { TMessageContentParts, TAttachment } from 'librechat-data-provider'; import { ErrorMessage } from './MessageContent'; +import AgentUpdate from './Parts/AgentUpdate'; import ExecuteCode from './Parts/ExecuteCode'; import RetrievalCall from './RetrievalCall'; import Reasoning from './Parts/Reasoning'; +import EmptyText from './Parts/EmptyText'; import CodeAnalyze from './CodeAnalyze'; import Container from './Container'; import ToolCall from './ToolCall'; @@ -20,145 +22,159 @@ import Image from './Image'; type PartProps = { part?: TMessageContentParts; + isLast?: boolean; isSubmitting: boolean; showCursor: boolean; isCreatedByUser: boolean; attachments?: TAttachment[]; }; -const Part = memo(({ part, isSubmitting, attachments, showCursor, isCreatedByUser }: PartProps) => { - if (!part) { - return null; - } - - if (part.type === ContentTypes.ERROR) { - return ( - - ); - } else if (part.type === ContentTypes.TEXT) { - const text = typeof part.text === 'string' ? part.text : part.text.value; - - if (typeof text !== 'string') { - return null; - } - if (part.tool_call_ids != null && !text) { - return null; - } - return ( - - - - ); - } else if (part.type === ContentTypes.THINK) { - const reasoning = typeof part.think === 'string' ? 
part.think : part.think.value; - if (typeof reasoning !== 'string') { - return null; - } - return ; - } else if (part.type === ContentTypes.TOOL_CALL) { - const toolCall = part[ContentTypes.TOOL_CALL]; - - if (!toolCall) { +const Part = memo( + ({ part, isSubmitting, attachments, isLast, showCursor, isCreatedByUser }: PartProps) => { + if (!part) { return null; } - const isToolCall = - 'args' in toolCall && (!toolCall.type || toolCall.type === ToolCallTypes.TOOL_CALL); - if (isToolCall && toolCall.name === Tools.execute_code) { + if (part.type === ContentTypes.ERROR) { return ( - ); - } else if (isToolCall) { + } else if (part.type === ContentTypes.AGENT_UPDATE) { return ( - - ); - } else if (toolCall.type === ToolCallTypes.CODE_INTERPRETER) { - const code_interpreter = toolCall[ToolCallTypes.CODE_INTERPRETER]; - return ( - - ); - } else if ( - toolCall.type === ToolCallTypes.RETRIEVAL || - toolCall.type === ToolCallTypes.FILE_SEARCH - ) { - return ( - - ); - } else if ( - toolCall.type === ToolCallTypes.FUNCTION && - ToolCallTypes.FUNCTION in toolCall && - imageGenTools.has(toolCall.function.name) - ) { - return ( - - ); - } else if (toolCall.type === ToolCallTypes.FUNCTION && ToolCallTypes.FUNCTION in toolCall) { - if (isImageVisionTool(toolCall)) { - if (isSubmitting && showCursor) { - return ( + <> + + {isLast && showCursor && ( - + - ); - } + )} + + ); + } else if (part.type === ContentTypes.TEXT) { + const text = typeof part.text === 'string' ? part.text : part.text.value; + + if (typeof text !== 'string') { + return null; + } + if (part.tool_call_ids != null && !text) { + return null; + } + return ( + + + + ); + } else if (part.type === ContentTypes.THINK) { + const reasoning = typeof part.think === 'string' ? part.think : part.think.value; + if (typeof reasoning !== 'string') { + return null; + } + return ; + } else if (part.type === ContentTypes.TOOL_CALL) { + const toolCall = part[ContentTypes.TOOL_CALL]; + + if (!toolCall) { return null; } + const isToolCall = + 'args' in toolCall && (!toolCall.type || toolCall.type === ToolCallTypes.TOOL_CALL); + if (isToolCall && toolCall.name === Tools.execute_code) { + return ( + + ); + } else if (isToolCall) { + return ( + + ); + } else if (toolCall.type === ToolCallTypes.CODE_INTERPRETER) { + const code_interpreter = toolCall[ToolCallTypes.CODE_INTERPRETER]; + return ( + + ); + } else if ( + toolCall.type === ToolCallTypes.RETRIEVAL || + toolCall.type === ToolCallTypes.FILE_SEARCH + ) { + return ( + + ); + } else if ( + toolCall.type === ToolCallTypes.FUNCTION && + ToolCallTypes.FUNCTION in toolCall && + imageGenTools.has(toolCall.function.name) + ) { + return ( + + ); + } else if (toolCall.type === ToolCallTypes.FUNCTION && ToolCallTypes.FUNCTION in toolCall) { + if (isImageVisionTool(toolCall)) { + if (isSubmitting && showCursor) { + return ( + + + + ); + } + return null; + } + + return ( + + ); + } + } else if (part.type === ContentTypes.IMAGE_FILE) { + const imageFile = part[ContentTypes.IMAGE_FILE]; + const height = imageFile.height ?? 1920; + const width = imageFile.width ?? 1080; return ( - ); } - } else if (part.type === ContentTypes.IMAGE_FILE) { - const imageFile = part[ContentTypes.IMAGE_FILE]; - const height = imageFile.height ?? 1920; - const width = imageFile.width ?? 
1080; - return ( - - ); - } - return null; -}); + return null; + }, +); export default Part; diff --git a/client/src/components/Chat/Messages/Content/Parts/AgentUpdate.tsx b/client/src/components/Chat/Messages/Content/Parts/AgentUpdate.tsx new file mode 100644 index 0000000000..4dca00107e --- /dev/null +++ b/client/src/components/Chat/Messages/Content/Parts/AgentUpdate.tsx @@ -0,0 +1,39 @@ +import React, { useMemo } from 'react'; +import { EModelEndpoint } from 'librechat-data-provider'; +import { useAgentsMapContext } from '~/Providers'; +import Icon from '~/components/Endpoints/Icon'; + +interface AgentUpdateProps { + currentAgentId: string; +} + +const AgentUpdate: React.FC = ({ currentAgentId }) => { + const agentsMap = useAgentsMapContext() || {}; + const currentAgent = useMemo(() => agentsMap[currentAgentId], [agentsMap, currentAgentId]); + if (!currentAgentId) { + return null; + } + return ( +
+
+
+
+
+
+
+
+
+ +
+
{currentAgent?.name}
+
+
+ ); +}; + +export default AgentUpdate; diff --git a/client/src/components/Chat/Messages/Content/Parts/EmptyText.tsx b/client/src/components/Chat/Messages/Content/Parts/EmptyText.tsx new file mode 100644 index 0000000000..1b514164df --- /dev/null +++ b/client/src/components/Chat/Messages/Content/Parts/EmptyText.tsx @@ -0,0 +1,17 @@ +import { memo } from 'react'; + +const EmptyTextPart = memo(() => { + return ( +
+
+
+

+ +

+
+
+
+ ); +}); + +export default EmptyTextPart; diff --git a/client/src/components/Chat/Messages/Content/Parts/Text.tsx b/client/src/components/Chat/Messages/Content/Parts/Text.tsx index 7c207f1512..d4a605aea5 100644 --- a/client/src/components/Chat/Messages/Content/Parts/Text.tsx +++ b/client/src/components/Chat/Messages/Content/Parts/Text.tsx @@ -29,9 +29,7 @@ const TextPart = memo(({ text, isCreatedByUser, showCursor }: TextPartProps) => const content: ContentType = useMemo(() => { if (!isCreatedByUser) { - return ( - - ); + return ; } else if (enableUserMsgMarkdown) { return ; } else { diff --git a/client/src/components/SidePanel/Agents/AdminSettings.tsx b/client/src/components/SidePanel/Agents/AdminSettings.tsx index 6ca21d1317..5fb13fd045 100644 --- a/client/src/components/SidePanel/Agents/AdminSettings.tsx +++ b/client/src/components/SidePanel/Agents/AdminSettings.tsx @@ -142,7 +142,7 @@ const AdminSettings = () => { + ); +}; + +export default AdvancedButton; diff --git a/client/src/components/SidePanel/Agents/Advanced/AdvancedPanel.tsx b/client/src/components/SidePanel/Agents/Advanced/AdvancedPanel.tsx new file mode 100644 index 0000000000..0ead79cd32 --- /dev/null +++ b/client/src/components/SidePanel/Agents/Advanced/AdvancedPanel.tsx @@ -0,0 +1,55 @@ +import { useMemo } from 'react'; +import { ChevronLeft } from 'lucide-react'; +import { AgentCapabilities } from 'librechat-data-provider'; +import { useFormContext, Controller } from 'react-hook-form'; +import type { AgentForm, AgentPanelProps } from '~/common'; +import MaxAgentSteps from './MaxAgentSteps'; +import AgentChain from './AgentChain'; +import { useLocalize } from '~/hooks'; +import { Panel } from '~/common'; + +export default function AdvancedPanel({ + agentsConfig, + setActivePanel, +}: Pick) { + const localize = useLocalize(); + const methods = useFormContext(); + const { control, watch } = methods; + const currentAgentId = watch('id'); + const chainEnabled = useMemo( + () => agentsConfig?.capabilities.includes(AgentCapabilities.chain) ?? false, + [agentsConfig], + ); + + return ( +
+
+
+ +
+
{localize('com_ui_advanced_settings')}
+
+
+ + {chainEnabled && ( + } + /> + )} +
+
+ ); +} diff --git a/client/src/components/SidePanel/Agents/Advanced/AgentChain.tsx b/client/src/components/SidePanel/Agents/Advanced/AgentChain.tsx new file mode 100644 index 0000000000..1380927115 --- /dev/null +++ b/client/src/components/SidePanel/Agents/Advanced/AgentChain.tsx @@ -0,0 +1,179 @@ +import { X, Link2, PlusCircle } from 'lucide-react'; +import { EModelEndpoint } from 'librechat-data-provider'; +import React, { useState, useMemo, useCallback, useEffect } from 'react'; +import type { ControllerRenderProps } from 'react-hook-form'; +import type { AgentForm, OptionWithIcon } from '~/common'; +import ControlCombobox from '~/components/ui/ControlCombobox'; +import { HoverCard, HoverCardPortal, HoverCardContent, HoverCardTrigger } from '~/components/ui'; +import { CircleHelpIcon } from '~/components/svg'; +import { useAgentsMapContext } from '~/Providers'; +import Icon from '~/components/Endpoints/Icon'; +import { useLocalize } from '~/hooks'; +import { ESide } from '~/common'; + +interface AgentChainProps { + field: ControllerRenderProps; + currentAgentId: string; +} + +/** TODO: make configurable */ +const MAX_AGENTS = 10; + +const AgentChain: React.FC = ({ field, currentAgentId }) => { + const localize = useLocalize(); + const [newAgentId, setNewAgentId] = useState(''); + const agentsMap = useAgentsMapContext() || {}; + const agentIds = field.value || []; + + const agents = useMemo(() => Object.values(agentsMap), [agentsMap]); + + const selectableAgents = useMemo( + () => + agents + .filter((agent) => agent?.id !== currentAgentId) + .map( + (agent) => + ({ + label: agent?.name || '', + value: agent?.id, + icon: ( + + ), + }) as OptionWithIcon, + ), + [agents, currentAgentId], + ); + + const getAgentDetails = useCallback((id: string) => agentsMap[id], [agentsMap]); + + useEffect(() => { + if (newAgentId && agentIds.length < MAX_AGENTS) { + field.onChange([...agentIds, newAgentId]); + setNewAgentId(''); + } + }, [newAgentId, agentIds, field]); + + const removeAgentAt = (index: number) => { + field.onChange(agentIds.filter((_, i) => i !== index)); + }; + + const updateAgentAt = (index: number, id: string) => { + const updated = [...agentIds]; + updated[index] = id; + field.onChange(updated); + }; + + return ( + +
+
+ + + + +
+
+ {agentIds.length} / {MAX_AGENTS} +
+
+
+ {/* Current fixed agent */} +
+
+
+ +
+
+ {getAgentDetails(currentAgentId)?.name} +
+
+
+ {} + {agentIds.map((agentId, idx) => ( + +
+ updateAgentAt(idx, id)} + selectPlaceholder={localize('com_ui_agent_var', { 0: localize('com_ui_select') })} + searchPlaceholder={localize('com_ui_agent_var', { 0: localize('com_ui_search') })} + items={selectableAgents} + displayValue={getAgentDetails(agentId)?.name ?? ''} + SelectIcon={ + + } + className="flex-1 border-border-heavy" + containerClassName="px-0" + /> + {/* Future Settings button? */} + {/* */} + +
+ {idx < agentIds.length - 1 && ( + + )} +
+ ))} + + {agentIds.length < MAX_AGENTS && ( + <> + {agentIds.length > 0 && } + } + /> + + )} + + {agentIds.length >= MAX_AGENTS && ( +

+ {localize('com_ui_agent_chain_max', { 0: MAX_AGENTS })} +

+ )} +
+ + +
+

{localize('com_ui_agent_chain_info')}

+
+
+
+
+ ); +}; + +export default AgentChain; diff --git a/client/src/components/SidePanel/Agents/Advanced/MaxAgentSteps.tsx b/client/src/components/SidePanel/Agents/Advanced/MaxAgentSteps.tsx new file mode 100644 index 0000000000..5e334282f9 --- /dev/null +++ b/client/src/components/SidePanel/Agents/Advanced/MaxAgentSteps.tsx @@ -0,0 +1,52 @@ +import { useFormContext, Controller } from 'react-hook-form'; +import type { AgentForm } from '~/common'; +import { + HoverCard, + FormInput, + HoverCardPortal, + HoverCardContent, + HoverCardTrigger, +} from '~/components/ui'; +import { CircleHelpIcon } from '~/components/svg'; +import { useLocalize } from '~/hooks'; +import { ESide } from '~/common'; + +export default function AdvancedPanel() { + const localize = useLocalize(); + const methods = useFormContext(); + const { control } = methods; + + return ( + + ( + + + + } + /> + )} + /> + + +
+

+ {localize('com_ui_agent_recursion_limit_info')} +

+
+
+
+
+ ); +} diff --git a/client/src/components/SidePanel/Agents/AgentConfig.tsx b/client/src/components/SidePanel/Agents/AgentConfig.tsx index 7ece82bed1..864ecd8173 100644 --- a/client/src/components/SidePanel/Agents/AgentConfig.tsx +++ b/client/src/components/SidePanel/Agents/AgentConfig.tsx @@ -1,32 +1,19 @@ import React, { useState, useMemo, useCallback } from 'react'; import { useQueryClient } from '@tanstack/react-query'; import { Controller, useWatch, useFormContext } from 'react-hook-form'; -import { - QueryKeys, - SystemRoles, - Permissions, - EModelEndpoint, - PermissionTypes, - AgentCapabilities, -} from 'librechat-data-provider'; +import { QueryKeys, EModelEndpoint, AgentCapabilities } from 'librechat-data-provider'; import type { TPlugin } from 'librechat-data-provider'; import type { AgentForm, AgentPanelProps, IconComponentTypes } from '~/common'; import { cn, defaultTextProps, removeFocusOutlines, getEndpointField, getIconKey } from '~/utils'; -import { useCreateAgentMutation, useUpdateAgentMutation } from '~/data-provider'; -import { useLocalize, useAuthContext, useHasAccess } from '~/hooks'; import { useToastContext, useFileMapContext } from '~/Providers'; import { icons } from '~/components/Chat/Menus/Endpoints/Icons'; import Action from '~/components/SidePanel/Builder/Action'; import { ToolSelectDialog } from '~/components/Tools'; -import DuplicateAgent from './DuplicateAgent'; import { processAgentOption } from '~/utils'; -import AdminSettings from './AdminSettings'; -import DeleteButton from './DeleteButton'; import AgentAvatar from './AgentAvatar'; import FileContext from './FileContext'; -import { Spinner } from '~/components'; +import { useLocalize } from '~/hooks'; import FileSearch from './FileSearch'; -import ShareAgent from './ShareAgent'; import Artifacts from './Artifacts'; import AgentTool from './AgentTool'; import CodeForm from './Code/Form'; @@ -43,11 +30,10 @@ export default function AgentConfig({ setAction, actions = [], agentsConfig, - endpointsConfig, + createMutation, setActivePanel, - setCurrentAgentId, + endpointsConfig, }: AgentPanelProps) { - const { user } = useAuthContext(); const fileMap = useFileMapContext(); const queryClient = useQueryClient(); @@ -66,11 +52,6 @@ export default function AgentConfig({ const tools = useWatch({ control, name: 'tools' }); const agent_id = useWatch({ control, name: 'id' }); - const hasAccessToShareAgents = useHasAccess({ - permissionType: PermissionTypes.AGENTS, - permission: Permissions.SHARED_GLOBAL, - }); - const toolsEnabled = useMemo( () => agentsConfig?.capabilities.includes(AgentCapabilities.tools), [agentsConfig], @@ -156,46 +137,6 @@ export default function AgentConfig({ return _agent.code_files ?? []; }, [agent, agent_id, fileMap]); - /* Mutations */ - const update = useUpdateAgentMutation({ - onSuccess: (data) => { - showToast({ - message: `${localize('com_assistants_update_success')} ${ - data.name ?? localize('com_ui_agent') - }`, - }); - }, - onError: (err) => { - const error = err as Error; - showToast({ - message: `${localize('com_agents_update_error')}${ - error.message ? ` ${localize('com_ui_error')}: ${error.message}` : '' - }`, - status: 'error', - }); - }, - }); - - const create = useCreateAgentMutation({ - onSuccess: (data) => { - setCurrentAgentId(data.id); - showToast({ - message: `${localize('com_assistants_create_success')} ${ - data.name ?? 
localize('com_ui_agent') - }`, - }); - }, - onError: (err) => { - const error = err as Error; - showToast({ - message: `${localize('com_agents_create_error')}${ - error.message ? ` ${localize('com_ui_error')}: ${error.message}` : '' - }`, - status: 'error', - }); - }, - }); - const handleAddActions = useCallback(() => { if (!agent_id) { showToast({ @@ -225,26 +166,14 @@ export default function AgentConfig({ Icon = icons[iconKey]; } - const renderSaveButton = () => { - if (create.isLoading || update.isLoading) { - return