🔍 feat: Mistral OCR API / Upload Files as Text (#6274)

* refactor: move `loadAuthValues` to `~/services/Tools/credentials`

* feat: add createAxiosInstance function to configure axios with proxy support

* WIP: First pass mistral ocr

* refactor: replace getConvoFiles with getToolFiles for improved file retrieval logic

* refactor: improve document formatting in encodeAndFormat function

* refactor: remove unused resendFiles parameter from buildOptions function (this option comes from the agent config)

* fix: update getFiles call to include files with `text` property as well

* refactor: move file handling to `initializeAgentOptions`

* refactor: enhance addImageURLs method to handle OCR text and improve message formatting

* refactor: update message formatting to handle OCR text in various content types

* refactor: remove unused resendFiles property from compactAgentsSchema

* fix: add error handling for Mistral OCR document upload and logging

* refactor: integrate OCR capability into file upload options and configuration

* refactor: skip processing for text source files in delete request, as they are directly tied to the database

* feat: add metadata field to ExtendedFile type and update PanelColumns and PanelTable components for localization and metadata handling

* fix: source icon styling

* wip: first pass, frontend file context agent resources

* refactor: add hover card with contextual information for File Context (OCR) in FileContext component

* feat: enhance file processing by integrating file retrieval for OCR resources in agent initialization

* feat: implement OCR config; fix: agent resource deletion for ocr files

* feat: enhance agent initialization by adding OCR capability check in resource priming

* ci: fix `~/config` module mock

* ci: add OCR property expectation in AppService tests

* refactor: simplify OCR config loading by removing environment variable extraction, to be done when OCR is actually performed

* ci: add unit test to ensure environment variable references are not parsed in OCR config

* refactor: disable base64 image inclusion in OCR request

* refactor: enhance OCR configuration handling by validating environment variables and providing defaults

* refactor: use file stream from disk for mistral ocr api
This commit is contained in:
Danny Avila 2025-03-10 17:23:46 -04:00 committed by GitHub
parent 9db00edfc4
commit ded3cd8876
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
48 changed files with 1621 additions and 131 deletions

View file

@ -1121,9 +1121,13 @@ class BaseClient {
return message; return message;
} }
const files = await getFiles({ const files = await getFiles(
file_id: { $in: fileIds }, {
}); file_id: { $in: fileIds },
},
{},
{},
);
await this.addImageURLs(message, files, this.visionMode); await this.addImageURLs(message, files, this.visionMode);

View file

@ -21,6 +21,7 @@ const {
} = require('../'); } = require('../');
const { primeFiles: primeCodeFiles } = require('~/server/services/Files/Code/process'); const { primeFiles: primeCodeFiles } = require('~/server/services/Files/Code/process');
const { createFileSearchTool, primeFiles: primeSearchFiles } = require('./fileSearch'); const { createFileSearchTool, primeFiles: primeSearchFiles } = require('./fileSearch');
const { loadAuthValues } = require('~/server/services/Tools/credentials');
const { createMCPTool } = require('~/server/services/MCP'); const { createMCPTool } = require('~/server/services/MCP');
const { loadSpecs } = require('./loadSpecs'); const { loadSpecs } = require('./loadSpecs');
const { logger } = require('~/config'); const { logger } = require('~/config');
@ -90,45 +91,6 @@ const validateTools = async (user, tools = []) => {
} }
}; };
/**
 * Resolves a value for each requested auth field, looking at `process.env` first and
 * falling back to `getUserPluginAuthValue` for the given user.
 *
 * @param {Object} params
 * @param {string} params.userId - Forwarded to `getUserPluginAuthValue`.
 * @param {string[]} params.authFields - Field names; one entry may list alternates separated by "||".
 * @param {boolean} [params.throwError=true] - Forwarded to `getUserPluginAuthValue`.
 * @returns {Promise<Record<string, string>>} Map keyed by whichever field name produced a value.
 */
const loadAuthValues = async ({ userId, authFields, throwError = true }) => {
  const resolved = {};

  /**
   * Walks the alternates in order and returns the first one that yields a non-empty value.
   * @param {string[]} candidates Alternate field names for a single credential.
   * @returns {Promise<{ authField: string, authValue: string} | null>} The winning field and value, or null.
   */
  const resolveFirst = async (candidates) => {
    for (let idx = 0; idx < candidates.length; idx++) {
      const name = candidates[idx];

      const envValue = process.env[name];
      if (envValue) {
        return { authField: name, authValue: envValue };
      }

      let userValue;
      try {
        userValue = await getUserPluginAuthValue(userId, name, throwError);
      } catch (err) {
        // A lookup failure only surfaces when it happens on the final alternate's name;
        // earlier alternates fall through to the next candidate.
        if (name === candidates[candidates.length - 1]) {
          throw err;
        }
      }

      if (userValue) {
        return { authField: name, authValue: userValue };
      }
    }
    return null;
  };

  // Entries are resolved one at a time, in the order given.
  for (const entry of authFields) {
    const match = await resolveFirst(entry.split('||'));
    if (match) {
      resolved[match.authField] = match.authValue;
    }
  }

  return resolved;
};
/** @typedef {typeof import('@langchain/core/tools').Tool} ToolConstructor */ /** @typedef {typeof import('@langchain/core/tools').Tool} ToolConstructor */
/** @typedef {import('@langchain/core/tools').Tool} Tool */ /** @typedef {import('@langchain/core/tools').Tool} Tool */
@ -348,7 +310,6 @@ const loadTools = async ({
module.exports = { module.exports = {
loadToolWithAuth, loadToolWithAuth,
loadAuthValues,
validateTools, validateTools,
loadTools, loadTools,
}; };

View file

@ -1,9 +1,8 @@
const { validateTools, loadTools, loadAuthValues } = require('./handleTools'); const { validateTools, loadTools } = require('./handleTools');
const handleOpenAIErrors = require('./handleOpenAIErrors'); const handleOpenAIErrors = require('./handleOpenAIErrors');
module.exports = { module.exports = {
handleOpenAIErrors, handleOpenAIErrors,
loadAuthValues,
validateTools, validateTools,
loadTools, loadTools,
}; };

View file

@ -1,3 +1,4 @@
const axios = require('axios');
const { EventSource } = require('eventsource'); const { EventSource } = require('eventsource');
const { Time, CacheKeys } = require('librechat-data-provider'); const { Time, CacheKeys } = require('librechat-data-provider');
const logger = require('./winston'); const logger = require('./winston');
@ -47,9 +48,24 @@ const sendEvent = (res, event) => {
res.write(`event: message\ndata: ${JSON.stringify(event)}\n\n`); res.write(`event: message\ndata: ${JSON.stringify(event)}\n\n`);
}; };
/**
 * Creates an axios instance and, when `process.env.proxy` is set, points its default
 * `proxy` option at that URL.
 *
 * The proxy URL's explicit port and any embedded credentials are carried into the config;
 * without the port, a proxy such as `http://host:8080` would be contacted on the
 * protocol's default port instead.
 *
 * @returns {import('axios').AxiosInstance} The configured instance.
 * @throws {TypeError} If `process.env.proxy` is set but cannot be parsed by `new URL()`.
 */
function createAxiosInstance() {
  const instance = axios.create();

  const proxyURL = process.env.proxy;
  if (!proxyURL) {
    return instance;
  }

  const url = new URL(proxyURL);

  // URL credentials are percent-encoded; fall back to the raw text if decoding fails.
  const decode = (value) => {
    try {
      return decodeURIComponent(value);
    } catch (_err) {
      return value;
    }
  };

  instance.defaults.proxy = {
    // WHATWG URL keeps the square brackets around IPv6 literals; strip them for a bare host.
    host: url.hostname.replace(/^\[|\]$/g, ''),
    protocol: url.protocol.replace(':', ''),
    // `url.port` is '' when the URL has no explicit (non-default) port.
    ...(url.port ? { port: Number(url.port) } : {}),
    ...(url.username
      ? { auth: { username: decode(url.username), password: decode(url.password) } }
      : {}),
  };

  return instance;
}
module.exports = { module.exports = {
logger, logger,
sendEvent, sendEvent,
getMCPManager, getMCPManager,
createAxiosInstance,
getFlowStateManager, getFlowStateManager,
}; };

View file

@ -15,19 +15,6 @@ const searchConversation = async (conversationId) => {
throw new Error('Error searching conversation'); throw new Error('Error searching conversation');
} }
}; };
/**
 * Looks up a conversation by its `conversationId` and returns its `files` field.
 * @param {string} conversationId - The conversation's ID.
 * @returns {Promise<string[]>} The stored `files` value, or an empty array when the
 * conversation is not found or has no `files`.
 * @throws {Error} 'Error getting conversation files' when the query fails (the cause is logged).
 */
const getConvoFiles = async (conversationId) => {
  try {
    // Only the `files` path is projected; `lean()` is requested on the query.
    const convo = await Conversation.findOne({ conversationId }, 'files').lean();
    const storedFiles = convo?.files;
    return storedFiles ?? [];
  } catch (error) {
    logger.error('[getConvoFiles] Error getting conversation files', error);
    throw new Error('Error getting conversation files');
  }
};
/** /**
* Retrieves a single conversation for a given user and conversation ID. * Retrieves a single conversation for a given user and conversation ID.
@ -73,9 +60,46 @@ const deleteNullOrEmptyConversations = async () => {
} }
}; };
/**
 * Retrieves the entries of a conversation's `files` array that either have
 * `embedded === true` or carry a non-null `metadata.fileIdentifier`.
 * The filtering is done inside the aggregation pipeline via `$filter`.
 *
 * @param {string} conversationId - The conversation ID
 * @returns {Promise<MongoFile[]>} - The matching entries, or an empty array when no
 * conversation matches or the filtered list is empty/missing.
 * @throws {Error} 'Error fetching embedded files' when the aggregation fails (the cause is logged).
 */
const getToolFiles = async (conversationId) => {
  try {
    const [result] = await Conversation.aggregate([
      { $match: { conversationId } },
      {
        $project: {
          files: {
            $filter: {
              input: '$files',
              as: 'file',
              cond: {
                $or: [
                  { $eq: ['$$file.embedded', true] },
                  // `$ifNull` yields the identifier itself when present, `false` otherwise.
                  { $ifNull: ['$$file.metadata.fileIdentifier', false] },
                ],
              },
            },
          },
          _id: 0,
        },
      },
    ]).exec();

    return result?.files || [];
  } catch (error) {
    // Log under this function's own name so the entry can be traced back to it.
    logger.error('[getToolFiles] Error fetching embedded files:', error);
    throw new Error('Error fetching embedded files');
  }
};
module.exports = { module.exports = {
Conversation, Conversation,
getConvoFiles, getToolFiles,
searchConversation, searchConversation,
deleteNullOrEmptyConversations, deleteNullOrEmptyConversations,
/** /**

View file

@ -17,11 +17,13 @@ const findFileById = async (file_id, options = {}) => {
* Retrieves files matching a given filter, sorted by the most recently updated. * Retrieves files matching a given filter, sorted by the most recently updated.
* @param {Object} filter - The filter criteria to apply. * @param {Object} filter - The filter criteria to apply.
* @param {Object} [_sortOptions] - Optional sort parameters. * @param {Object} [_sortOptions] - Optional sort parameters.
* @param {Object|String} [selectFields={ text: 0 }] - Fields to include/exclude in the query results.
* Default excludes the 'text' field.
* @returns {Promise<Array<IMongoFile>>} A promise that resolves to an array of file documents. * @returns {Promise<Array<IMongoFile>>} A promise that resolves to an array of file documents.
*/ */
const getFiles = async (filter, _sortOptions) => { const getFiles = async (filter, _sortOptions, selectFields = { text: 0 }) => {
const sortOptions = { updatedAt: -1, ..._sortOptions }; const sortOptions = { updatedAt: -1, ..._sortOptions };
return await File.find(filter).sort(sortOptions).lean(); return await File.find(filter).select(selectFields).sort(sortOptions).lean();
}; };
/** /**

View file

@ -10,8 +10,8 @@ const {
ChatModelStreamHandler, ChatModelStreamHandler,
} = require('@librechat/agents'); } = require('@librechat/agents');
const { processCodeOutput } = require('~/server/services/Files/Code/process'); const { processCodeOutput } = require('~/server/services/Files/Code/process');
const { loadAuthValues } = require('~/server/services/Tools/credentials');
const { saveBase64Image } = require('~/server/services/Files/process'); const { saveBase64Image } = require('~/server/services/Files/process');
const { loadAuthValues } = require('~/app/clients/tools/util');
const { logger, sendEvent } = require('~/config'); const { logger, sendEvent } = require('~/config');
/** @typedef {import('@librechat/agents').Graph} Graph */ /** @typedef {import('@librechat/agents').Graph} Graph */

View file

@ -223,14 +223,23 @@ class AgentClient extends BaseClient {
}; };
} }
/**
*
* @param {TMessage} message
* @param {Array<MongoFile>} attachments
* @returns {Promise<Array<Partial<MongoFile>>>}
*/
async addImageURLs(message, attachments) { async addImageURLs(message, attachments) {
const { files, image_urls } = await encodeAndFormat( const { files, text, image_urls } = await encodeAndFormat(
this.options.req, this.options.req,
attachments, attachments,
this.options.agent.provider, this.options.agent.provider,
VisionModes.agents, VisionModes.agents,
); );
message.image_urls = image_urls.length ? image_urls : undefined; message.image_urls = image_urls.length ? image_urls : undefined;
if (text && text.length) {
message.ocr = text;
}
return files; return files;
} }
@ -308,7 +317,21 @@ class AgentClient extends BaseClient {
assistantName: this.options?.modelLabel, assistantName: this.options?.modelLabel,
}); });
const needsTokenCount = this.contextStrategy && !orderedMessages[i].tokenCount; if (message.ocr && i !== orderedMessages.length - 1) {
if (typeof formattedMessage.content === 'string') {
formattedMessage.content = message.ocr + '\n' + formattedMessage.content;
} else {
const textPart = formattedMessage.content.find((part) => part.type === 'text');
textPart
? (textPart.text = message.ocr + '\n' + textPart.text)
: formattedMessage.content.unshift({ type: 'text', text: message.ocr });
}
} else if (message.ocr && i === orderedMessages.length - 1) {
systemContent = [systemContent, message.ocr].join('\n');
}
const needsTokenCount =
(this.contextStrategy && !orderedMessages[i].tokenCount) || message.ocr;
/* If tokens were never counted, or, is a Vision request and the message has files, count again */ /* If tokens were never counted, or, is a Vision request and the message has files, count again */
if (needsTokenCount || (this.isVisionModel && (message.image_urls || message.files))) { if (needsTokenCount || (this.isVisionModel && (message.image_urls || message.files))) {

View file

@ -10,7 +10,8 @@ const {
const { processFileURL, uploadImageBuffer } = require('~/server/services/Files/process'); const { processFileURL, uploadImageBuffer } = require('~/server/services/Files/process');
const { processCodeOutput } = require('~/server/services/Files/Code/process'); const { processCodeOutput } = require('~/server/services/Files/Code/process');
const { createToolCall, getToolCallsByConvo } = require('~/models/ToolCall'); const { createToolCall, getToolCallsByConvo } = require('~/models/ToolCall');
const { loadAuthValues, loadTools } = require('~/app/clients/tools/util'); const { loadAuthValues } = require('~/server/services/Tools/credentials');
const { loadTools } = require('~/app/clients/tools/util');
const { checkAccess } = require('~/server/middleware'); const { checkAccess } = require('~/server/middleware');
const { getMessage } = require('~/models/Message'); const { getMessage } = require('~/models/Message');
const { logger } = require('~/config'); const { logger } = require('~/config');

View file

@ -10,7 +10,6 @@ const openAI = require('~/server/services/Endpoints/openAI');
const agents = require('~/server/services/Endpoints/agents'); const agents = require('~/server/services/Endpoints/agents');
const custom = require('~/server/services/Endpoints/custom'); const custom = require('~/server/services/Endpoints/custom');
const google = require('~/server/services/Endpoints/google'); const google = require('~/server/services/Endpoints/google');
const { getConvoFiles } = require('~/models/Conversation');
const { handleError } = require('~/server/utils'); const { handleError } = require('~/server/utils');
const buildFunction = { const buildFunction = {
@ -87,16 +86,8 @@ async function buildEndpointOption(req, res, next) {
// TODO: use `getModelsConfig` only when necessary // TODO: use `getModelsConfig` only when necessary
const modelsConfig = await getModelsConfig(req); const modelsConfig = await getModelsConfig(req);
const { resendFiles = true } = req.body.endpointOption;
req.body.endpointOption.modelsConfig = modelsConfig; req.body.endpointOption.modelsConfig = modelsConfig;
if (isAgents && resendFiles && req.body.conversationId) { if (req.body.files && !isAgents) {
const fileIds = await getConvoFiles(req.body.conversationId);
const requestFiles = req.body.files ?? [];
if (requestFiles.length || fileIds.length) {
req.body.endpointOption.attachments = processFiles(requestFiles, fileIds);
}
} else if (req.body.files) {
// hold the promise
req.body.endpointOption.attachments = processFiles(req.body.files); req.body.endpointOption.attachments = processFiles(req.body.files);
} }
next(); next();

View file

@ -16,7 +16,7 @@ const {
} = require('~/server/services/Files/process'); } = require('~/server/services/Files/process');
const { getStrategyFunctions } = require('~/server/services/Files/strategies'); const { getStrategyFunctions } = require('~/server/services/Files/strategies');
const { getOpenAIClient } = require('~/server/controllers/assistants/helpers'); const { getOpenAIClient } = require('~/server/controllers/assistants/helpers');
const { loadAuthValues } = require('~/app/clients/tools/util'); const { loadAuthValues } = require('~/server/services/Tools/credentials');
const { getAgent } = require('~/models/Agent'); const { getAgent } = require('~/models/Agent');
const { getFiles } = require('~/models/File'); const { getFiles } = require('~/models/File');
const { logger } = require('~/config'); const { logger } = require('~/config');

View file

@ -1,4 +1,9 @@
const { FileSources, EModelEndpoint, getConfigDefaults } = require('librechat-data-provider'); const {
FileSources,
EModelEndpoint,
loadOCRConfig,
getConfigDefaults,
} = require('librechat-data-provider');
const { checkVariables, checkHealth, checkConfig, checkAzureVariables } = require('./start/checks'); const { checkVariables, checkHealth, checkConfig, checkAzureVariables } = require('./start/checks');
const { azureAssistantsDefaults, assistantsConfigSetup } = require('./start/assistants'); const { azureAssistantsDefaults, assistantsConfigSetup } = require('./start/assistants');
const { initializeFirebase } = require('./Files/Firebase/initialize'); const { initializeFirebase } = require('./Files/Firebase/initialize');
@ -25,6 +30,7 @@ const AppService = async (app) => {
const config = (await loadCustomConfig()) ?? {}; const config = (await loadCustomConfig()) ?? {};
const configDefaults = getConfigDefaults(); const configDefaults = getConfigDefaults();
const ocr = loadOCRConfig(config.ocr);
const filteredTools = config.filteredTools; const filteredTools = config.filteredTools;
const includedTools = config.includedTools; const includedTools = config.includedTools;
const fileStrategy = config.fileStrategy ?? configDefaults.fileStrategy; const fileStrategy = config.fileStrategy ?? configDefaults.fileStrategy;
@ -57,6 +63,7 @@ const AppService = async (app) => {
const interfaceConfig = await loadDefaultInterface(config, configDefaults); const interfaceConfig = await loadDefaultInterface(config, configDefaults);
const defaultLocals = { const defaultLocals = {
ocr,
paths, paths,
fileStrategy, fileStrategy,
socialLogins, socialLogins,

View file

@ -120,6 +120,7 @@ describe('AppService', () => {
}, },
}, },
paths: expect.anything(), paths: expect.anything(),
ocr: expect.anything(),
imageOutputType: expect.any(String), imageOutputType: expect.any(String),
fileConfig: undefined, fileConfig: undefined,
secureImageLinks: undefined, secureImageLinks: undefined,
@ -588,4 +589,33 @@ describe('AppService updating app.locals and issuing warnings', () => {
); );
}); });
}); });
it('should not parse environment variable references in OCR config', async () => {
  // Snapshot the env vars this test overwrites so they can be restored afterwards
  // and do not leak into other tests running in the same process.
  const envVarNames = ['OCR_API_KEY_CUSTOM_VAR_NAME', 'OCR_BASEURL_CUSTOM_VAR_NAME'];
  const previousEnv = envVarNames.map((name) => process.env[name]);

  // Mock custom configuration with env variable references in OCR config
  const mockConfig = {
    ocr: {
      apiKey: '${OCR_API_KEY_CUSTOM_VAR_NAME}',
      baseURL: '${OCR_BASEURL_CUSTOM_VAR_NAME}',
      strategy: 'mistral_ocr',
      mistralModel: 'mistral-medium',
    },
  };
  require('./Config/loadCustomConfig').mockImplementationOnce(() => Promise.resolve(mockConfig));

  try {
    // Set actual environment variables with different values
    process.env.OCR_API_KEY_CUSTOM_VAR_NAME = 'actual-api-key';
    process.env.OCR_BASEURL_CUSTOM_VAR_NAME = 'https://actual-ocr-url.com';

    // Initialize app
    const app = { locals: {} };
    await AppService(app);

    // Verify that the raw string references were preserved and not interpolated
    expect(app.locals.ocr).toBeDefined();
    expect(app.locals.ocr.apiKey).toEqual('${OCR_API_KEY_CUSTOM_VAR_NAME}');
    expect(app.locals.ocr.baseURL).toEqual('${OCR_BASEURL_CUSTOM_VAR_NAME}');
    expect(app.locals.ocr.strategy).toEqual('mistral_ocr');
    expect(app.locals.ocr.mistralModel).toEqual('mistral-medium');
  } finally {
    // Restore the environment exactly as it was, even if an expectation failed.
    envVarNames.forEach((name, index) => {
      if (previousEnv[index] === undefined) {
        delete process.env[name];
      } else {
        process.env[name] = previousEnv[index];
      }
    });
  }
});
}); });

View file

@ -2,15 +2,8 @@ const { loadAgent } = require('~/models/Agent');
const { logger } = require('~/config'); const { logger } = require('~/config');
const buildOptions = (req, endpoint, parsedBody) => { const buildOptions = (req, endpoint, parsedBody) => {
const { const { spec, iconURL, agent_id, instructions, maxContextTokens, ...model_parameters } =
spec, parsedBody;
iconURL,
agent_id,
instructions,
maxContextTokens,
resendFiles = true,
...model_parameters
} = parsedBody;
const agentPromise = loadAgent({ const agentPromise = loadAgent({
req, req,
agent_id, agent_id,
@ -24,7 +17,6 @@ const buildOptions = (req, endpoint, parsedBody) => {
iconURL, iconURL,
endpoint, endpoint,
agent_id, agent_id,
resendFiles,
instructions, instructions,
maxContextTokens, maxContextTokens,
model_parameters, model_parameters,

View file

@ -2,6 +2,7 @@ const { createContentAggregator, Providers } = require('@librechat/agents');
const { const {
EModelEndpoint, EModelEndpoint,
getResponseSender, getResponseSender,
AgentCapabilities,
providerEndpointMap, providerEndpointMap,
} = require('librechat-data-provider'); } = require('librechat-data-provider');
const { const {
@ -15,10 +16,13 @@ const initCustom = require('~/server/services/Endpoints/custom/initialize');
const initGoogle = require('~/server/services/Endpoints/google/initialize'); const initGoogle = require('~/server/services/Endpoints/google/initialize');
const generateArtifactsPrompt = require('~/app/clients/prompts/artifacts'); const generateArtifactsPrompt = require('~/app/clients/prompts/artifacts');
const { getCustomEndpointConfig } = require('~/server/services/Config'); const { getCustomEndpointConfig } = require('~/server/services/Config');
const { processFiles } = require('~/server/services/Files/process');
const { loadAgentTools } = require('~/server/services/ToolService'); const { loadAgentTools } = require('~/server/services/ToolService');
const AgentClient = require('~/server/controllers/agents/client'); const AgentClient = require('~/server/controllers/agents/client');
const { getToolFiles } = require('~/models/Conversation');
const { getModelMaxTokens } = require('~/utils'); const { getModelMaxTokens } = require('~/utils');
const { getAgent } = require('~/models/Agent'); const { getAgent } = require('~/models/Agent');
const { getFiles } = require('~/models/File');
const { logger } = require('~/config'); const { logger } = require('~/config');
const providerConfigMap = { const providerConfigMap = {
@ -34,20 +38,38 @@ const providerConfigMap = {
}; };
/** /**
* * @param {ServerRequest} req
* @param {Promise<Array<MongoFile | null>> | undefined} _attachments * @param {Promise<Array<MongoFile | null>> | undefined} _attachments
* @param {AgentToolResources | undefined} _tool_resources * @param {AgentToolResources | undefined} _tool_resources
* @returns {Promise<{ attachments: Array<MongoFile | undefined> | undefined, tool_resources: AgentToolResources | undefined }>} * @returns {Promise<{ attachments: Array<MongoFile | undefined> | undefined, tool_resources: AgentToolResources | undefined }>}
*/ */
const primeResources = async (_attachments, _tool_resources) => { const primeResources = async (req, _attachments, _tool_resources) => {
try { try {
/** @type {Array<MongoFile | undefined> | undefined} */
let attachments;
const tool_resources = _tool_resources ?? {};
const isOCREnabled = (req.app.locals?.[EModelEndpoint.agents]?.capabilities ?? []).includes(
AgentCapabilities.ocr,
);
if (tool_resources.ocr?.file_ids && isOCREnabled) {
const context = await getFiles(
{
file_id: { $in: tool_resources.ocr.file_ids },
},
{},
{},
);
attachments = (attachments ?? []).concat(context);
}
if (!_attachments) { if (!_attachments) {
return { attachments: undefined, tool_resources: _tool_resources }; return { attachments, tool_resources };
} }
/** @type {Array<MongoFile | undefined> | undefined} */ /** @type {Array<MongoFile | undefined> | undefined} */
const files = await _attachments; const files = await _attachments;
const attachments = []; if (!attachments) {
const tool_resources = _tool_resources ?? {}; /** @type {Array<MongoFile | undefined>} */
attachments = [];
}
for (const file of files) { for (const file of files) {
if (!file) { if (!file) {
@ -82,7 +104,6 @@ const primeResources = async (_attachments, _tool_resources) => {
* @param {ServerResponse} params.res * @param {ServerResponse} params.res
* @param {Agent} params.agent * @param {Agent} params.agent
* @param {object} [params.endpointOption] * @param {object} [params.endpointOption]
* @param {AgentToolResources} [params.tool_resources]
* @param {boolean} [params.isInitialAgent] * @param {boolean} [params.isInitialAgent]
* @returns {Promise<Agent>} * @returns {Promise<Agent>}
*/ */
@ -91,9 +112,28 @@ const initializeAgentOptions = async ({
res, res,
agent, agent,
endpointOption, endpointOption,
tool_resources,
isInitialAgent = false, isInitialAgent = false,
}) => { }) => {
let currentFiles;
const requestFiles = req.body.files ?? [];
if (
isInitialAgent &&
req.body.conversationId != null &&
agent.model_parameters?.resendFiles === true
) {
const fileIds = (await getToolFiles(req.body.conversationId)).map((f) => f.file_id);
if (requestFiles.length || fileIds.length) {
currentFiles = await processFiles(requestFiles, fileIds);
}
} else if (isInitialAgent && requestFiles.length) {
currentFiles = await processFiles(requestFiles);
}
const { attachments, tool_resources } = await primeResources(
req,
currentFiles,
agent.tool_resources,
);
const { tools, toolContextMap } = await loadAgentTools({ const { tools, toolContextMap } = await loadAgentTools({
req, req,
res, res,
@ -160,6 +200,7 @@ const initializeAgentOptions = async ({
return { return {
...agent, ...agent,
tools, tools,
attachments,
toolContextMap, toolContextMap,
maxContextTokens: maxContextTokens:
agent.max_context_tokens ?? agent.max_context_tokens ??
@ -197,11 +238,6 @@ const initializeClient = async ({ req, res, endpointOption }) => {
throw new Error('Agent not found'); throw new Error('Agent not found');
} }
const { attachments, tool_resources } = await primeResources(
endpointOption.attachments,
primaryAgent.tool_resources,
);
const agentConfigs = new Map(); const agentConfigs = new Map();
// Handle primary agent // Handle primary agent
@ -210,7 +246,6 @@ const initializeClient = async ({ req, res, endpointOption }) => {
res, res,
agent: primaryAgent, agent: primaryAgent,
endpointOption, endpointOption,
tool_resources,
isInitialAgent: true, isInitialAgent: true,
}); });
@ -240,18 +275,19 @@ const initializeClient = async ({ req, res, endpointOption }) => {
const client = new AgentClient({ const client = new AgentClient({
req, req,
agent: primaryConfig,
sender, sender,
attachments,
contentParts, contentParts,
agentConfigs,
eventHandlers, eventHandlers,
collectedUsage, collectedUsage,
artifactPromises, artifactPromises,
agent: primaryConfig,
spec: endpointOption.spec, spec: endpointOption.spec,
iconURL: endpointOption.iconURL, iconURL: endpointOption.iconURL,
agentConfigs,
endpoint: EModelEndpoint.agents, endpoint: EModelEndpoint.agents,
attachments: primaryConfig.attachments,
maxContextTokens: primaryConfig.maxContextTokens, maxContextTokens: primaryConfig.maxContextTokens,
resendFiles: primaryConfig.model_parameters?.resendFiles ?? true,
}); });
return { client }; return { client };

View file

@ -0,0 +1,207 @@
// ~/server/services/Files/MistralOCR/crud.js
const fs = require('fs');
const path = require('path');
const FormData = require('form-data');
const { FileSources, envVarRegex, extractEnvVariable } = require('librechat-data-provider');
const { loadAuthValues } = require('~/server/services/Tools/credentials');
const { logger, createAxiosInstance } = require('~/config');
const { logAxiosError } = require('~/utils');
const axios = createAxiosInstance();
/**
 * Posts a file from disk to the Mistral `/files` endpoint as multipart form data with
 * `purpose=ocr`. The file is attached as a read stream rather than a preloaded buffer.
 *
 * @param {Object} params Upload parameters
 * @param {string} params.filePath The path to the file on disk
 * @param {string} [params.fileName] Filename to send; falls back to the basename of `filePath`
 * @param {string} params.apiKey Mistral API key, sent as a Bearer token
 * @param {string} [params.baseURL=https://api.mistral.ai/v1] Mistral API base URL
 * @returns {Promise<Object>} The response body from the Mistral API
 * @throws Re-throws the request error after logging its message.
 */
async function uploadDocumentToMistral({
  filePath,
  fileName = '',
  apiKey,
  baseURL = 'https://api.mistral.ai/v1',
}) {
  const form = new FormData();
  form.append('purpose', 'ocr');

  const uploadName = fileName || path.basename(filePath);
  form.append('file', fs.createReadStream(filePath), { filename: uploadName });

  const requestConfig = {
    headers: {
      Authorization: `Bearer ${apiKey}`,
      ...form.getHeaders(),
    },
    // Lift axios' size limits so large documents are not rejected client-side.
    maxBodyLength: Infinity,
    maxContentLength: Infinity,
  };

  const pending = axios.post(`${baseURL}/files`, form, requestConfig);
  try {
    const response = await pending;
    return response.data;
  } catch (error) {
    logger.error('Error uploading document to Mistral:', error.message);
    throw error;
  }
}
/**
 * Requests a signed URL for a file previously uploaded to the Mistral API.
 *
 * @param {Object} params
 * @param {string} params.apiKey Mistral API key, sent as a Bearer token
 * @param {string} params.fileId ID of the uploaded file, interpolated into the request path
 * @param {number} [params.expiry=24] Value sent as the `expiry` query parameter
 *   (presumably hours — confirm against the Mistral Files API)
 * @param {string} [params.baseURL=https://api.mistral.ai/v1] Mistral API base URL
 * @returns {Promise<Object>} The response body; the caller in this file reads `url` from it
 * @throws Re-throws the request error after logging its message.
 */
async function getSignedUrl({
  apiKey,
  fileId,
  expiry = 24,
  baseURL = 'https://api.mistral.ai/v1',
}) {
  const endpoint = `${baseURL}/files/${fileId}/url?expiry=${expiry}`;
  const requestConfig = {
    headers: {
      Authorization: `Bearer ${apiKey}`,
    },
  };

  const pending = axios.get(endpoint, requestConfig);
  try {
    const response = await pending;
    return response.data;
  } catch (error) {
    logger.error('Error fetching signed URL:', error.message);
    throw error;
  }
}
/**
 * Submits a document URL to the Mistral `/ocr` endpoint and returns the response body.
 * Base64 image data is explicitly not requested (`include_image_base64: false`).
 *
 * @param {Object} params
 * @param {string} params.apiKey Mistral API key, sent as a Bearer token
 * @param {string} params.documentUrl URL placed in `document.document_url`
 * @param {string} [params.model=mistral-ocr-latest] Model name sent in the request body
 * @param {string} [params.baseURL=https://api.mistral.ai/v1] Mistral API base URL
 * @returns {Promise<OCRResult>}
 * @throws Re-throws the request error after logging its message.
 */
async function performOCR({
  apiKey,
  documentUrl,
  model = 'mistral-ocr-latest',
  baseURL = 'https://api.mistral.ai/v1',
}) {
  const payload = {
    model,
    include_image_base64: false,
    document: {
      type: 'document_url',
      document_url: documentUrl,
    },
  };
  const requestConfig = {
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${apiKey}`,
    },
  };

  const pending = axios.post(`${baseURL}/ocr`, payload, requestConfig);
  try {
    const response = await pending;
    return response.data;
  } catch (error) {
    logger.error('Error performing OCR:', error.message);
    throw error;
  }
}
/**
 * Matches `str` against `envVarRegex` and returns the first capture group.
 *
 * @param {string} str Candidate string, e.g. a config value that may reference an env variable.
 * @returns {string | null} The captured text, or null when the string does not match.
 */
function extractVariableName(str) {
  const found = str.match(envVarRegex);
  if (!found) {
    return null;
  }
  return found[1];
}
/**
 * Runs an uploaded file through Mistral OCR: resolves credentials, uploads the file,
 * obtains a signed URL for it, requests OCR on that URL, and concatenates the per-page
 * markdown into a single text.
 *
 * Credentials come from `req.app.locals.ocr`. `apiKey` and `baseURL` are resolved
 * independently of each other: a literal value is used as-is, while an env-variable
 * reference or an empty value is looked up through `loadAuthValues` (falling back to the
 * `OCR_API_KEY` / `OCR_BASEURL` names when the value is empty).
 *
 * @param {Object} params
 * @param {ServerRequest} params.req Request; `app.locals.ocr` and `user.id` are read
 * @param {Object} params.file Uploaded file; `path` and `originalname` are read
 * @param {string} [params.file_id] Accepted but not used by this function
 * @param {string} [params.entity_id] Accepted but not used by this function
 * @returns {Promise<{ filename: string, bytes: number, filepath: string, text: string, images: string[] }>}
 * @throws {Error} 'Error uploading document to Mistral OCR API' on any failure (the cause is logged).
 */
const uploadMistralOCR = async ({ req, file, file_id, entity_id }) => {
  try {
    /** @type {TCustomConfig['ocr']} */
    const ocrConfig = req.app.locals?.ocr ?? {};
    const apiKeyConfig = ocrConfig.apiKey || '';
    const baseURLConfig = ocrConfig.baseURL || '';

    const isApiKeyEnvVar = envVarRegex.test(apiKeyConfig);
    const isBaseURLEnvVar = envVarRegex.test(baseURLConfig);

    // Decide per setting whether a lookup is needed, so a literal value configured for
    // one setting is not discarded just because the other one needs a lookup.
    const lookupApiKey = isApiKeyEnvVar || !apiKeyConfig.trim();
    const lookupBaseURL = isBaseURLEnvVar || !baseURLConfig.trim();

    let apiKey = lookupApiKey ? undefined : apiKeyConfig;
    let baseURL = lookupBaseURL ? undefined : baseURLConfig;

    if (lookupApiKey || lookupBaseURL) {
      const apiKeyVarName = isApiKeyEnvVar ? extractVariableName(apiKeyConfig) : 'OCR_API_KEY';
      const baseURLVarName = isBaseURLEnvVar ? extractVariableName(baseURLConfig) : 'OCR_BASEURL';

      const authFields = [];
      if (lookupBaseURL) {
        authFields.push(baseURLVarName);
      }
      if (lookupApiKey) {
        authFields.push(apiKeyVarName);
      }

      const authValues = await loadAuthValues({
        userId: req.user.id,
        authFields,
        optional: new Set([baseURLVarName]),
      });

      if (lookupApiKey) {
        apiKey = authValues[apiKeyVarName];
      }
      if (lookupBaseURL) {
        // May stay undefined, in which case the request helpers apply their default base URL.
        baseURL = authValues[baseURLVarName];
      }
    }

    const mistralFile = await uploadDocumentToMistral({
      filePath: file.path,
      fileName: file.originalname,
      apiKey,
      baseURL,
    });

    const modelConfig = ocrConfig.mistralModel || '';
    const model = envVarRegex.test(modelConfig)
      ? extractEnvVariable(modelConfig)
      : modelConfig.trim() || 'mistral-ocr-latest';

    const signedUrlResponse = await getSignedUrl({
      apiKey,
      baseURL,
      fileId: mistralFile.id,
    });

    const ocrResult = await performOCR({
      apiKey,
      baseURL,
      model,
      documentUrl: signedUrlResponse.url,
    });

    let aggregatedText = '';
    const images = [];
    const isMultiPage = ocrResult.pages.length > 1;

    ocrResult.pages.forEach((page, index) => {
      // Page headers are only emitted for multi-page documents.
      if (isMultiPage) {
        aggregatedText += `# PAGE ${index + 1}\n`;
      }

      aggregatedText += page.markdown + '\n\n';

      if (page.images && page.images.length > 0) {
        page.images.forEach((image) => {
          if (image.image_base64) {
            images.push(image.image_base64);
          }
        });
      }
    });

    return {
      filename: file.originalname,
      // Size estimate (4 bytes per character of text), not a measured byte length.
      bytes: aggregatedText.length * 4,
      filepath: FileSources.mistral_ocr,
      text: aggregatedText,
      images,
    };
  } catch (error) {
    const message = 'Error uploading document to Mistral OCR API';
    logAxiosError({ error, message });
    throw new Error(message);
  }
};
module.exports = {
uploadDocumentToMistral,
uploadMistralOCR,
getSignedUrl,
performOCR,
};

View file

@ -0,0 +1,737 @@
const fs = require('fs');
/**
 * Shared axios test double.
 *
 * Exposes the HTTP verb stubs directly on the object, a `create()` stub that
 * returns a separate instance-like object with its own verb stubs, no-op
 * interceptor hooks, and a `reset()` helper that clears the recorded calls of
 * the top-level stubs between tests.
 */
const mockAxios = (() => {
  // Every HTTP verb resolves to an empty `{ data: {} }` response by default.
  const emptyResponseStub = () => jest.fn().mockResolvedValue({ data: {} });
  const interceptorHooks = () => ({ use: jest.fn(), eject: jest.fn() });

  return {
    interceptors: {
      request: interceptorHooks(),
      response: interceptorHooks(),
    },
    create: jest.fn().mockReturnValue({
      defaults: {
        proxy: null,
      },
      get: emptyResponseStub(),
      post: emptyResponseStub(),
      put: emptyResponseStub(),
      delete: emptyResponseStub(),
    }),
    get: emptyResponseStub(),
    post: emptyResponseStub(),
    put: emptyResponseStub(),
    delete: emptyResponseStub(),
    // Uses `function` (not an arrow) so `this` is the object `reset` is called on.
    reset: jest.fn().mockImplementation(function () {
      for (const member of ['get', 'post', 'put', 'delete', 'create']) {
        this[member].mockClear();
      }
    }),
  };
})();
// Module mocks for the service under test. Both `axios` itself and the
// `createAxiosInstance` factory from `~/config` resolve to the same `mockAxios`
// object, so requests are recorded on one set of stubs regardless of which
// entry point the service uses.
jest.mock('axios', () => mockAxios);
// No factory given: `fs` is replaced wholesale; individual tests assign
// `fs.createReadStream` themselves.
jest.mock('fs');
jest.mock('~/utils', () => ({
logAxiosError: jest.fn(),
}));
jest.mock('~/config', () => ({
logger: {
error: jest.fn(),
},
createAxiosInstance: () => mockAxios,
}));
// Credential lookup is stubbed; each test supplies its own resolved values.
jest.mock('~/server/services/Tools/credentials', () => ({
loadAuthValues: jest.fn(),
}));
// Loaded after the mocks above are registered so the service picks up the stubs.
const { uploadDocumentToMistral, uploadMistralOCR, getSignedUrl, performOCR } = require('./crud');
describe('MistralOCR Service', () => {
afterEach(() => {
mockAxios.reset();
jest.clearAllMocks();
});
describe('uploadDocumentToMistral', () => {
beforeEach(() => {
// Create a more complete mock for file streams that FormData can work with
const mockReadStream = {
on: jest.fn().mockImplementation(function (event, handler) {
// Simulate immediate 'end' event to make FormData complete processing
if (event === 'end') {
handler();
}
return this;
}),
pipe: jest.fn().mockImplementation(function () {
return this;
}),
pause: jest.fn(),
resume: jest.fn(),
emit: jest.fn(),
once: jest.fn(),
destroy: jest.fn(),
};
fs.createReadStream = jest.fn().mockReturnValue(mockReadStream);
// Mock FormData's append to avoid actual stream processing
jest.mock('form-data', () => {
const mockFormData = function () {
return {
append: jest.fn(),
getHeaders: jest
.fn()
.mockReturnValue({ 'content-type': 'multipart/form-data; boundary=---boundary' }),
getBuffer: jest.fn().mockReturnValue(Buffer.from('mock-form-data')),
getLength: jest.fn().mockReturnValue(100),
};
};
return mockFormData;
});
});
it('should upload a document to Mistral API using file streaming', async () => {
const mockResponse = { data: { id: 'file-123', purpose: 'ocr' } };
mockAxios.post.mockResolvedValueOnce(mockResponse);
const result = await uploadDocumentToMistral({
filePath: '/path/to/test.pdf',
fileName: 'test.pdf',
apiKey: 'test-api-key',
});
// Check that createReadStream was called with the correct file path
expect(fs.createReadStream).toHaveBeenCalledWith('/path/to/test.pdf');
// Since we're mocking FormData, we'll just check that axios was called correctly
expect(mockAxios.post).toHaveBeenCalledWith(
'https://api.mistral.ai/v1/files',
expect.anything(),
expect.objectContaining({
headers: expect.objectContaining({
Authorization: 'Bearer test-api-key',
}),
maxBodyLength: Infinity,
maxContentLength: Infinity,
}),
);
expect(result).toEqual(mockResponse.data);
});
it('should handle errors during document upload', async () => {
const errorMessage = 'API error';
mockAxios.post.mockRejectedValueOnce(new Error(errorMessage));
await expect(
uploadDocumentToMistral({
filePath: '/path/to/test.pdf',
fileName: 'test.pdf',
apiKey: 'test-api-key',
}),
).rejects.toThrow();
const { logger } = require('~/config');
expect(logger.error).toHaveBeenCalledWith(
expect.stringContaining('Error uploading document to Mistral:'),
expect.any(String),
);
});
});
describe('getSignedUrl', () => {
it('should fetch signed URL from Mistral API', async () => {
const mockResponse = { data: { url: 'https://document-url.com' } };
mockAxios.get.mockResolvedValueOnce(mockResponse);
const result = await getSignedUrl({
fileId: 'file-123',
apiKey: 'test-api-key',
});
expect(mockAxios.get).toHaveBeenCalledWith(
'https://api.mistral.ai/v1/files/file-123/url?expiry=24',
{
headers: {
Authorization: 'Bearer test-api-key',
},
},
);
expect(result).toEqual(mockResponse.data);
});
it('should handle errors when fetching signed URL', async () => {
const errorMessage = 'API error';
mockAxios.get.mockRejectedValueOnce(new Error(errorMessage));
await expect(
getSignedUrl({
fileId: 'file-123',
apiKey: 'test-api-key',
}),
).rejects.toThrow();
const { logger } = require('~/config');
expect(logger.error).toHaveBeenCalledWith('Error fetching signed URL:', errorMessage);
});
});
describe('performOCR', () => {
it('should perform OCR using Mistral API', async () => {
const mockResponse = {
data: {
pages: [{ markdown: 'Page 1 content' }, { markdown: 'Page 2 content' }],
},
};
mockAxios.post.mockResolvedValueOnce(mockResponse);
const result = await performOCR({
apiKey: 'test-api-key',
documentUrl: 'https://document-url.com',
model: 'mistral-ocr-latest',
});
expect(mockAxios.post).toHaveBeenCalledWith(
'https://api.mistral.ai/v1/ocr',
{
model: 'mistral-ocr-latest',
include_image_base64: false,
document: {
type: 'document_url',
document_url: 'https://document-url.com',
},
},
{
headers: {
'Content-Type': 'application/json',
Authorization: 'Bearer test-api-key',
},
},
);
expect(result).toEqual(mockResponse.data);
});
it('should handle errors during OCR processing', async () => {
const errorMessage = 'OCR processing error';
mockAxios.post.mockRejectedValueOnce(new Error(errorMessage));
await expect(
performOCR({
apiKey: 'test-api-key',
documentUrl: 'https://document-url.com',
}),
).rejects.toThrow();
const { logger } = require('~/config');
expect(logger.error).toHaveBeenCalledWith('Error performing OCR:', errorMessage);
});
});
describe('uploadMistralOCR', () => {
beforeEach(() => {
const mockReadStream = {
on: jest.fn().mockImplementation(function (event, handler) {
if (event === 'end') {
handler();
}
return this;
}),
pipe: jest.fn().mockImplementation(function () {
return this;
}),
pause: jest.fn(),
resume: jest.fn(),
emit: jest.fn(),
once: jest.fn(),
destroy: jest.fn(),
};
fs.createReadStream = jest.fn().mockReturnValue(mockReadStream);
});
it('should process OCR for a file with standard configuration', async () => {
// Setup mocks
const { loadAuthValues } = require('~/server/services/Tools/credentials');
loadAuthValues.mockResolvedValue({
OCR_API_KEY: 'test-api-key',
OCR_BASEURL: 'https://api.mistral.ai/v1',
});
// Mock file upload response
mockAxios.post.mockResolvedValueOnce({
data: { id: 'file-123', purpose: 'ocr' },
});
// Mock signed URL response
mockAxios.get.mockResolvedValueOnce({
data: { url: 'https://signed-url.com' },
});
// Mock OCR response with text and images
mockAxios.post.mockResolvedValueOnce({
data: {
pages: [
{
markdown: 'Page 1 content',
images: [{ image_base64: 'base64image1' }],
},
{
markdown: 'Page 2 content',
images: [{ image_base64: 'base64image2' }],
},
],
},
});
const req = {
user: { id: 'user123' },
app: {
locals: {
ocr: {
// Use environment variable syntax to ensure loadAuthValues is called
apiKey: '${OCR_API_KEY}',
baseURL: '${OCR_BASEURL}',
mistralModel: 'mistral-medium',
},
},
},
};
const file = {
path: '/tmp/upload/file.pdf',
originalname: 'document.pdf',
};
const result = await uploadMistralOCR({
req,
file,
file_id: 'file123',
entity_id: 'entity123',
});
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
expect(loadAuthValues).toHaveBeenCalledWith({
userId: 'user123',
authFields: ['OCR_BASEURL', 'OCR_API_KEY'],
optional: expect.any(Set),
});
// Verify OCR result
expect(result).toEqual({
filename: 'document.pdf',
bytes: expect.any(Number),
filepath: 'mistral_ocr',
text: expect.stringContaining('# PAGE 1'),
images: ['base64image1', 'base64image2'],
});
});
it('should process variable references in configuration', async () => {
  // The model reference (`${CUSTOM_MODEL}`) is resolved from the process
  // environment, so the variable has to exist for the duration of this test.
  // Remember the previous value and restore it afterwards so the variable does
  // not leak into other tests or suites running in the same worker.
  const previousCustomModel = process.env.CUSTOM_MODEL;
  process.env.CUSTOM_MODEL = 'mistral-large';

  try {
    // Setup mocks with environment variables
    const { loadAuthValues } = require('~/server/services/Tools/credentials');
    loadAuthValues.mockResolvedValue({
      CUSTOM_API_KEY: 'custom-api-key',
      CUSTOM_BASEURL: 'https://custom-api.mistral.ai/v1',
    });

    // Mock API responses: upload -> signed URL -> OCR
    mockAxios.post.mockResolvedValueOnce({
      data: { id: 'file-123', purpose: 'ocr' },
    });
    mockAxios.get.mockResolvedValueOnce({
      data: { url: 'https://signed-url.com' },
    });
    mockAxios.post.mockResolvedValueOnce({
      data: {
        pages: [{ markdown: 'Content from custom API' }],
      },
    });

    const req = {
      user: { id: 'user123' },
      app: {
        locals: {
          ocr: {
            apiKey: '${CUSTOM_API_KEY}',
            baseURL: '${CUSTOM_BASEURL}',
            mistralModel: '${CUSTOM_MODEL}',
          },
        },
      },
    };

    const file = {
      path: '/tmp/upload/file.pdf',
      originalname: 'document.pdf',
    };

    const result = await uploadMistralOCR({
      req,
      file,
      file_id: 'file123',
      entity_id: 'entity123',
    });

    expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');

    // Verify that custom environment variables were extracted and used
    expect(loadAuthValues).toHaveBeenCalledWith({
      userId: 'user123',
      authFields: ['CUSTOM_BASEURL', 'CUSTOM_API_KEY'],
      optional: expect.any(Set),
    });

    // Check that mistral-large was used in the OCR API call
    expect(mockAxios.post).toHaveBeenCalledWith(
      expect.anything(),
      expect.objectContaining({
        model: 'mistral-large',
      }),
      expect.anything(),
    );

    // Single-page result: no "# PAGE n" header is prepended
    expect(result.text).toEqual('Content from custom API\n\n');
  } finally {
    if (previousCustomModel === undefined) {
      delete process.env.CUSTOM_MODEL;
    } else {
      process.env.CUSTOM_MODEL = previousCustomModel;
    }
  }
});
it('should fall back to default values when variables are not properly formatted', async () => {
const { loadAuthValues } = require('~/server/services/Tools/credentials');
loadAuthValues.mockResolvedValue({
OCR_API_KEY: 'default-api-key',
OCR_BASEURL: undefined, // Testing optional parameter
});
mockAxios.post.mockResolvedValueOnce({
data: { id: 'file-123', purpose: 'ocr' },
});
mockAxios.get.mockResolvedValueOnce({
data: { url: 'https://signed-url.com' },
});
mockAxios.post.mockResolvedValueOnce({
data: {
pages: [{ markdown: 'Default API result' }],
},
});
const req = {
user: { id: 'user123' },
app: {
locals: {
ocr: {
// Use environment variable syntax to ensure loadAuthValues is called
apiKey: '${INVALID_FORMAT}', // Using valid env var format but with an invalid name
baseURL: '${OCR_BASEURL}', // Using valid env var format
mistralModel: 'mistral-ocr-latest', // Plain string value
},
},
},
};
const file = {
path: '/tmp/upload/file.pdf',
originalname: 'document.pdf',
};
await uploadMistralOCR({
req,
file,
file_id: 'file123',
entity_id: 'entity123',
});
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
// Should use the default values
expect(loadAuthValues).toHaveBeenCalledWith({
userId: 'user123',
authFields: ['OCR_BASEURL', 'INVALID_FORMAT'],
optional: expect.any(Set),
});
// Should use the default model when not using environment variable format
expect(mockAxios.post).toHaveBeenCalledWith(
expect.anything(),
expect.objectContaining({
model: 'mistral-ocr-latest',
}),
expect.anything(),
);
});
it('should handle API errors during OCR process', async () => {
  const { loadAuthValues } = require('~/server/services/Tools/credentials');
  loadAuthValues.mockResolvedValue({
    OCR_API_KEY: 'test-api-key',
  });

  // Mock file upload to fail
  mockAxios.post.mockRejectedValueOnce(new Error('Upload failed'));

  const req = {
    user: { id: 'user123' },
    app: {
      locals: {
        ocr: {
          // Use variable-reference syntax: a bare 'OCR_API_KEY' string would be
          // taken as the literal credential and the lookup mocked above would
          // never run.
          apiKey: '${OCR_API_KEY}',
          baseURL: '${OCR_BASEURL}',
        },
      },
    },
  };

  const file = {
    path: '/tmp/upload/file.pdf',
    originalname: 'document.pdf',
  };

  // Whatever fails inside the pipeline, callers see one generic error message
  await expect(
    uploadMistralOCR({
      req,
      file,
      file_id: 'file123',
      entity_id: 'entity123',
    }),
  ).rejects.toThrow('Error uploading document to Mistral OCR API');

  // Credentials were resolved before the upload was attempted
  expect(loadAuthValues).toHaveBeenCalledWith({
    userId: 'user123',
    authFields: ['OCR_BASEURL', 'OCR_API_KEY'],
    optional: expect.any(Set),
  });

  expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');

  const { logAxiosError } = require('~/utils');
  expect(logAxiosError).toHaveBeenCalled();
});
it('should handle single page documents without page numbering', async () => {
const { loadAuthValues } = require('~/server/services/Tools/credentials');
loadAuthValues.mockResolvedValue({
OCR_API_KEY: 'test-api-key',
OCR_BASEURL: 'https://api.mistral.ai/v1', // Make sure this is included
});
// Clear all previous mocks
mockAxios.post.mockClear();
mockAxios.get.mockClear();
// 1. First mock: File upload response
mockAxios.post.mockImplementationOnce(() =>
Promise.resolve({ data: { id: 'file-123', purpose: 'ocr' } }),
);
// 2. Second mock: Signed URL response
mockAxios.get.mockImplementationOnce(() =>
Promise.resolve({ data: { url: 'https://signed-url.com' } }),
);
// 3. Third mock: OCR response
mockAxios.post.mockImplementationOnce(() =>
Promise.resolve({
data: {
pages: [{ markdown: 'Single page content' }],
},
}),
);
const req = {
user: { id: 'user123' },
app: {
locals: {
ocr: {
apiKey: 'OCR_API_KEY',
baseURL: 'OCR_BASEURL',
mistralModel: 'mistral-ocr-latest',
},
},
},
};
const file = {
path: '/tmp/upload/file.pdf',
originalname: 'single-page.pdf',
};
const result = await uploadMistralOCR({
req,
file,
file_id: 'file123',
entity_id: 'entity123',
});
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
// Verify that single page documents don't include page numbering
expect(result.text).not.toContain('# PAGE');
expect(result.text).toEqual('Single page content\n\n');
});
it('should use literal values in configuration when provided directly', async () => {
const { loadAuthValues } = require('~/server/services/Tools/credentials');
// We'll still mock this but it should not be used for literal values
loadAuthValues.mockResolvedValue({});
// Clear all previous mocks
mockAxios.post.mockClear();
mockAxios.get.mockClear();
// 1. First mock: File upload response
mockAxios.post.mockImplementationOnce(() =>
Promise.resolve({ data: { id: 'file-123', purpose: 'ocr' } }),
);
// 2. Second mock: Signed URL response
mockAxios.get.mockImplementationOnce(() =>
Promise.resolve({ data: { url: 'https://signed-url.com' } }),
);
// 3. Third mock: OCR response
mockAxios.post.mockImplementationOnce(() =>
Promise.resolve({
data: {
pages: [{ markdown: 'Processed with literal config values' }],
},
}),
);
const req = {
user: { id: 'user123' },
app: {
locals: {
ocr: {
// Direct values that should be used as-is, without variable substitution
apiKey: 'actual-api-key-value',
baseURL: 'https://direct-api-url.mistral.ai/v1',
mistralModel: 'mistral-direct-model',
},
},
},
};
const file = {
path: '/tmp/upload/file.pdf',
originalname: 'direct-values.pdf',
};
const result = await uploadMistralOCR({
req,
file,
file_id: 'file123',
entity_id: 'entity123',
});
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
// Verify the correct URL was used with the direct baseURL value
expect(mockAxios.post).toHaveBeenCalledWith(
'https://direct-api-url.mistral.ai/v1/files',
expect.any(Object),
expect.objectContaining({
headers: expect.objectContaining({
Authorization: 'Bearer actual-api-key-value',
}),
}),
);
// Check the OCR call was made with the direct model value
expect(mockAxios.post).toHaveBeenCalledWith(
'https://direct-api-url.mistral.ai/v1/ocr',
expect.objectContaining({
model: 'mistral-direct-model',
}),
expect.any(Object),
);
// Verify the result
expect(result.text).toEqual('Processed with literal config values\n\n');
// Verify loadAuthValues was never called since we used direct values
expect(loadAuthValues).not.toHaveBeenCalled();
});
it('should handle empty configuration values and use defaults', async () => {
const { loadAuthValues } = require('~/server/services/Tools/credentials');
// Set up the mock values to be returned by loadAuthValues
loadAuthValues.mockResolvedValue({
OCR_API_KEY: 'default-from-env-key',
OCR_BASEURL: 'https://default-from-env.mistral.ai/v1',
});
// Clear all previous mocks
mockAxios.post.mockClear();
mockAxios.get.mockClear();
// 1. First mock: File upload response
mockAxios.post.mockImplementationOnce(() =>
Promise.resolve({ data: { id: 'file-123', purpose: 'ocr' } }),
);
// 2. Second mock: Signed URL response
mockAxios.get.mockImplementationOnce(() =>
Promise.resolve({ data: { url: 'https://signed-url.com' } }),
);
// 3. Third mock: OCR response
mockAxios.post.mockImplementationOnce(() =>
Promise.resolve({
data: {
pages: [{ markdown: 'Content from default configuration' }],
},
}),
);
const req = {
user: { id: 'user123' },
app: {
locals: {
ocr: {
// Empty string values - should fall back to defaults
apiKey: '',
baseURL: '',
mistralModel: '',
},
},
},
};
const file = {
path: '/tmp/upload/file.pdf',
originalname: 'empty-config.pdf',
};
const result = await uploadMistralOCR({
req,
file,
file_id: 'file123',
entity_id: 'entity123',
});
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
// Verify loadAuthValues was called with the default variable names
expect(loadAuthValues).toHaveBeenCalledWith({
userId: 'user123',
authFields: ['OCR_BASEURL', 'OCR_API_KEY'],
optional: expect.any(Set),
});
// Verify the API calls used the default values from loadAuthValues
expect(mockAxios.post).toHaveBeenCalledWith(
'https://default-from-env.mistral.ai/v1/files',
expect.any(Object),
expect.objectContaining({
headers: expect.objectContaining({
Authorization: 'Bearer default-from-env-key',
}),
}),
);
// Verify the OCR model defaulted to mistral-ocr-latest
expect(mockAxios.post).toHaveBeenCalledWith(
'https://default-from-env.mistral.ai/v1/ocr',
expect.objectContaining({
model: 'mistral-ocr-latest',
}),
expect.any(Object),
);
// Check result
expect(result.text).toEqual('Content from default configuration\n\n');
});
});
});

View file

@ -0,0 +1,5 @@
const crud = require('./crud');
// Directory entry point: re-exports everything `./crud` exports, unchanged.
module.exports = {
...crud,
};

View file

@ -49,6 +49,7 @@ async function encodeAndFormat(req, files, endpoint, mode) {
const promises = []; const promises = [];
const encodingMethods = {}; const encodingMethods = {};
const result = { const result = {
text: '',
files: [], files: [],
image_urls: [], image_urls: [],
}; };
@ -59,6 +60,9 @@ async function encodeAndFormat(req, files, endpoint, mode) {
for (let file of files) { for (let file of files) {
const source = file.source ?? FileSources.local; const source = file.source ?? FileSources.local;
if (source === FileSources.text && file.text) {
result.text += `${!result.text ? 'Attached document(s):\n```md' : '\n\n---\n\n'}# "${file.filename}"\n${file.text}\n`;
}
if (!file.height) { if (!file.height) {
promises.push([file, null]); promises.push([file, null]);
@ -85,6 +89,10 @@ async function encodeAndFormat(req, files, endpoint, mode) {
promises.push(preparePayload(req, file)); promises.push(preparePayload(req, file));
} }
if (result.text) {
result.text += '\n```';
}
const detail = req.body.imageDetail ?? ImageDetail.auto; const detail = req.body.imageDetail ?? ImageDetail.auto;
/** @type {Array<[MongoFile, string]>} */ /** @type {Array<[MongoFile, string]>} */

View file

@ -28,8 +28,8 @@ const { addResourceFileId, deleteResourceFileId } = require('~/server/controller
const { addAgentResourceFile, removeAgentResourceFiles } = require('~/models/Agent'); const { addAgentResourceFile, removeAgentResourceFiles } = require('~/models/Agent');
const { getOpenAIClient } = require('~/server/controllers/assistants/helpers'); const { getOpenAIClient } = require('~/server/controllers/assistants/helpers');
const { createFile, updateFileUsage, deleteFiles } = require('~/models/File'); const { createFile, updateFileUsage, deleteFiles } = require('~/models/File');
const { loadAuthValues } = require('~/server/services/Tools/credentials');
const { getEndpointsConfig } = require('~/server/services/Config'); const { getEndpointsConfig } = require('~/server/services/Config');
const { loadAuthValues } = require('~/app/clients/tools/util');
const { LB_QueueAsyncCall } = require('~/server/utils/queue'); const { LB_QueueAsyncCall } = require('~/server/utils/queue');
const { getStrategyFunctions } = require('./strategies'); const { getStrategyFunctions } = require('./strategies');
const { determineFileType } = require('~/server/utils'); const { determineFileType } = require('~/server/utils');
@ -162,7 +162,6 @@ const processDeleteRequest = async ({ req, files }) => {
for (const file of files) { for (const file of files) {
const source = file.source ?? FileSources.local; const source = file.source ?? FileSources.local;
if (req.body.agent_id && req.body.tool_resource) { if (req.body.agent_id && req.body.tool_resource) {
agentFiles.push({ agentFiles.push({
tool_resource: req.body.tool_resource, tool_resource: req.body.tool_resource,
@ -170,6 +169,11 @@ const processDeleteRequest = async ({ req, files }) => {
}); });
} }
if (source === FileSources.text) {
resolvedFileIds.push(file.file_id);
continue;
}
if (checkOpenAIStorage(source) && !client[source]) { if (checkOpenAIStorage(source) && !client[source]) {
await initializeClients(); await initializeClients();
} }
@ -521,6 +525,52 @@ const processAgentFileUpload = async ({ req, res, metadata }) => {
if (!isFileSearchEnabled) { if (!isFileSearchEnabled) {
throw new Error('File search is not enabled for Agents'); throw new Error('File search is not enabled for Agents');
} }
} else if (tool_resource === EToolResources.ocr) {
const isOCREnabled = await checkCapability(req, AgentCapabilities.ocr);
if (!isOCREnabled) {
throw new Error('OCR capability is not enabled for Agents');
}
const { handleFileUpload } = getStrategyFunctions(
req.app.locals?.ocr?.strategy ?? FileSources.mistral_ocr,
);
const { file_id, temp_file_id } = metadata;
const {
text,
bytes,
// TODO: OCR images support?
images,
filename,
filepath: ocrFileURL,
} = await handleFileUpload({ req, file, file_id, entity_id: agent_id });
const fileInfo = removeNullishValues({
text,
bytes,
file_id,
temp_file_id,
user: req.user.id,
type: file.mimetype,
filepath: ocrFileURL,
source: FileSources.text,
filename: filename ?? file.originalname,
model: messageAttachment ? undefined : req.body.model,
context: messageAttachment ? FileContext.message_attachment : FileContext.agents,
});
if (!messageAttachment && tool_resource) {
await addAgentResourceFile({
req,
file_id,
agent_id,
tool_resource,
});
}
const result = await createFile(fileInfo, true);
return res
.status(200)
.json({ message: 'Agent file uploaded and processed successfully', ...result });
} }
const source = const source =

View file

@ -24,6 +24,7 @@ const {
const { uploadOpenAIFile, deleteOpenAIFile, getOpenAIFileStream } = require('./OpenAI'); const { uploadOpenAIFile, deleteOpenAIFile, getOpenAIFileStream } = require('./OpenAI');
const { getCodeOutputDownloadStream, uploadCodeEnvFile } = require('./Code'); const { getCodeOutputDownloadStream, uploadCodeEnvFile } = require('./Code');
const { uploadVectors, deleteVectors } = require('./VectorDB'); const { uploadVectors, deleteVectors } = require('./VectorDB');
const { uploadMistralOCR } = require('./MistralOCR');
/** /**
* Firebase Storage Strategy Functions * Firebase Storage Strategy Functions
@ -127,6 +128,26 @@ const codeOutputStrategy = () => ({
getDownloadStream: getCodeOutputDownloadStream, getDownloadStream: getCodeOutputDownloadStream,
}); });
/**
 * Mistral OCR Strategy Functions
 *
 * Only `handleFileUpload` is backed by an implementation (`uploadMistralOCR`);
 * every other strategy slot is explicitly `null`.
 */
const mistralOCRStrategy = () => {
  const strategy = {
    /** @type {typeof saveFileFromURL | null} */
    saveURL: null,
    /** @type {typeof getLocalFileURL | null} */
    getFileURL: null,
    /** @type {typeof saveLocalBuffer | null} */
    saveBuffer: null,
    /** @type {typeof processLocalAvatar | null} */
    processAvatar: null,
    /** @type {typeof uploadLocalImage | null} */
    handleImageUpload: null,
    /** @type {typeof prepareImagesLocal | null} */
    prepareImagePayload: null,
    /** @type {typeof deleteLocalFile | null} */
    deleteFile: null,
    /** @type {typeof getLocalFileStream | null} */
    getDownloadStream: null,
    handleFileUpload: uploadMistralOCR,
  };
  return strategy;
};
// Strategy Selector // Strategy Selector
const getStrategyFunctions = (fileSource) => { const getStrategyFunctions = (fileSource) => {
if (fileSource === FileSources.firebase) { if (fileSource === FileSources.firebase) {
@ -141,6 +162,8 @@ const getStrategyFunctions = (fileSource) => {
return vectorStrategy(); return vectorStrategy();
} else if (fileSource === FileSources.execute_code) { } else if (fileSource === FileSources.execute_code) {
return codeOutputStrategy(); return codeOutputStrategy();
} else if (fileSource === FileSources.mistral_ocr) {
return mistralOCRStrategy();
} else { } else {
throw new Error('Invalid file source'); throw new Error('Invalid file source');
} }

View file

@ -0,0 +1,56 @@
const { getUserPluginAuthValue } = require('~/server/services/PluginService');
/**
 * Resolves credential values for a list of authentication fields.
 *
 * Each entry of `authFields` may name several alternates separated by `||`.
 * For every candidate, in order, `process.env` is consulted first and the
 * user's stored plugin auth value second; the first non-empty value wins and
 * is stored under the name of the candidate that produced it.
 *
 * @param {Object} params
 * @param {string} params.userId - User whose stored plugin auth values are consulted.
 * @param {string[]} params.authFields - Field names; alternates are delimited by "||".
 * @param {Set<string>} [params.optional] - Fields whose lookup failure yields `undefined` instead of an error.
 * @param {boolean} [params.throwError] - Forwarded to `getUserPluginAuthValue`.
 * @returns {Promise<Record<string, string | undefined>>} Map of resolved field name to value.
 *   Entries with no resolvable candidate are omitted.
 * @throws Re-throws the lookup error of the last alternate when it fails and is not optional.
 */
const loadAuthValues = async ({ userId, authFields, optional, throwError = true }) => {
  const resolved = {};

  /**
   * Walks the alternates of a single entry and returns the first hit.
   * @param {string[]} candidates Alternate field names, in priority order.
   * @returns {Promise<{ authField: string, authValue: string} | null>} The winning field and its value, or null if none resolved.
   */
  const resolveFirst = async (candidates) => {
    const finalCandidate = candidates[candidates.length - 1];

    for (const candidate of candidates) {
      // Environment takes precedence over per-user stored values.
      const fromEnv = process.env[candidate];
      if (fromEnv) {
        return { authField: candidate, authValue: fromEnv };
      }

      let stored;
      try {
        stored = await getUserPluginAuthValue(userId, candidate, throwError);
      } catch (err) {
        // An optional field that fails to load resolves to `undefined` right away.
        if (optional && optional.has(candidate)) {
          return { authField: candidate, authValue: undefined };
        }
        // Failures on earlier alternates are ignored; only the last one surfaces.
        if (candidate === finalCandidate) {
          throw err;
        }
      }

      if (stored) {
        return { authField: candidate, authValue: stored };
      }
    }

    return null;
  };

  for (const entry of authFields) {
    const match = await resolveFirst(entry.split('||'));
    if (match) {
      resolved[match.authField] = match.authValue;
    }
  }

  return resolved;
};
module.exports = {
loadAuthValues,
};

View file

@ -203,6 +203,7 @@ function generateConfig(key, baseURL, endpoint) {
AgentCapabilities.artifacts, AgentCapabilities.artifacts,
AgentCapabilities.actions, AgentCapabilities.actions,
AgentCapabilities.tools, AgentCapabilities.tools,
AgentCapabilities.ocr,
]; ];
} }

View file

@ -39,7 +39,10 @@ jest.mock('winston-daily-rotate-file', () => {
}); });
jest.mock('~/config', () => { jest.mock('~/config', () => {
const actualModule = jest.requireActual('~/config');
return { return {
sendEvent: actualModule.sendEvent,
createAxiosInstance: actualModule.createAxiosInstance,
logger: { logger: {
info: jest.fn(), info: jest.fn(),
warn: jest.fn(), warn: jest.fn(),

View file

@ -1787,3 +1787,51 @@
* @typedef {Promise<{ message: TMessage, conversation: TConversation }> | undefined} ClientDatabaseSavePromise * @typedef {Promise<{ message: TMessage, conversation: TConversation }> | undefined} ClientDatabaseSavePromise
* @memberof typedefs * @memberof typedefs
*/ */
/**
* @exports OCRImage
* @typedef {Object} OCRImage
* @property {string} id - The identifier of the image.
* @property {number} top_left_x - X-coordinate of the top left corner of the image.
* @property {number} top_left_y - Y-coordinate of the top left corner of the image.
* @property {number} bottom_right_x - X-coordinate of the bottom right corner of the image.
* @property {number} bottom_right_y - Y-coordinate of the bottom right corner of the image.
* @property {string} image_base64 - Base64-encoded image data.
* @memberof typedefs
*/
/**
* @exports PageDimensions
* @typedef {Object} PageDimensions
* @property {number} dpi - The dots per inch resolution of the page.
* @property {number} height - The height of the page in pixels.
* @property {number} width - The width of the page in pixels.
* @memberof typedefs
*/
/**
* @exports OCRPage
* @typedef {Object} OCRPage
* @property {number} index - The index of the page in the document.
* @property {string} markdown - The extracted text content of the page in markdown format.
* @property {OCRImage[]} images - Array of images found on the page.
* @property {PageDimensions} dimensions - The dimensions of the page.
* @memberof typedefs
*/
/**
* @exports OCRUsageInfo
* @typedef {Object} OCRUsageInfo
* @property {number} pages_processed - Number of pages processed in the document.
* @property {number} doc_size_bytes - Size of the document in bytes.
* @memberof typedefs
*/
/**
* @exports OCRResult
* @typedef {Object} OCRResult
* @property {OCRPage[]} pages - Array of pages extracted from the document.
* @property {string} model - The model used for OCR processing.
* @property {OCRUsageInfo} usage_info - Usage information for the OCR operation.
* @memberof typedefs
*/

View file

@ -5,6 +5,7 @@ import type { OptionWithIcon, ExtendedFile } from './types';
export type TAgentOption = OptionWithIcon & export type TAgentOption = OptionWithIcon &
Agent & { Agent & {
knowledge_files?: Array<[string, ExtendedFile]>; knowledge_files?: Array<[string, ExtendedFile]>;
context_files?: Array<[string, ExtendedFile]>;
code_files?: Array<[string, ExtendedFile]>; code_files?: Array<[string, ExtendedFile]>;
}; };

View file

@ -483,6 +483,7 @@ export interface ExtendedFile {
attached?: boolean; attached?: boolean;
embedded?: boolean; embedded?: boolean;
tool_resource?: string; tool_resource?: string;
metadata?: t.TFile['metadata'];
} }
export type ContextType = { navVisible: boolean; setNavVisible: (visible: boolean) => void }; export type ContextType = { navVisible: boolean; setNavVisible: (visible: boolean) => void };

View file

@ -1,7 +1,7 @@
import * as Ariakit from '@ariakit/react'; import * as Ariakit from '@ariakit/react';
import React, { useRef, useState, useMemo } from 'react'; import React, { useRef, useState, useMemo } from 'react';
import { FileSearch, ImageUpIcon, TerminalSquareIcon } from 'lucide-react';
import { EToolResources, EModelEndpoint } from 'librechat-data-provider'; import { EToolResources, EModelEndpoint } from 'librechat-data-provider';
import { FileSearch, ImageUpIcon, TerminalSquareIcon, FileType2Icon } from 'lucide-react';
import { FileUpload, TooltipAnchor, DropdownPopup } from '~/components/ui'; import { FileUpload, TooltipAnchor, DropdownPopup } from '~/components/ui';
import { useGetEndpointsQuery } from '~/data-provider'; import { useGetEndpointsQuery } from '~/data-provider';
import { AttachmentIcon } from '~/components/svg'; import { AttachmentIcon } from '~/components/svg';
@ -49,6 +49,17 @@ const AttachFile = ({ isRTL, disabled, handleFileChange }: AttachFileProps) => {
}, },
]; ];
if (capabilities.includes(EToolResources.ocr)) {
items.push({
label: localize('com_ui_upload_ocr_text'),
onClick: () => {
setToolResource(EToolResources.ocr);
handleUploadClick();
},
icon: <FileType2Icon className="icon-md" />,
});
}
if (capabilities.includes(EToolResources.file_search)) { if (capabilities.includes(EToolResources.file_search)) {
items.push({ items.push({
label: localize('com_ui_upload_file_search'), label: localize('com_ui_upload_file_search'),

View file

@ -1,6 +1,6 @@
import React, { useMemo } from 'react'; import React, { useMemo } from 'react';
import { EModelEndpoint, EToolResources } from 'librechat-data-provider'; import { EModelEndpoint, EToolResources } from 'librechat-data-provider';
import { FileSearch, ImageUpIcon, TerminalSquareIcon } from 'lucide-react'; import { FileSearch, ImageUpIcon, FileType2Icon, TerminalSquareIcon } from 'lucide-react';
import OGDialogTemplate from '~/components/ui/OGDialogTemplate'; import OGDialogTemplate from '~/components/ui/OGDialogTemplate';
import { useGetEndpointsQuery } from '~/data-provider'; import { useGetEndpointsQuery } from '~/data-provider';
import useLocalize from '~/hooks/useLocalize'; import useLocalize from '~/hooks/useLocalize';
@ -50,6 +50,12 @@ const DragDropModal = ({ onOptionSelect, setShowModal, files, isVisible }: DragD
value: EToolResources.execute_code, value: EToolResources.execute_code,
icon: <TerminalSquareIcon className="icon-md" />, icon: <TerminalSquareIcon className="icon-md" />,
}); });
} else if (capability === EToolResources.ocr) {
_options.push({
label: localize('com_ui_upload_ocr_text'),
value: EToolResources.ocr,
icon: <FileType2Icon className="icon-md" />,
});
} }
} }

View file

@ -19,7 +19,7 @@ const FilePreview = ({
}; };
className?: string; className?: string;
}) => { }) => {
const radius = 55; // Radius of the SVG circle const radius = 55;
const circumference = 2 * Math.PI * radius; const circumference = 2 * Math.PI * radius;
const progress = useProgress( const progress = useProgress(
file?.['progress'] ?? 1, file?.['progress'] ?? 1,
@ -27,16 +27,15 @@ const FilePreview = ({
(file as ExtendedFile | undefined)?.size ?? 1, (file as ExtendedFile | undefined)?.size ?? 1,
); );
// Calculate the offset based on the loading progress
const offset = circumference - progress * circumference; const offset = circumference - progress * circumference;
const circleCSSProperties = { const circleCSSProperties = {
transition: 'stroke-dashoffset 0.5s linear', transition: 'stroke-dashoffset 0.5s linear',
}; };
return ( return (
<div className={cn('size-10 shrink-0 overflow-hidden rounded-xl', className)}> <div className={cn('relative size-10 shrink-0 overflow-hidden rounded-xl', className)}>
<FileIcon file={file} fileType={fileType} /> <FileIcon file={file} fileType={fileType} />
<SourceIcon source={file?.source} /> <SourceIcon source={file?.source} isCodeFile={!!file?.['metadata']?.fileIdentifier} />
{progress < 1 && ( {progress < 1 && (
<ProgressCircle <ProgressCircle
circumference={circumference} circumference={circumference}

View file

@ -1,3 +1,4 @@
import { Terminal, Type, Database } from 'lucide-react';
import { EModelEndpoint, FileSources } from 'librechat-data-provider'; import { EModelEndpoint, FileSources } from 'librechat-data-provider';
import { MinimalIcon } from '~/components/Endpoints'; import { MinimalIcon } from '~/components/Endpoints';
import { cn } from '~/utils'; import { cn } from '~/utils';
@ -6,9 +7,13 @@ const sourceToEndpoint = {
[FileSources.openai]: EModelEndpoint.openAI, [FileSources.openai]: EModelEndpoint.openAI,
[FileSources.azure]: EModelEndpoint.azureOpenAI, [FileSources.azure]: EModelEndpoint.azureOpenAI,
}; };
const sourceToClassname = { const sourceToClassname = {
[FileSources.openai]: 'bg-white/75 dark:bg-black/65', [FileSources.openai]: 'bg-white/75 dark:bg-black/65',
[FileSources.azure]: 'azure-bg-color opacity-85', [FileSources.azure]: 'azure-bg-color opacity-85',
[FileSources.execute_code]: 'bg-black text-white opacity-85',
[FileSources.text]: 'bg-blue-100 dark:bg-blue-900 opacity-85 text-white',
[FileSources.vectordb]: 'bg-yellow-100 dark:bg-yellow-900 opacity-85 text-white',
}; };
const defaultClassName = const defaultClassName =
@ -16,13 +21,41 @@ const defaultClassName =
export default function SourceIcon({ export default function SourceIcon({
source, source,
isCodeFile,
className = defaultClassName, className = defaultClassName,
}: { }: {
source?: FileSources; source?: FileSources;
isCodeFile?: boolean;
className?: string; className?: string;
}) { }) {
if (source === FileSources.local || source === FileSources.firebase) { if (isCodeFile === true) {
return null; return (
<div className={cn(className, sourceToClassname[FileSources.execute_code] ?? '')}>
<span className="flex items-center justify-center">
<Terminal className="h-3 w-3" />
</span>
</div>
);
}
if (source === FileSources.text) {
return (
<div className={cn(className, sourceToClassname[source] ?? '')}>
<span className="flex items-center justify-center">
<Type className="h-3 w-3" />
</span>
</div>
);
}
if (source === FileSources.vectordb) {
return (
<div className={cn(className, sourceToClassname[source] ?? '')}>
<span className="flex items-center justify-center">
<Database className="h-3 w-3" />
</span>
</div>
);
} }
const endpoint = sourceToEndpoint[source ?? '']; const endpoint = sourceToEndpoint[source ?? ''];
@ -31,7 +64,7 @@ export default function SourceIcon({
return null; return null;
} }
return ( return (
<button type="button" className={cn(className, sourceToClassname[source ?? ''] ?? '')}> <div className={cn(className, sourceToClassname[source ?? ''] ?? '')}>
<span className="flex items-center justify-center"> <span className="flex items-center justify-center">
<MinimalIcon <MinimalIcon
endpoint={endpoint} endpoint={endpoint}
@ -40,6 +73,6 @@ export default function SourceIcon({
iconClassName="h-3 w-3" iconClassName="h-3 w-3"
/> />
</span> </span>
</button> </div>
); );
} }

View file

@ -23,6 +23,7 @@ import { processAgentOption } from '~/utils';
import AdminSettings from './AdminSettings'; import AdminSettings from './AdminSettings';
import DeleteButton from './DeleteButton'; import DeleteButton from './DeleteButton';
import AgentAvatar from './AgentAvatar'; import AgentAvatar from './AgentAvatar';
import FileContext from './FileContext';
import { Spinner } from '~/components'; import { Spinner } from '~/components';
import FileSearch from './FileSearch'; import FileSearch from './FileSearch';
import ShareAgent from './ShareAgent'; import ShareAgent from './ShareAgent';
@ -82,6 +83,10 @@ export default function AgentConfig({
() => agentsConfig?.capabilities.includes(AgentCapabilities.artifacts) ?? false, () => agentsConfig?.capabilities.includes(AgentCapabilities.artifacts) ?? false,
[agentsConfig], [agentsConfig],
); );
const ocrEnabled = useMemo(
() => agentsConfig?.capabilities.includes(AgentCapabilities.ocr) ?? false,
[agentsConfig],
);
const fileSearchEnabled = useMemo( const fileSearchEnabled = useMemo(
() => agentsConfig?.capabilities.includes(AgentCapabilities.file_search) ?? false, () => agentsConfig?.capabilities.includes(AgentCapabilities.file_search) ?? false,
[agentsConfig], [agentsConfig],
@ -91,6 +96,26 @@ export default function AgentConfig({
[agentsConfig], [agentsConfig],
); );
const context_files = useMemo(() => {
if (typeof agent === 'string') {
return [];
}
if (agent?.id !== agent_id) {
return [];
}
if (agent.context_files) {
return agent.context_files;
}
const _agent = processAgentOption({
agent,
fileMap,
});
return _agent.context_files ?? [];
}, [agent, agent_id, fileMap]);
const knowledge_files = useMemo(() => { const knowledge_files = useMemo(() => {
if (typeof agent === 'string') { if (typeof agent === 'string') {
return []; return [];
@ -334,7 +359,7 @@ export default function AgentConfig({
</div> </div>
</button> </button>
</div> </div>
{(codeEnabled || fileSearchEnabled || artifactsEnabled) && ( {(codeEnabled || fileSearchEnabled || artifactsEnabled || ocrEnabled) && (
<div className="mb-4 flex w-full flex-col items-start gap-3"> <div className="mb-4 flex w-full flex-col items-start gap-3">
<label className="text-token-text-primary block font-medium"> <label className="text-token-text-primary block font-medium">
{localize('com_assistants_capabilities')} {localize('com_assistants_capabilities')}
@ -345,6 +370,8 @@ export default function AgentConfig({
{fileSearchEnabled && <FileSearch agent_id={agent_id} files={knowledge_files} />} {fileSearchEnabled && <FileSearch agent_id={agent_id} files={knowledge_files} />}
{/* Artifacts */} {/* Artifacts */}
{artifactsEnabled && <Artifacts />} {artifactsEnabled && <Artifacts />}
{/* File Context (OCR) */}
{ocrEnabled && <FileContext agent_id={agent_id} files={context_files} />}
</div> </div>
)} )}
{/* Agent Tools & Actions */} {/* Agent Tools & Actions */}

View file

@ -0,0 +1,128 @@
import { useState, useRef } from 'react';
import {
EModelEndpoint,
EToolResources,
mergeFileConfig,
fileConfig as defaultFileConfig,
} from 'librechat-data-provider';
import type { ExtendedFile } from '~/common';
import { useFileHandling, useLocalize, useLazyEffect } from '~/hooks';
import FileRow from '~/components/Chat/Input/Files/FileRow';
import { useGetFileConfig } from '~/data-provider';
import { HoverCard, HoverCardContent, HoverCardPortal, HoverCardTrigger } from '~/components/ui';
import { AttachmentIcon, CircleHelpIcon } from '~/components/svg';
import { useChatContext } from '~/Providers';
import { ESide } from '~/common';
/**
 * Agent-builder section for "File Context (OCR)" files.
 *
 * Renders a labelled header with a hover-card explanation, the list of files currently
 * attached to the agent's `ocr` tool resource, and an upload button backed by a hidden
 * `<input type="file">`.
 *
 * @param agent_id - ID of the agent the files belong to. When falsy, the upload button and
 *   file input are disabled and the "disabled" hint is rendered instead.
 * @param files - Optional `[key, file]` entries used to seed the local file map.
 * @returns The section markup, or `null` when uploads are disabled for the agents endpoint.
 */
export default function FileContext({
agent_id,
files: _files,
}: {
agent_id: string;
files?: [string, ExtendedFile][];
}) {
const localize = useLocalize();
const { setFilesLoading } = useChatContext();
// Handle to the hidden file input so the visible button can open the file picker.
const fileInputRef = useRef<HTMLInputElement>(null);
// Local copy of the files shown in this section; starts empty and is seeded from `_files` below.
const [files, setFiles] = useState<Map<string, ExtendedFile>>(new Map());
// Falls back to the library's default file config while the query has no data.
const { data: fileConfig = defaultFileConfig } = useGetFileConfig({
select: (data) => mergeFileConfig(data),
});
// Uploads are tagged with this agent and the `ocr` tool resource, and results are written
// into the local `files` map through `fileSetter`.
// NOTE(review): how `useFileHandling` consumes these options is not visible here — confirm.
const { handleFileChange } = useFileHandling({
overrideEndpoint: EModelEndpoint.agents,
additionalMetadata: { agent_id, tool_resource: EToolResources.ocr },
fileSetter: setFiles,
});
// Re-seed the local map whenever the `_files` prop changes.
// NOTE(review): the third argument (750) is presumably a delay in milliseconds — confirm
// against `useLazyEffect`.
useLazyEffect(
() => {
if (_files) {
setFiles(new Map(_files));
}
},
[_files],
750,
);
const endpointFileConfig = fileConfig.endpoints[EModelEndpoint.agents];
const isUploadDisabled = endpointFileConfig.disabled ?? false;
// This early return sits after every hook call above, so hook order is the same on all renders.
if (isUploadDisabled) {
return null;
}
const handleButtonClick = () => {
// Clear any previous selection before opening the picker; without the reset, picking the
// same file twice in a row would typically not fire another `change` event.
if (fileInputRef.current) {
fileInputRef.current.value = '';
}
fileInputRef.current?.click();
};
return (
<div className="w-full">
<HoverCard openDelay={50}>
<div className="mb-2 flex items-center gap-2">
<HoverCardTrigger asChild>
<span className="flex items-center gap-2">
<label className="text-token-text-primary block font-medium">
{localize('com_agents_file_context')}
</label>
<CircleHelpIcon className="h-4 w-4 text-text-tertiary" />
</span>
</HoverCardTrigger>
<HoverCardPortal>
<HoverCardContent side={ESide.Top} className="w-80">
<div className="space-y-2">
<p className="text-sm text-text-secondary">
{localize('com_agents_file_context_info')}
</p>
</div>
</HoverCardContent>
</HoverCardPortal>
</div>
</HoverCard>
<div className="flex flex-col gap-3">
{/* File Context (OCR) Files */}
<FileRow
files={files}
setFiles={setFiles}
setFilesLoading={setFilesLoading}
agent_id={agent_id}
tool_resource={EToolResources.ocr}
Wrapper={({ children }) => <div className="flex flex-wrap gap-2">{children}</div>}
/>
<div>
<button
type="button"
disabled={!agent_id}
className="btn btn-neutral border-token-border-light relative h-9 w-full rounded-lg font-medium"
onClick={handleButtonClick}
>
<div className="flex w-full items-center justify-center gap-1">
<AttachmentIcon className="text-token-text-primary h-4 w-4" />
<input
multiple={true}
type="file"
style={{ display: 'none' }}
tabIndex={-1}
ref={fileInputRef}
disabled={!agent_id}
onChange={handleFileChange}
/>
{localize('com_ui_upload_file_context')}
</div>
</button>
</div>
{/* Disabled Message */}
{agent_id ? null : (
<div className="text-xs text-text-secondary">
{localize('com_agents_file_context_disabled')}
</div>
)}
</div>
</div>
);
}

View file

@ -1,21 +1,23 @@
import { ArrowUpDown } from 'lucide-react'; import { ArrowUpDown } from 'lucide-react';
import type { ColumnDef } from '@tanstack/react-table'; import type { ColumnDef } from '@tanstack/react-table';
import type { TFile } from 'librechat-data-provider'; import type { TFile } from 'librechat-data-provider';
import useLocalize from '~/hooks/useLocalize';
import PanelFileCell from './PanelFileCell'; import PanelFileCell from './PanelFileCell';
import { Button } from '~/components/ui'; import { Button } from '~/components/ui';
import { formatDate } from '~/utils'; import { formatDate } from '~/utils';
export const columns: ColumnDef<TFile>[] = [ export const columns: ColumnDef<TFile | undefined>[] = [
{ {
accessorKey: 'filename', accessorKey: 'filename',
header: ({ column }) => { header: ({ column }) => {
const localize = useLocalize();
return ( return (
<Button <Button
variant="ghost" variant="ghost"
className="hover:bg-surface-hover" className="hover:bg-surface-hover"
onClick={() => column.toggleSorting(column.getIsSorted() === 'asc')} onClick={() => column.toggleSorting(column.getIsSorted() === 'asc')}
> >
Name {localize('com_ui_name')}
<ArrowUpDown className="ml-2 h-4 w-4" /> <ArrowUpDown className="ml-2 h-4 w-4" />
</Button> </Button>
); );
@ -31,20 +33,21 @@ export const columns: ColumnDef<TFile>[] = [
size: '10%', size: '10%',
}, },
header: ({ column }) => { header: ({ column }) => {
const localize = useLocalize();
return ( return (
<Button <Button
variant="ghost" variant="ghost"
className="hover:bg-surface-hover" className="hover:bg-surface-hover"
onClick={() => column.toggleSorting(column.getIsSorted() === 'asc')} onClick={() => column.toggleSorting(column.getIsSorted() === 'asc')}
> >
Date {localize('com_ui_date')}
<ArrowUpDown className="ml-2 h-4 w-4" /> <ArrowUpDown className="ml-2 h-4 w-4" />
</Button> </Button>
); );
}, },
cell: ({ row }) => ( cell: ({ row }) => (
<span className="flex justify-end text-xs"> <span className="flex justify-end text-xs">
{formatDate(row.original.updatedAt?.toString() ?? '')} {formatDate(row.original?.updatedAt?.toString() ?? '')}
</span> </span>
), ),
}, },

View file

@ -6,7 +6,6 @@ import { getFileType } from '~/utils';
export default function PanelFileCell({ row }: { row: Row<TFile | undefined> }) { export default function PanelFileCell({ row }: { row: Row<TFile | undefined> }) {
const file = row.original; const file = row.original;
return ( return (
<div className="flex w-full items-center gap-2"> <div className="flex w-full items-center gap-2">
{file?.type.startsWith('image') === true ? ( {file?.type.startsWith('image') === true ? (

View file

@ -159,6 +159,7 @@ export default function DataTable<TData, TValue>({ columns, data }: DataTablePro
filename: fileData.filename, filename: fileData.filename,
source: fileData.source, source: fileData.source,
size: fileData.bytes, size: fileData.bytes,
metadata: fileData.metadata,
}); });
}, },
[addFile, fileMap, conversation, localize, showToast, fileConfig.endpoints], [addFile, fileMap, conversation, localize, showToast, fileConfig.endpoints],

View file

@ -63,8 +63,9 @@ export const useUploadFileMutation = (
const update = {}; const update = {};
const prevResources = agent.tool_resources ?? {}; const prevResources = agent.tool_resources ?? {};
const prevResource: t.ExecuteCodeResource | t.AgentFileSearchResource = agent const prevResource: t.ExecuteCodeResource | t.AgentFileResource = agent.tool_resources?.[
.tool_resources?.[tool_resource] ?? { tool_resource
] ?? {
file_ids: [], file_ids: [],
}; };
if (!prevResource.file_ids) { if (!prevResource.file_ids) {

View file

@ -11,6 +11,9 @@
"com_agents_create_error": "There was an error creating your agent.", "com_agents_create_error": "There was an error creating your agent.",
"com_agents_description_placeholder": "Optional: Describe your Agent here", "com_agents_description_placeholder": "Optional: Describe your Agent here",
"com_agents_enable_file_search": "Enable File Search", "com_agents_enable_file_search": "Enable File Search",
"com_agents_file_context": "File Context (OCR)",
"com_agents_file_context_disabled": "Agent must be created before uploading files for File Context.",
"com_agents_file_context_info": "Files uploaded as \"Context\" are processed using OCR to extract text, which is then added to the Agent's instructions. Ideal for documents, images with text, or PDFs where you need the full text content of a file.",
"com_agents_file_search_disabled": "Agent must be created before uploading files for File Search.", "com_agents_file_search_disabled": "Agent must be created before uploading files for File Search.",
"com_agents_file_search_info": "When enabled, the agent will be informed of the exact filenames listed below, allowing it to retrieve relevant context from these files.", "com_agents_file_search_info": "When enabled, the agent will be informed of the exact filenames listed below, allowing it to retrieve relevant context from these files.",
"com_agents_instructions_placeholder": "The system instructions that the agent uses", "com_agents_instructions_placeholder": "The system instructions that the agent uses",
@ -811,10 +814,12 @@
"com_ui_upload_code_files": "Upload for Code Interpreter", "com_ui_upload_code_files": "Upload for Code Interpreter",
"com_ui_upload_delay": "Uploading \"{{0}}\" is taking more time than anticipated. Please wait while the file finishes indexing for retrieval.", "com_ui_upload_delay": "Uploading \"{{0}}\" is taking more time than anticipated. Please wait while the file finishes indexing for retrieval.",
"com_ui_upload_error": "There was an error uploading your file", "com_ui_upload_error": "There was an error uploading your file",
"com_ui_upload_file_context": "Upload File Context",
"com_ui_upload_file_search": "Upload for File Search", "com_ui_upload_file_search": "Upload for File Search",
"com_ui_upload_files": "Upload files", "com_ui_upload_files": "Upload files",
"com_ui_upload_image": "Upload an image", "com_ui_upload_image": "Upload an image",
"com_ui_upload_image_input": "Upload Image", "com_ui_upload_image_input": "Upload Image",
"com_ui_upload_ocr_text": "Upload as Text",
"com_ui_upload_invalid": "Invalid file for upload. Must be an image not exceeding the limit", "com_ui_upload_invalid": "Invalid file for upload. Must be an image not exceeding the limit",
"com_ui_upload_invalid_var": "Invalid file for upload. Must be an image not exceeding {{0}} MB", "com_ui_upload_invalid_var": "Invalid file for upload. Must be an image not exceeding {{0}} MB",
"com_ui_upload_success": "Successfully uploaded file", "com_ui_upload_success": "Successfully uploaded file",
@ -835,4 +840,4 @@
"com_ui_zoom": "Zoom", "com_ui_zoom": "Zoom",
"com_user_message": "You", "com_user_message": "You",
"com_warning_resubmit_unsupported": "Resubmitting the AI message is not supported for this endpoint." "com_warning_resubmit_unsupported": "Resubmitting the AI message is not supported for this endpoint."
} }

View file

@ -58,6 +58,9 @@ export const processAgentOption = ({
label: _agent?.name ?? '', label: _agent?.name ?? '',
value: _agent?.id ?? '', value: _agent?.id ?? '',
icon: isGlobal ? <EarthIcon className="icon-md text-green-400" /> : null, icon: isGlobal ? <EarthIcon className="icon-md text-green-400" /> : null,
context_files: _agent?.tool_resources?.ocr?.file_ids
? ([] as Array<[string, ExtendedFile]>)
: undefined,
knowledge_files: _agent?.tool_resources?.file_search?.file_ids knowledge_files: _agent?.tool_resources?.file_search?.file_ids
? ([] as Array<[string, ExtendedFile]>) ? ([] as Array<[string, ExtendedFile]>)
: undefined, : undefined,
@ -83,7 +86,7 @@ export const processAgentOption = ({
const source = const source =
tool_resource === EToolResources.file_search tool_resource === EToolResources.file_search
? FileSources.vectordb ? FileSources.vectordb
: file?.source ?? FileSources.local; : (file?.source ?? FileSources.local);
if (file) { if (file) {
list?.push([ list?.push([
@ -97,6 +100,7 @@ export const processAgentOption = ({
height: file.height, height: file.height,
size: file.bytes, size: file.bytes,
preview: file.filepath, preview: file.filepath,
metadata: file.metadata,
progress: 1, progress: 1,
source, source,
}, },
@ -117,6 +121,16 @@ export const processAgentOption = ({
} }
}; };
if (agent.context_files && _agent?.tool_resources?.ocr?.file_ids) {
_agent.tool_resources.ocr.file_ids.forEach((file_id) =>
handleFile({
file_id,
list: agent.context_files,
tool_resource: EToolResources.ocr,
}),
);
}
if (agent.knowledge_files && _agent?.tool_resources?.file_search?.file_ids) { if (agent.knowledge_files && _agent?.tool_resources?.file_search?.file_ids) {
_agent.tool_resources.file_search.file_ids.forEach((file_id) => _agent.tool_resources.file_search.file_ids.forEach((file_id) =>
handleFile({ handleFile({

2
package-lock.json generated
View file

@ -41014,7 +41014,7 @@
}, },
"packages/data-provider": { "packages/data-provider": {
"name": "librechat-data-provider", "name": "librechat-data-provider",
"version": "0.7.7", "version": "0.7.71",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"axios": "^1.8.2", "axios": "^1.8.2",

View file

@ -1,6 +1,6 @@
{ {
"name": "librechat-data-provider", "name": "librechat-data-provider",
"version": "0.7.7", "version": "0.7.71",
"description": "data services for librechat apps", "description": "data services for librechat apps",
"main": "dist/index.js", "main": "dist/index.js",
"module": "dist/index.es.js", "module": "dist/index.es.js",

View file

@ -168,6 +168,7 @@ export enum AgentCapabilities {
artifacts = 'artifacts', artifacts = 'artifacts',
actions = 'actions', actions = 'actions',
tools = 'tools', tools = 'tools',
ocr = 'ocr',
} }
export const defaultAssistantsVersion = { export const defaultAssistantsVersion = {
@ -242,6 +243,7 @@ export const agentsEndpointSChema = baseEndpointSchema.merge(
AgentCapabilities.artifacts, AgentCapabilities.artifacts,
AgentCapabilities.actions, AgentCapabilities.actions,
AgentCapabilities.tools, AgentCapabilities.tools,
AgentCapabilities.ocr,
]), ]),
}), }),
); );
@ -534,9 +536,22 @@ export type TStartupConfig = {
bundlerURL?: string; bundlerURL?: string;
}; };
/** String identifiers accepted by the `strategy` field of `ocrSchema`. */
export enum OCRStrategy {
/** Serialized as `'mistral_ocr'`; this is the default `strategy` in `ocrSchema`. */
MISTRAL_OCR = 'mistral_ocr',
/** Serialized as `'custom_ocr'`. */
CUSTOM_OCR = 'custom_ocr',
}
/**
 * Zod schema for the `ocr` section of the custom config (used as `ocr: ocrSchema.optional()`
 * in `configSchema`).
 *
 * - `mistralModel`: optional string with no default.
 * - `apiKey`: string; defaults to the literal `'OCR_API_KEY'` when omitted.
 * - `baseURL`: string; defaults to the literal `'OCR_BASEURL'` when omitted.
 * - `strategy`: an `OCRStrategy` member; defaults to `OCRStrategy.MISTRAL_OCR`.
 *
 * NOTE(review): the `apiKey` / `baseURL` defaults look like environment variable names rather
 * than usable values — presumably they are resolved later, when OCR is actually performed.
 * Confirm against the code that consumes this config.
 */
export const ocrSchema = z.object({
mistralModel: z.string().optional(),
apiKey: z.string().optional().default('OCR_API_KEY'),
baseURL: z.string().optional().default('OCR_BASEURL'),
strategy: z.nativeEnum(OCRStrategy).default(OCRStrategy.MISTRAL_OCR),
});
export const configSchema = z.object({ export const configSchema = z.object({
version: z.string(), version: z.string(),
cache: z.boolean().default(true), cache: z.boolean().default(true),
ocr: ocrSchema.optional(),
secureImageLinks: z.boolean().optional(), secureImageLinks: z.boolean().optional(),
imageOutputType: z.nativeEnum(EImageOutputType).default(EImageOutputType.PNG), imageOutputType: z.nativeEnum(EImageOutputType).default(EImageOutputType.PNG),
includedTools: z.array(z.string()).optional(), includedTools: z.array(z.string()).optional(),
@ -1175,7 +1190,7 @@ export enum Constants {
/** Key for the app's version. */ /** Key for the app's version. */
VERSION = 'v0.7.7', VERSION = 'v0.7.7',
/** Key for the Custom Config's version (librechat.yaml). */ /** Key for the Custom Config's version (librechat.yaml). */
CONFIG_VERSION = '1.2.1', CONFIG_VERSION = '1.2.2',
/** Standard value for the first message's `parentMessageId` value, to indicate no parent exists. */ /** Standard value for the first message's `parentMessageId` value, to indicate no parent exists. */
NO_PARENT = '00000000-0000-0000-0000-000000000000', NO_PARENT = '00000000-0000-0000-0000-000000000000',
/** Standard value for the initial conversationId before a request is sent */ /** Standard value for the initial conversationId before a request is sent */

View file

@ -7,6 +7,7 @@ export * from './file-config';
export * from './artifacts'; export * from './artifacts';
/* schema helpers */ /* schema helpers */
export * from './parsers'; export * from './parsers';
export * from './ocr';
export * from './zod'; export * from './zod';
/* custom/dynamic configurations */ /* custom/dynamic configurations */
export * from './generate'; export * from './generate';

View file

@ -0,0 +1,14 @@
import type { TCustomConfig } from '../src/config';
import { OCRStrategy } from '../src/config';
/**
 * Builds a fully populated OCR config object from the (possibly missing) `ocr` section
 * of the custom config.
 *
 * Values are passed through verbatim; nothing is resolved or validated here.
 *
 * @param config - The `ocr` section of the custom config, or `undefined` when not configured.
 * @returns An object with `apiKey`, `baseURL` and `mistralModel` (each falling back to an
 *   empty string) and `strategy` (falling back to `OCRStrategy.MISTRAL_OCR`).
 */
export function loadOCRConfig(config: TCustomConfig['ocr']): TCustomConfig['ocr'] {
  // Absent section or absent field both collapse to an empty string.
  const orEmpty = (value: string | undefined): string => value ?? '';

  return {
    apiKey: orEmpty(config?.apiKey),
    baseURL: orEmpty(config?.baseURL),
    mistralModel: orEmpty(config?.mistralModel),
    strategy: config?.strategy ?? OCRStrategy.MISTRAL_OCR,
  };
}

View file

@ -1152,7 +1152,6 @@ export const compactAgentsSchema = tConversationSchema
iconURL: true, iconURL: true,
greeting: true, greeting: true,
agent_id: true, agent_id: true,
resendFiles: true,
instructions: true, instructions: true,
additional_instructions: true, additional_instructions: true,
}) })

View file

@ -27,6 +27,7 @@ export enum EToolResources {
code_interpreter = 'code_interpreter', code_interpreter = 'code_interpreter',
execute_code = 'execute_code', execute_code = 'execute_code',
file_search = 'file_search', file_search = 'file_search',
ocr = 'ocr',
} }
export type Tool = { export type Tool = {
@ -163,7 +164,8 @@ export type AgentModelParameters = {
export interface AgentToolResources { export interface AgentToolResources {
execute_code?: ExecuteCodeResource; execute_code?: ExecuteCodeResource;
file_search?: AgentFileSearchResource; file_search?: AgentFileResource;
ocr?: Omit<AgentFileResource, 'vector_store_ids'>;
} }
export interface ExecuteCodeResource { export interface ExecuteCodeResource {
/** /**
@ -177,7 +179,7 @@ export interface ExecuteCodeResource {
files?: Array<TFile>; files?: Array<TFile>;
} }
export interface AgentFileSearchResource { export interface AgentFileResource {
/** /**
* The ID of the vector store attached to this agent. There * The ID of the vector store attached to this agent. There
* can be a maximum of 1 vector store attached to the agent. * can be a maximum of 1 vector store attached to the agent.

View file

@ -8,6 +8,8 @@ export enum FileSources {
s3 = 's3', s3 = 's3',
vectordb = 'vectordb', vectordb = 'vectordb',
execute_code = 'execute_code', execute_code = 'execute_code',
mistral_ocr = 'mistral_ocr',
text = 'text',
} }
export const checkOpenAIStorage = (source: string) => export const checkOpenAIStorage = (source: string) =>

View file

@ -8,6 +8,7 @@ export interface IMongoFile extends Document {
file_id: string; file_id: string;
temp_file_id?: string; temp_file_id?: string;
bytes: number; bytes: number;
text?: string;
filename: string; filename: string;
filepath: string; filepath: string;
object: 'file'; object: 'file';
@ -72,6 +73,9 @@ const file: Schema<IMongoFile> = new Schema(
type: String, type: String,
required: true, required: true,
}, },
text: {
type: String,
},
context: { context: {
type: String, type: String,
}, },