🔍 feat: Mistral OCR API / Upload Files as Text (#6274)

* refactor: move `loadAuthValues` to `~/services/Tools/credentials`

* feat: add createAxiosInstance function to configure axios with proxy support

* WIP: First pass mistral ocr

* refactor: replace getConvoFiles with getToolFiles for improved file retrieval logic

* refactor: improve document formatting in encodeAndFormat function

* refactor: remove unused resendFiles parameter from buildOptions function (this option comes from the agent config)

* fix: update getFiles call to include files with `text` property as well

* refactor: move file handling to `initializeAgentOptions`

* refactor: enhance addImageURLs method to handle OCR text and improve message formatting

* refactor: update message formatting to handle OCR text in various content types

* refactor: remove unused resendFiles property from compactAgentsSchema

* fix: add error handling for Mistral OCR document upload and logging

* refactor: integrate OCR capability into file upload options and configuration

* refactor: skip processing for text source files in delete request, as they are directly tied to database

* feat: add metadata field to ExtendedFile type and update PanelColumns and PanelTable components for localization and metadata handling

* fix: source icon styling

* wip: first pass, frontend file context agent resources

* refactor: add hover card with contextual information for File Context (OCR) in FileContext component

* feat: enhance file processing by integrating file retrieval for OCR resources in agent initialization

* feat: implement OCR config; fix: agent resource deletion for ocr files

* feat: enhance agent initialization by adding OCR capability check in resource priming

* ci: fix `~/config` module mock

* ci: add OCR property expectation in AppService tests

* refactor: simplify OCR config loading by removing environment variable extraction, to be done when OCR is actually performed

* ci: add unit test to ensure environment variable references are not parsed in OCR config

* refactor: disable base64 image inclusion in OCR request

* refactor: enhance OCR configuration handling by validating environment variables and providing defaults

* refactor: use file stream from disk for mistral ocr api
This commit is contained in:
Danny Avila 2025-03-10 17:23:46 -04:00 committed by GitHub
parent 9db00edfc4
commit ded3cd8876
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
48 changed files with 1621 additions and 131 deletions

View file

@ -2,6 +2,7 @@ const { createContentAggregator, Providers } = require('@librechat/agents');
const {
EModelEndpoint,
getResponseSender,
AgentCapabilities,
providerEndpointMap,
} = require('librechat-data-provider');
const {
@ -15,10 +16,13 @@ const initCustom = require('~/server/services/Endpoints/custom/initialize');
const initGoogle = require('~/server/services/Endpoints/google/initialize');
const generateArtifactsPrompt = require('~/app/clients/prompts/artifacts');
const { getCustomEndpointConfig } = require('~/server/services/Config');
const { processFiles } = require('~/server/services/Files/process');
const { loadAgentTools } = require('~/server/services/ToolService');
const AgentClient = require('~/server/controllers/agents/client');
const { getToolFiles } = require('~/models/Conversation');
const { getModelMaxTokens } = require('~/utils');
const { getAgent } = require('~/models/Agent');
const { getFiles } = require('~/models/File');
const { logger } = require('~/config');
const providerConfigMap = {
@ -34,20 +38,38 @@ const providerConfigMap = {
};
/**
*
* @param {ServerRequest} req
* @param {Promise<Array<MongoFile | null>> | undefined} _attachments
* @param {AgentToolResources | undefined} _tool_resources
* @returns {Promise<{ attachments: Array<MongoFile | undefined> | undefined, tool_resources: AgentToolResources | undefined }>}
*/
const primeResources = async (_attachments, _tool_resources) => {
const primeResources = async (req, _attachments, _tool_resources) => {
try {
/** @type {Array<MongoFile | undefined> | undefined} */
let attachments;
const tool_resources = _tool_resources ?? {};
const isOCREnabled = (req.app.locals?.[EModelEndpoint.agents]?.capabilities ?? []).includes(
AgentCapabilities.ocr,
);
if (tool_resources.ocr?.file_ids && isOCREnabled) {
const context = await getFiles(
{
file_id: { $in: tool_resources.ocr.file_ids },
},
{},
{},
);
attachments = (attachments ?? []).concat(context);
}
if (!_attachments) {
return { attachments: undefined, tool_resources: _tool_resources };
return { attachments, tool_resources };
}
/** @type {Array<MongoFile | undefined> | undefined} */
const files = await _attachments;
const attachments = [];
const tool_resources = _tool_resources ?? {};
if (!attachments) {
/** @type {Array<MongoFile | undefined>} */
attachments = [];
}
for (const file of files) {
if (!file) {
@ -82,7 +104,6 @@ const primeResources = async (_attachments, _tool_resources) => {
* @param {ServerResponse} params.res
* @param {Agent} params.agent
* @param {object} [params.endpointOption]
* @param {AgentToolResources} [params.tool_resources]
* @param {boolean} [params.isInitialAgent]
* @returns {Promise<Agent>}
*/
@ -91,9 +112,28 @@ const initializeAgentOptions = async ({
res,
agent,
endpointOption,
tool_resources,
isInitialAgent = false,
}) => {
let currentFiles;
const requestFiles = req.body.files ?? [];
if (
isInitialAgent &&
req.body.conversationId != null &&
agent.model_parameters?.resendFiles === true
) {
const fileIds = (await getToolFiles(req.body.conversationId)).map((f) => f.file_id);
if (requestFiles.length || fileIds.length) {
currentFiles = await processFiles(requestFiles, fileIds);
}
} else if (isInitialAgent && requestFiles.length) {
currentFiles = await processFiles(requestFiles);
}
const { attachments, tool_resources } = await primeResources(
req,
currentFiles,
agent.tool_resources,
);
const { tools, toolContextMap } = await loadAgentTools({
req,
res,
@ -160,6 +200,7 @@ const initializeAgentOptions = async ({
return {
...agent,
tools,
attachments,
toolContextMap,
maxContextTokens:
agent.max_context_tokens ??
@ -197,11 +238,6 @@ const initializeClient = async ({ req, res, endpointOption }) => {
throw new Error('Agent not found');
}
const { attachments, tool_resources } = await primeResources(
endpointOption.attachments,
primaryAgent.tool_resources,
);
const agentConfigs = new Map();
// Handle primary agent
@ -210,7 +246,6 @@ const initializeClient = async ({ req, res, endpointOption }) => {
res,
agent: primaryAgent,
endpointOption,
tool_resources,
isInitialAgent: true,
});
@ -240,18 +275,19 @@ const initializeClient = async ({ req, res, endpointOption }) => {
const client = new AgentClient({
req,
agent: primaryConfig,
sender,
attachments,
contentParts,
agentConfigs,
eventHandlers,
collectedUsage,
artifactPromises,
agent: primaryConfig,
spec: endpointOption.spec,
iconURL: endpointOption.iconURL,
agentConfigs,
endpoint: EModelEndpoint.agents,
attachments: primaryConfig.attachments,
maxContextTokens: primaryConfig.maxContextTokens,
resendFiles: primaryConfig.model_parameters?.resendFiles ?? true,
});
return { client };