mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-18 01:10:14 +01:00
🔍 feat: Mistral OCR API / Upload Files as Text (#6274)
* refactor: move `loadAuthValues` to `~/services/Tools/credentials` * feat: add createAxiosInstance function to configure axios with proxy support * WIP: First pass mistral ocr * refactor: replace getConvoFiles with getToolFiles for improved file retrieval logic * refactor: improve document formatting in encodeAndFormat function * refactor: remove unused resendFiles parameter from buildOptions function (this option comes from the agent config) * fix: update getFiles call to include files with `text` property as well * refactor: move file handling to `initializeAgentOptions` * refactor: enhance addImageURLs method to handle OCR text and improve message formatting * refactor: update message formatting to handle OCR text in various content types * refactor: remove unused resendFiles property from compactAgentsSchema * fix: add error handling for Mistral OCR document upload and logging * refactor: integrate OCR capability into file upload options and configuration * refactor: skip processing for text source files in delete request, as they are directly tied to database * feat: add metadata field to ExtendedFile type and update PanelColumns and PanelTable components for localization and metadata handling * fix: source icon styling * wip: first pass, frontend file context agent resources * refactor: add hover card with contextual information for File Context (OCR) in FileContext component * feat: enhance file processing by integrating file retrieval for OCR resources in agent initialization * feat: implement OCR config; fix: agent resource deletion for ocr files * feat: enhance agent initialization by adding OCR capability check in resource priming * ci: fix `~/config` module mock * ci: add OCR property expectation in AppService tests * refactor: simplify OCR config loading by removing environment variable extraction, to be done when OCR is actually performed * ci: add unit test to ensure environment variable references are not parsed in OCR config * refactor: disable base64 image inclusion in OCR request * refactor: enhance OCR configuration handling by validating environment variables and providing defaults * refactor: use file stream from disk for mistral ocr api
This commit is contained in:
parent
9db00edfc4
commit
ded3cd8876
48 changed files with 1621 additions and 131 deletions
|
|
@ -2,6 +2,7 @@ const { createContentAggregator, Providers } = require('@librechat/agents');
|
|||
const {
|
||||
EModelEndpoint,
|
||||
getResponseSender,
|
||||
AgentCapabilities,
|
||||
providerEndpointMap,
|
||||
} = require('librechat-data-provider');
|
||||
const {
|
||||
|
|
@ -15,10 +16,13 @@ const initCustom = require('~/server/services/Endpoints/custom/initialize');
|
|||
const initGoogle = require('~/server/services/Endpoints/google/initialize');
|
||||
const generateArtifactsPrompt = require('~/app/clients/prompts/artifacts');
|
||||
const { getCustomEndpointConfig } = require('~/server/services/Config');
|
||||
const { processFiles } = require('~/server/services/Files/process');
|
||||
const { loadAgentTools } = require('~/server/services/ToolService');
|
||||
const AgentClient = require('~/server/controllers/agents/client');
|
||||
const { getToolFiles } = require('~/models/Conversation');
|
||||
const { getModelMaxTokens } = require('~/utils');
|
||||
const { getAgent } = require('~/models/Agent');
|
||||
const { getFiles } = require('~/models/File');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
const providerConfigMap = {
|
||||
|
|
@ -34,20 +38,38 @@ const providerConfigMap = {
|
|||
};
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {ServerRequest} req
|
||||
* @param {Promise<Array<MongoFile | null>> | undefined} _attachments
|
||||
* @param {AgentToolResources | undefined} _tool_resources
|
||||
* @returns {Promise<{ attachments: Array<MongoFile | undefined> | undefined, tool_resources: AgentToolResources | undefined }>}
|
||||
*/
|
||||
const primeResources = async (_attachments, _tool_resources) => {
|
||||
const primeResources = async (req, _attachments, _tool_resources) => {
|
||||
try {
|
||||
/** @type {Array<MongoFile | undefined> | undefined} */
|
||||
let attachments;
|
||||
const tool_resources = _tool_resources ?? {};
|
||||
const isOCREnabled = (req.app.locals?.[EModelEndpoint.agents]?.capabilities ?? []).includes(
|
||||
AgentCapabilities.ocr,
|
||||
);
|
||||
if (tool_resources.ocr?.file_ids && isOCREnabled) {
|
||||
const context = await getFiles(
|
||||
{
|
||||
file_id: { $in: tool_resources.ocr.file_ids },
|
||||
},
|
||||
{},
|
||||
{},
|
||||
);
|
||||
attachments = (attachments ?? []).concat(context);
|
||||
}
|
||||
if (!_attachments) {
|
||||
return { attachments: undefined, tool_resources: _tool_resources };
|
||||
return { attachments, tool_resources };
|
||||
}
|
||||
/** @type {Array<MongoFile | undefined> | undefined} */
|
||||
const files = await _attachments;
|
||||
const attachments = [];
|
||||
const tool_resources = _tool_resources ?? {};
|
||||
if (!attachments) {
|
||||
/** @type {Array<MongoFile | undefined>} */
|
||||
attachments = [];
|
||||
}
|
||||
|
||||
for (const file of files) {
|
||||
if (!file) {
|
||||
|
|
@ -82,7 +104,6 @@ const primeResources = async (_attachments, _tool_resources) => {
|
|||
* @param {ServerResponse} params.res
|
||||
* @param {Agent} params.agent
|
||||
* @param {object} [params.endpointOption]
|
||||
* @param {AgentToolResources} [params.tool_resources]
|
||||
* @param {boolean} [params.isInitialAgent]
|
||||
* @returns {Promise<Agent>}
|
||||
*/
|
||||
|
|
@ -91,9 +112,28 @@ const initializeAgentOptions = async ({
|
|||
res,
|
||||
agent,
|
||||
endpointOption,
|
||||
tool_resources,
|
||||
isInitialAgent = false,
|
||||
}) => {
|
||||
let currentFiles;
|
||||
const requestFiles = req.body.files ?? [];
|
||||
if (
|
||||
isInitialAgent &&
|
||||
req.body.conversationId != null &&
|
||||
agent.model_parameters?.resendFiles === true
|
||||
) {
|
||||
const fileIds = (await getToolFiles(req.body.conversationId)).map((f) => f.file_id);
|
||||
if (requestFiles.length || fileIds.length) {
|
||||
currentFiles = await processFiles(requestFiles, fileIds);
|
||||
}
|
||||
} else if (isInitialAgent && requestFiles.length) {
|
||||
currentFiles = await processFiles(requestFiles);
|
||||
}
|
||||
|
||||
const { attachments, tool_resources } = await primeResources(
|
||||
req,
|
||||
currentFiles,
|
||||
agent.tool_resources,
|
||||
);
|
||||
const { tools, toolContextMap } = await loadAgentTools({
|
||||
req,
|
||||
res,
|
||||
|
|
@ -160,6 +200,7 @@ const initializeAgentOptions = async ({
|
|||
return {
|
||||
...agent,
|
||||
tools,
|
||||
attachments,
|
||||
toolContextMap,
|
||||
maxContextTokens:
|
||||
agent.max_context_tokens ??
|
||||
|
|
@ -197,11 +238,6 @@ const initializeClient = async ({ req, res, endpointOption }) => {
|
|||
throw new Error('Agent not found');
|
||||
}
|
||||
|
||||
const { attachments, tool_resources } = await primeResources(
|
||||
endpointOption.attachments,
|
||||
primaryAgent.tool_resources,
|
||||
);
|
||||
|
||||
const agentConfigs = new Map();
|
||||
|
||||
// Handle primary agent
|
||||
|
|
@ -210,7 +246,6 @@ const initializeClient = async ({ req, res, endpointOption }) => {
|
|||
res,
|
||||
agent: primaryAgent,
|
||||
endpointOption,
|
||||
tool_resources,
|
||||
isInitialAgent: true,
|
||||
});
|
||||
|
||||
|
|
@ -240,18 +275,19 @@ const initializeClient = async ({ req, res, endpointOption }) => {
|
|||
|
||||
const client = new AgentClient({
|
||||
req,
|
||||
agent: primaryConfig,
|
||||
sender,
|
||||
attachments,
|
||||
contentParts,
|
||||
agentConfigs,
|
||||
eventHandlers,
|
||||
collectedUsage,
|
||||
artifactPromises,
|
||||
agent: primaryConfig,
|
||||
spec: endpointOption.spec,
|
||||
iconURL: endpointOption.iconURL,
|
||||
agentConfigs,
|
||||
endpoint: EModelEndpoint.agents,
|
||||
attachments: primaryConfig.attachments,
|
||||
maxContextTokens: primaryConfig.maxContextTokens,
|
||||
resendFiles: primaryConfig.model_parameters?.resendFiles ?? true,
|
||||
});
|
||||
|
||||
return { client };
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue