LibreChat/api/server/middleware/buildEndpointOption.js
Danny Avila ded3cd8876
🔍 feat: Mistral OCR API / Upload Files as Text (#6274)
* refactor: move `loadAuthValues` to `~/services/Tools/credentials`

* feat: add createAxiosInstance function to configure axios with proxy support

* WIP: First pass mistral ocr

* refactor: replace getConvoFiles with getToolFiles for improved file retrieval logic

* refactor: improve document formatting in encodeAndFormat function

* refactor: remove unused resendFiles parameter from buildOptions function (this option comes from the agent config)

* fix: update getFiles call to include files with `text` property as well

* refactor: move file handling to `initializeAgentOptions`

* refactor: enhance addImageURLs method to handle OCR text and improve message formatting

* refactor: update message formatting to handle OCR text in various content types

* refactor: remove unused resendFiles property from compactAgentsSchema

* fix: add error handling for Mistral OCR document upload and logging

* refactor: integrate OCR capability into file upload options and configuration

* refactor: skip processing for text source files in delete request, as they are directly tied to database

* feat: add metadata field to ExtendedFile type and update PanelColumns and PanelTable components for localization and metadata handling

* fix: source icon styling

* wip: first pass, frontend file context agent resources

* refactor: add hover card with contextual information for File Context (OCR) in FileContext component

* feat: enhance file processing by integrating file retrieval for OCR resources in agent initialization

* feat: implement OCR config; fix: agent resource deletion for ocr files

* feat: enhance agent initialization by adding OCR capability check in resource priming

* ci: fix `~/config` module mock

* ci: add OCR property expectation in AppService tests

* refactor: simplify OCR config loading by removing environment variable extraction, to be done when OCR is actually performed

* ci: add unit test to ensure environment variable references are not parsed in OCR config

* refactor: disable base64 image inclusion in OCR request

* refactor: enhance OCR configuration handling by validating environment variables and providing defaults

* refactor: use file stream from disk for mistral ocr api
2025-03-10 17:23:46 -04:00

99 lines
3.7 KiB
JavaScript

const { parseCompactConvo, EModelEndpoint, isAgentsEndpoint } = require('librechat-data-provider');
const { getModelsConfig } = require('~/server/controllers/ModelController');
const azureAssistants = require('~/server/services/Endpoints/azureAssistants');
const assistants = require('~/server/services/Endpoints/assistants');
const gptPlugins = require('~/server/services/Endpoints/gptPlugins');
const { processFiles } = require('~/server/services/Files/process');
const anthropic = require('~/server/services/Endpoints/anthropic');
const bedrock = require('~/server/services/Endpoints/bedrock');
const openAI = require('~/server/services/Endpoints/openAI');
const agents = require('~/server/services/Endpoints/agents');
const custom = require('~/server/services/Endpoints/custom');
const google = require('~/server/services/Endpoints/google');
const { handleError } = require('~/server/utils');
const buildFunction = {
[EModelEndpoint.openAI]: openAI.buildOptions,
[EModelEndpoint.google]: google.buildOptions,
[EModelEndpoint.custom]: custom.buildOptions,
[EModelEndpoint.agents]: agents.buildOptions,
[EModelEndpoint.bedrock]: bedrock.buildOptions,
[EModelEndpoint.azureOpenAI]: openAI.buildOptions,
[EModelEndpoint.anthropic]: anthropic.buildOptions,
[EModelEndpoint.gptPlugins]: gptPlugins.buildOptions,
[EModelEndpoint.assistants]: assistants.buildOptions,
[EModelEndpoint.azureAssistants]: azureAssistants.buildOptions,
};
async function buildEndpointOption(req, res, next) {
const { endpoint, endpointType } = req.body;
let parsedBody;
try {
parsedBody = parseCompactConvo({ endpoint, endpointType, conversation: req.body });
} catch (error) {
return handleError(res, { text: 'Error parsing conversation' });
}
if (req.app.locals.modelSpecs?.list && req.app.locals.modelSpecs?.enforce) {
/** @type {{ list: TModelSpec[] }}*/
const { list } = req.app.locals.modelSpecs;
const { spec } = parsedBody;
if (!spec) {
return handleError(res, { text: 'No model spec selected' });
}
const currentModelSpec = list.find((s) => s.name === spec);
if (!currentModelSpec) {
return handleError(res, { text: 'Invalid model spec' });
}
if (endpoint !== currentModelSpec.preset.endpoint) {
return handleError(res, { text: 'Model spec mismatch' });
}
if (
currentModelSpec.preset.endpoint !== EModelEndpoint.gptPlugins &&
currentModelSpec.preset.tools
) {
return handleError(res, {
text: `Only the "${EModelEndpoint.gptPlugins}" endpoint can have tools defined in the preset`,
});
}
try {
currentModelSpec.preset.spec = spec;
if (currentModelSpec.iconURL != null && currentModelSpec.iconURL !== '') {
currentModelSpec.preset.iconURL = currentModelSpec.iconURL;
}
parsedBody = parseCompactConvo({
endpoint,
endpointType,
conversation: currentModelSpec.preset,
});
} catch (error) {
return handleError(res, { text: 'Error parsing model spec' });
}
}
try {
const isAgents = isAgentsEndpoint(endpoint);
const endpointFn = buildFunction[endpointType ?? endpoint];
const builder = isAgents ? (...args) => endpointFn(req, ...args) : endpointFn;
// TODO: use object params
req.body.endpointOption = await builder(endpoint, parsedBody, endpointType);
// TODO: use `getModelsConfig` only when necessary
const modelsConfig = await getModelsConfig(req);
req.body.endpointOption.modelsConfig = modelsConfig;
if (req.body.files && !isAgents) {
req.body.endpointOption.attachments = processFiles(req.body.files);
}
next();
} catch (error) {
return handleError(res, { text: 'Error building endpoint option' });
}
}
module.exports = buildEndpointOption;