mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-01-10 20:48:54 +01:00
🔍 feat: Mistral OCR API / Upload Files as Text (#6274)
* refactor: move `loadAuthValues` to `~/services/Tools/credentials` * feat: add createAxiosInstance function to configure axios with proxy support * WIP: First pass mistral ocr * refactor: replace getConvoFiles with getToolFiles for improved file retrieval logic * refactor: improve document formatting in encodeAndFormat function * refactor: remove unused resendFiles parameter from buildOptions function (this option comes from the agent config) * fix: update getFiles call to include files with `text` property as well * refactor: move file handling to `initializeAgentOptions` * refactor: enhance addImageURLs method to handle OCR text and improve message formatting * refactor: update message formatting to handle OCR text in various content types * refactor: remove unused resendFiles property from compactAgentsSchema * fix: add error handling for Mistral OCR document upload and logging * refactor: integrate OCR capability into file upload options and configuration * refactor: skip processing for text source files in delete request, as they are directly tied to database * feat: add metadata field to ExtendedFile type and update PanelColumns and PanelTable components for localization and metadata handling * fix: source icon styling * wip: first pass, frontend file context agent resources * refactor: add hover card with contextual information for File Context (OCR) in FileContext component * feat: enhance file processing by integrating file retrieval for OCR resources in agent initialization * feat: implement OCR config; fix: agent resource deletion for ocr files * feat: enhance agent initialization by adding OCR capability check in resource priming * ci: fix `~/config` module mock * ci: add OCR property expectation in AppService tests * refactor: simplify OCR config loading by removing environment variable extraction, to be done when OCR is actually performed * ci: add unit test to ensure environment variable references are not parsed in OCR config * refactor: disable 
base64 image inclusion in OCR request * refactor: enhance OCR configuration handling by validating environment variables and providing defaults * refactor: use file stream from disk for mistral ocr api
This commit is contained in:
parent
9db00edfc4
commit
ded3cd8876
48 changed files with 1621 additions and 131 deletions
|
|
@ -10,8 +10,8 @@ const {
|
|||
ChatModelStreamHandler,
|
||||
} = require('@librechat/agents');
|
||||
const { processCodeOutput } = require('~/server/services/Files/Code/process');
|
||||
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
||||
const { saveBase64Image } = require('~/server/services/Files/process');
|
||||
const { loadAuthValues } = require('~/app/clients/tools/util');
|
||||
const { logger, sendEvent } = require('~/config');
|
||||
|
||||
/** @typedef {import('@librechat/agents').Graph} Graph */
|
||||
|
|
|
|||
|
|
@ -223,14 +223,23 @@ class AgentClient extends BaseClient {
|
|||
};
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {TMessage} message
|
||||
* @param {Array<MongoFile>} attachments
|
||||
* @returns {Promise<Array<Partial<MongoFile>>>}
|
||||
*/
|
||||
async addImageURLs(message, attachments) {
|
||||
const { files, image_urls } = await encodeAndFormat(
|
||||
const { files, text, image_urls } = await encodeAndFormat(
|
||||
this.options.req,
|
||||
attachments,
|
||||
this.options.agent.provider,
|
||||
VisionModes.agents,
|
||||
);
|
||||
message.image_urls = image_urls.length ? image_urls : undefined;
|
||||
if (text && text.length) {
|
||||
message.ocr = text;
|
||||
}
|
||||
return files;
|
||||
}
|
||||
|
||||
|
|
@ -308,7 +317,21 @@ class AgentClient extends BaseClient {
|
|||
assistantName: this.options?.modelLabel,
|
||||
});
|
||||
|
||||
const needsTokenCount = this.contextStrategy && !orderedMessages[i].tokenCount;
|
||||
if (message.ocr && i !== orderedMessages.length - 1) {
|
||||
if (typeof formattedMessage.content === 'string') {
|
||||
formattedMessage.content = message.ocr + '\n' + formattedMessage.content;
|
||||
} else {
|
||||
const textPart = formattedMessage.content.find((part) => part.type === 'text');
|
||||
textPart
|
||||
? (textPart.text = message.ocr + '\n' + textPart.text)
|
||||
: formattedMessage.content.unshift({ type: 'text', text: message.ocr });
|
||||
}
|
||||
} else if (message.ocr && i === orderedMessages.length - 1) {
|
||||
systemContent = [systemContent, message.ocr].join('\n');
|
||||
}
|
||||
|
||||
const needsTokenCount =
|
||||
(this.contextStrategy && !orderedMessages[i].tokenCount) || message.ocr;
|
||||
|
||||
/* If tokens were never counted, or, is a Vision request and the message has files, count again */
|
||||
if (needsTokenCount || (this.isVisionModel && (message.image_urls || message.files))) {
|
||||
|
|
|
|||
|
|
@ -10,7 +10,8 @@ const {
|
|||
const { processFileURL, uploadImageBuffer } = require('~/server/services/Files/process');
|
||||
const { processCodeOutput } = require('~/server/services/Files/Code/process');
|
||||
const { createToolCall, getToolCallsByConvo } = require('~/models/ToolCall');
|
||||
const { loadAuthValues, loadTools } = require('~/app/clients/tools/util');
|
||||
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
||||
const { loadTools } = require('~/app/clients/tools/util');
|
||||
const { checkAccess } = require('~/server/middleware');
|
||||
const { getMessage } = require('~/models/Message');
|
||||
const { logger } = require('~/config');
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ const openAI = require('~/server/services/Endpoints/openAI');
|
|||
const agents = require('~/server/services/Endpoints/agents');
|
||||
const custom = require('~/server/services/Endpoints/custom');
|
||||
const google = require('~/server/services/Endpoints/google');
|
||||
const { getConvoFiles } = require('~/models/Conversation');
|
||||
const { handleError } = require('~/server/utils');
|
||||
|
||||
const buildFunction = {
|
||||
|
|
@ -87,16 +86,8 @@ async function buildEndpointOption(req, res, next) {
|
|||
|
||||
// TODO: use `getModelsConfig` only when necessary
|
||||
const modelsConfig = await getModelsConfig(req);
|
||||
const { resendFiles = true } = req.body.endpointOption;
|
||||
req.body.endpointOption.modelsConfig = modelsConfig;
|
||||
if (isAgents && resendFiles && req.body.conversationId) {
|
||||
const fileIds = await getConvoFiles(req.body.conversationId);
|
||||
const requestFiles = req.body.files ?? [];
|
||||
if (requestFiles.length || fileIds.length) {
|
||||
req.body.endpointOption.attachments = processFiles(requestFiles, fileIds);
|
||||
}
|
||||
} else if (req.body.files) {
|
||||
// hold the promise
|
||||
if (req.body.files && !isAgents) {
|
||||
req.body.endpointOption.attachments = processFiles(req.body.files);
|
||||
}
|
||||
next();
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ const {
|
|||
} = require('~/server/services/Files/process');
|
||||
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
|
||||
const { getOpenAIClient } = require('~/server/controllers/assistants/helpers');
|
||||
const { loadAuthValues } = require('~/app/clients/tools/util');
|
||||
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
||||
const { getAgent } = require('~/models/Agent');
|
||||
const { getFiles } = require('~/models/File');
|
||||
const { logger } = require('~/config');
|
||||
|
|
|
|||
|
|
@ -1,4 +1,9 @@
|
|||
const { FileSources, EModelEndpoint, getConfigDefaults } = require('librechat-data-provider');
|
||||
const {
|
||||
FileSources,
|
||||
EModelEndpoint,
|
||||
loadOCRConfig,
|
||||
getConfigDefaults,
|
||||
} = require('librechat-data-provider');
|
||||
const { checkVariables, checkHealth, checkConfig, checkAzureVariables } = require('./start/checks');
|
||||
const { azureAssistantsDefaults, assistantsConfigSetup } = require('./start/assistants');
|
||||
const { initializeFirebase } = require('./Files/Firebase/initialize');
|
||||
|
|
@ -25,6 +30,7 @@ const AppService = async (app) => {
|
|||
const config = (await loadCustomConfig()) ?? {};
|
||||
const configDefaults = getConfigDefaults();
|
||||
|
||||
const ocr = loadOCRConfig(config.ocr);
|
||||
const filteredTools = config.filteredTools;
|
||||
const includedTools = config.includedTools;
|
||||
const fileStrategy = config.fileStrategy ?? configDefaults.fileStrategy;
|
||||
|
|
@ -57,6 +63,7 @@ const AppService = async (app) => {
|
|||
const interfaceConfig = await loadDefaultInterface(config, configDefaults);
|
||||
|
||||
const defaultLocals = {
|
||||
ocr,
|
||||
paths,
|
||||
fileStrategy,
|
||||
socialLogins,
|
||||
|
|
|
|||
|
|
@ -120,6 +120,7 @@ describe('AppService', () => {
|
|||
},
|
||||
},
|
||||
paths: expect.anything(),
|
||||
ocr: expect.anything(),
|
||||
imageOutputType: expect.any(String),
|
||||
fileConfig: undefined,
|
||||
secureImageLinks: undefined,
|
||||
|
|
@ -588,4 +589,33 @@ describe('AppService updating app.locals and issuing warnings', () => {
|
|||
);
|
||||
});
|
||||
});
|
||||
|
||||
it('should not parse environment variable references in OCR config', async () => {
|
||||
// Mock custom configuration with env variable references in OCR config
|
||||
const mockConfig = {
|
||||
ocr: {
|
||||
apiKey: '${OCR_API_KEY_CUSTOM_VAR_NAME}',
|
||||
baseURL: '${OCR_BASEURL_CUSTOM_VAR_NAME}',
|
||||
strategy: 'mistral_ocr',
|
||||
mistralModel: 'mistral-medium',
|
||||
},
|
||||
};
|
||||
|
||||
require('./Config/loadCustomConfig').mockImplementationOnce(() => Promise.resolve(mockConfig));
|
||||
|
||||
// Set actual environment variables with different values
|
||||
process.env.OCR_API_KEY_CUSTOM_VAR_NAME = 'actual-api-key';
|
||||
process.env.OCR_BASEURL_CUSTOM_VAR_NAME = 'https://actual-ocr-url.com';
|
||||
|
||||
// Initialize app
|
||||
const app = { locals: {} };
|
||||
await AppService(app);
|
||||
|
||||
// Verify that the raw string references were preserved and not interpolated
|
||||
expect(app.locals.ocr).toBeDefined();
|
||||
expect(app.locals.ocr.apiKey).toEqual('${OCR_API_KEY_CUSTOM_VAR_NAME}');
|
||||
expect(app.locals.ocr.baseURL).toEqual('${OCR_BASEURL_CUSTOM_VAR_NAME}');
|
||||
expect(app.locals.ocr.strategy).toEqual('mistral_ocr');
|
||||
expect(app.locals.ocr.mistralModel).toEqual('mistral-medium');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -2,15 +2,8 @@ const { loadAgent } = require('~/models/Agent');
|
|||
const { logger } = require('~/config');
|
||||
|
||||
const buildOptions = (req, endpoint, parsedBody) => {
|
||||
const {
|
||||
spec,
|
||||
iconURL,
|
||||
agent_id,
|
||||
instructions,
|
||||
maxContextTokens,
|
||||
resendFiles = true,
|
||||
...model_parameters
|
||||
} = parsedBody;
|
||||
const { spec, iconURL, agent_id, instructions, maxContextTokens, ...model_parameters } =
|
||||
parsedBody;
|
||||
const agentPromise = loadAgent({
|
||||
req,
|
||||
agent_id,
|
||||
|
|
@ -24,7 +17,6 @@ const buildOptions = (req, endpoint, parsedBody) => {
|
|||
iconURL,
|
||||
endpoint,
|
||||
agent_id,
|
||||
resendFiles,
|
||||
instructions,
|
||||
maxContextTokens,
|
||||
model_parameters,
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ const { createContentAggregator, Providers } = require('@librechat/agents');
|
|||
const {
|
||||
EModelEndpoint,
|
||||
getResponseSender,
|
||||
AgentCapabilities,
|
||||
providerEndpointMap,
|
||||
} = require('librechat-data-provider');
|
||||
const {
|
||||
|
|
@ -15,10 +16,13 @@ const initCustom = require('~/server/services/Endpoints/custom/initialize');
|
|||
const initGoogle = require('~/server/services/Endpoints/google/initialize');
|
||||
const generateArtifactsPrompt = require('~/app/clients/prompts/artifacts');
|
||||
const { getCustomEndpointConfig } = require('~/server/services/Config');
|
||||
const { processFiles } = require('~/server/services/Files/process');
|
||||
const { loadAgentTools } = require('~/server/services/ToolService');
|
||||
const AgentClient = require('~/server/controllers/agents/client');
|
||||
const { getToolFiles } = require('~/models/Conversation');
|
||||
const { getModelMaxTokens } = require('~/utils');
|
||||
const { getAgent } = require('~/models/Agent');
|
||||
const { getFiles } = require('~/models/File');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
const providerConfigMap = {
|
||||
|
|
@ -34,20 +38,38 @@ const providerConfigMap = {
|
|||
};
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {ServerRequest} req
|
||||
* @param {Promise<Array<MongoFile | null>> | undefined} _attachments
|
||||
* @param {AgentToolResources | undefined} _tool_resources
|
||||
* @returns {Promise<{ attachments: Array<MongoFile | undefined> | undefined, tool_resources: AgentToolResources | undefined }>}
|
||||
*/
|
||||
const primeResources = async (_attachments, _tool_resources) => {
|
||||
const primeResources = async (req, _attachments, _tool_resources) => {
|
||||
try {
|
||||
/** @type {Array<MongoFile | undefined> | undefined} */
|
||||
let attachments;
|
||||
const tool_resources = _tool_resources ?? {};
|
||||
const isOCREnabled = (req.app.locals?.[EModelEndpoint.agents]?.capabilities ?? []).includes(
|
||||
AgentCapabilities.ocr,
|
||||
);
|
||||
if (tool_resources.ocr?.file_ids && isOCREnabled) {
|
||||
const context = await getFiles(
|
||||
{
|
||||
file_id: { $in: tool_resources.ocr.file_ids },
|
||||
},
|
||||
{},
|
||||
{},
|
||||
);
|
||||
attachments = (attachments ?? []).concat(context);
|
||||
}
|
||||
if (!_attachments) {
|
||||
return { attachments: undefined, tool_resources: _tool_resources };
|
||||
return { attachments, tool_resources };
|
||||
}
|
||||
/** @type {Array<MongoFile | undefined> | undefined} */
|
||||
const files = await _attachments;
|
||||
const attachments = [];
|
||||
const tool_resources = _tool_resources ?? {};
|
||||
if (!attachments) {
|
||||
/** @type {Array<MongoFile | undefined>} */
|
||||
attachments = [];
|
||||
}
|
||||
|
||||
for (const file of files) {
|
||||
if (!file) {
|
||||
|
|
@ -82,7 +104,6 @@ const primeResources = async (_attachments, _tool_resources) => {
|
|||
* @param {ServerResponse} params.res
|
||||
* @param {Agent} params.agent
|
||||
* @param {object} [params.endpointOption]
|
||||
* @param {AgentToolResources} [params.tool_resources]
|
||||
* @param {boolean} [params.isInitialAgent]
|
||||
* @returns {Promise<Agent>}
|
||||
*/
|
||||
|
|
@ -91,9 +112,28 @@ const initializeAgentOptions = async ({
|
|||
res,
|
||||
agent,
|
||||
endpointOption,
|
||||
tool_resources,
|
||||
isInitialAgent = false,
|
||||
}) => {
|
||||
let currentFiles;
|
||||
const requestFiles = req.body.files ?? [];
|
||||
if (
|
||||
isInitialAgent &&
|
||||
req.body.conversationId != null &&
|
||||
agent.model_parameters?.resendFiles === true
|
||||
) {
|
||||
const fileIds = (await getToolFiles(req.body.conversationId)).map((f) => f.file_id);
|
||||
if (requestFiles.length || fileIds.length) {
|
||||
currentFiles = await processFiles(requestFiles, fileIds);
|
||||
}
|
||||
} else if (isInitialAgent && requestFiles.length) {
|
||||
currentFiles = await processFiles(requestFiles);
|
||||
}
|
||||
|
||||
const { attachments, tool_resources } = await primeResources(
|
||||
req,
|
||||
currentFiles,
|
||||
agent.tool_resources,
|
||||
);
|
||||
const { tools, toolContextMap } = await loadAgentTools({
|
||||
req,
|
||||
res,
|
||||
|
|
@ -160,6 +200,7 @@ const initializeAgentOptions = async ({
|
|||
return {
|
||||
...agent,
|
||||
tools,
|
||||
attachments,
|
||||
toolContextMap,
|
||||
maxContextTokens:
|
||||
agent.max_context_tokens ??
|
||||
|
|
@ -197,11 +238,6 @@ const initializeClient = async ({ req, res, endpointOption }) => {
|
|||
throw new Error('Agent not found');
|
||||
}
|
||||
|
||||
const { attachments, tool_resources } = await primeResources(
|
||||
endpointOption.attachments,
|
||||
primaryAgent.tool_resources,
|
||||
);
|
||||
|
||||
const agentConfigs = new Map();
|
||||
|
||||
// Handle primary agent
|
||||
|
|
@ -210,7 +246,6 @@ const initializeClient = async ({ req, res, endpointOption }) => {
|
|||
res,
|
||||
agent: primaryAgent,
|
||||
endpointOption,
|
||||
tool_resources,
|
||||
isInitialAgent: true,
|
||||
});
|
||||
|
||||
|
|
@ -240,18 +275,19 @@ const initializeClient = async ({ req, res, endpointOption }) => {
|
|||
|
||||
const client = new AgentClient({
|
||||
req,
|
||||
agent: primaryConfig,
|
||||
sender,
|
||||
attachments,
|
||||
contentParts,
|
||||
agentConfigs,
|
||||
eventHandlers,
|
||||
collectedUsage,
|
||||
artifactPromises,
|
||||
agent: primaryConfig,
|
||||
spec: endpointOption.spec,
|
||||
iconURL: endpointOption.iconURL,
|
||||
agentConfigs,
|
||||
endpoint: EModelEndpoint.agents,
|
||||
attachments: primaryConfig.attachments,
|
||||
maxContextTokens: primaryConfig.maxContextTokens,
|
||||
resendFiles: primaryConfig.model_parameters?.resendFiles ?? true,
|
||||
});
|
||||
|
||||
return { client };
|
||||
|
|
|
|||
207
api/server/services/Files/MistralOCR/crud.js
Normal file
207
api/server/services/Files/MistralOCR/crud.js
Normal file
|
|
@ -0,0 +1,207 @@
|
|||
// ~/server/services/Files/MistralOCR/crud.js
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const FormData = require('form-data');
|
||||
const { FileSources, envVarRegex, extractEnvVariable } = require('librechat-data-provider');
|
||||
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
||||
const { logger, createAxiosInstance } = require('~/config');
|
||||
const { logAxiosError } = require('~/utils');
|
||||
|
||||
const axios = createAxiosInstance();
|
||||
|
||||
/**
 * Sends a document to the Mistral files endpoint as multipart form data.
 * The file is attached as a read stream created from `filePath`, and the form
 * is tagged with `purpose: 'ocr'`.
 *
 * @param {Object} params Upload parameters
 * @param {string} params.filePath Path of the file on disk
 * @param {string} [params.fileName] Filename to report to the API; falls back to the basename of `filePath`
 * @param {string} params.apiKey Mistral API key, sent as a Bearer token
 * @param {string} [params.baseURL=https://api.mistral.ai/v1] Mistral API base URL
 * @returns {Promise<Object>} The response body returned by the API
 * @throws Re-throws the request error after logging its message
 */
async function uploadDocumentToMistral({
  filePath,
  fileName = '',
  apiKey,
  baseURL = 'https://api.mistral.ai/v1',
}) {
  const uploadName = fileName || path.basename(filePath);

  const form = new FormData();
  form.append('purpose', 'ocr');
  form.append('file', fs.createReadStream(filePath), { filename: uploadName });

  const requestConfig = {
    headers: {
      Authorization: `Bearer ${apiKey}`,
      ...form.getHeaders(),
    },
    // No client-side cap on request/response size for document uploads
    maxBodyLength: Infinity,
    maxContentLength: Infinity,
  };

  // The request is created outside the try block so that only its settlement
  // (not a synchronous failure while building it) goes through the logging path.
  const pending = axios.post(`${baseURL}/files`, form, requestConfig);

  try {
    const response = await pending;
    return response.data;
  } catch (error) {
    logger.error('Error uploading document to Mistral:', error.message);
    throw error;
  }
}
|
||||
|
||||
/**
 * Requests a signed URL for a file that was previously uploaded to the Mistral API.
 *
 * @param {Object} params
 * @param {string} params.apiKey Mistral API key, sent as a Bearer token
 * @param {string} params.fileId ID of the uploaded file
 * @param {number} [params.expiry=24] Value sent as the `expiry` query parameter
 *   (presumably hours; confirm against the Mistral API reference)
 * @param {string} [params.baseURL=https://api.mistral.ai/v1] Mistral API base URL
 * @returns {Promise<Object>} The response body returned by the API
 * @throws Re-throws the request error after logging its message
 */
async function getSignedUrl({
  apiKey,
  fileId,
  expiry = 24,
  baseURL = 'https://api.mistral.ai/v1',
}) {
  const endpoint = `${baseURL}/files/${fileId}/url?expiry=${expiry}`;
  const requestConfig = {
    headers: {
      Authorization: `Bearer ${apiKey}`,
    },
  };

  // Created outside the try block so only the request's settlement is logged on failure.
  const pending = axios.get(endpoint, requestConfig);

  try {
    const response = await pending;
    return response.data;
  } catch (error) {
    logger.error('Error fetching signed URL:', error.message);
    throw error;
  }
}
|
||||
|
||||
/**
 * Submits a document URL to the Mistral OCR endpoint and returns the response body.
 * Base64 image data is explicitly not requested (`include_image_base64: false`).
 *
 * @param {Object} params
 * @param {string} params.apiKey Mistral API key, sent as a Bearer token
 * @param {string} params.documentUrl URL of the document to process
 * @param {string} [params.model=mistral-ocr-latest] OCR model name
 * @param {string} [params.baseURL=https://api.mistral.ai/v1] Mistral API base URL
 * @returns {Promise<OCRResult>}
 * @throws Re-throws the request error after logging its message
 */
async function performOCR({
  apiKey,
  documentUrl,
  model = 'mistral-ocr-latest',
  baseURL = 'https://api.mistral.ai/v1',
}) {
  const payload = {
    model,
    include_image_base64: false,
    document: {
      type: 'document_url',
      document_url: documentUrl,
    },
  };
  const requestConfig = {
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${apiKey}`,
    },
  };

  // Created outside the try block so only the request's settlement is logged on failure.
  const pending = axios.post(`${baseURL}/ocr`, payload, requestConfig);

  try {
    const response = await pending;
    return response.data;
  } catch (error) {
    logger.error('Error performing OCR:', error.message);
    throw error;
  }
}
|
||||
|
||||
/**
 * Returns the first capture group of `envVarRegex` applied to `str`,
 * or `null` when the string does not match.
 *
 * @param {string} str Candidate environment-variable reference
 * @returns {string | null}
 */
function extractVariableName(str) {
  const found = str.match(envVarRegex);
  if (!found) {
    return null;
  }
  return found[1];
}
|
||||
|
||||
/**
 * Resolves the Mistral API key and base URL from the OCR configuration.
 *
 * Each field is resolved on its own:
 * - a literal (non-empty, non-reference) value is used exactly as configured;
 * - an env-variable reference (matching `envVarRegex`) is looked up through
 *   `loadAuthValues` under the referenced variable name;
 * - an empty/blank value is looked up under the default name
 *   (`OCR_API_KEY` / `OCR_BASEURL`).
 *
 * `loadAuthValues` is only called when at least one field needs a lookup, and
 * only for the fields that need it, so a literal value is never overridden by
 * the lookup of the other field.
 *
 * @param {ServerRequest} req Request object; `req.user.id` is passed to `loadAuthValues`
 * @param {TCustomConfig['ocr'] | undefined} ocrConfig OCR configuration (may be missing)
 * @returns {Promise<{ apiKey: string | undefined, baseURL: string | undefined }>}
 */
async function resolveOCRCredentials(req, ocrConfig) {
  const apiKeyConfig = ocrConfig?.apiKey || '';
  const baseURLConfig = ocrConfig?.baseURL || '';

  const isApiKeyEnvVar = envVarRegex.test(apiKeyConfig);
  const isBaseURLEnvVar = envVarRegex.test(baseURLConfig);

  const shouldLoadApiKey = isApiKeyEnvVar || !apiKeyConfig.trim();
  const shouldLoadBaseURL = isBaseURLEnvVar || !baseURLConfig.trim();

  if (!shouldLoadApiKey && !shouldLoadBaseURL) {
    return { apiKey: apiKeyConfig, baseURL: baseURLConfig };
  }

  const apiKeyVarName = isApiKeyEnvVar ? extractVariableName(apiKeyConfig) : 'OCR_API_KEY';
  const baseURLVarName = isBaseURLEnvVar ? extractVariableName(baseURLConfig) : 'OCR_BASEURL';

  // Base URL first, then API key: same field order as before when both are looked up.
  const authFields = [];
  const optional = new Set();
  if (shouldLoadBaseURL) {
    authFields.push(baseURLVarName);
    // The base URL may be absent; the upload/OCR helpers then use their default URL.
    optional.add(baseURLVarName);
  }
  if (shouldLoadApiKey) {
    authFields.push(apiKeyVarName);
  }

  const authValues = await loadAuthValues({
    userId: req.user.id,
    authFields,
    optional,
  });

  return {
    apiKey: shouldLoadApiKey ? authValues[apiKeyVarName] : apiKeyConfig,
    // An empty looked-up URL is normalised to `undefined` so the helpers' default
    // parameter applies instead of producing a relative request URL.
    baseURL: shouldLoadBaseURL ? authValues[baseURLVarName] || undefined : baseURLConfig,
  };
}

/**
 * Runs a file through the Mistral OCR pipeline: upload the document, obtain a
 * signed URL for it, run OCR on that URL, and aggregate the per-page markdown.
 *
 * When the result has more than one page, each page's markdown is preceded by a
 * `# PAGE <n>` heading. Any `image_base64` values present on page images are
 * collected in page order.
 *
 * @param {Object} params
 * @param {ServerRequest} params.req Request; reads `req.app.locals.ocr` and `req.user.id`
 * @param {{ path: string, originalname: string }} params.file Uploaded file descriptor
 * @param {string} [params.file_id] Not used by this function
 * @param {string} [params.entity_id] Not used by this function
 * @returns {Promise<{ filename: string, bytes: number, filepath: string, text: string, images: string[] }>}
 * @throws {Error} A generic 'Error uploading document to Mistral OCR API' error for
 *   any failure; the underlying error is passed to `logAxiosError` first.
 */
const uploadMistralOCR = async ({ req, file, file_id, entity_id }) => {
  try {
    /** @type {TCustomConfig['ocr']} */
    const ocrConfig = req.app.locals?.ocr;

    const { apiKey, baseURL } = await resolveOCRCredentials(req, ocrConfig);

    const mistralFile = await uploadDocumentToMistral({
      filePath: file.path,
      fileName: file.originalname,
      apiKey,
      baseURL,
    });

    // The model may be a literal, an env-variable reference, or blank (use the default).
    const modelConfig = ocrConfig?.mistralModel || '';
    const model = envVarRegex.test(modelConfig)
      ? extractEnvVariable(modelConfig)
      : modelConfig.trim() || 'mistral-ocr-latest';

    const signedUrlResponse = await getSignedUrl({
      apiKey,
      baseURL,
      fileId: mistralFile.id,
    });

    const ocrResult = await performOCR({
      apiKey,
      baseURL,
      model,
      documentUrl: signedUrlResponse.url,
    });

    let aggregatedText = '';
    const images = [];
    const isMultiPage = ocrResult.pages.length > 1;

    ocrResult.pages.forEach((page, index) => {
      if (isMultiPage) {
        aggregatedText += `# PAGE ${index + 1}\n`;
      }

      aggregatedText += page.markdown + '\n\n';

      if (page.images && page.images.length > 0) {
        page.images.forEach((image) => {
          if (image.image_base64) {
            images.push(image.image_base64);
          }
        });
      }
    });

    return {
      filename: file.originalname,
      // Rough size estimate derived from the text length, not an exact byte count.
      bytes: aggregatedText.length * 4,
      filepath: FileSources.mistral_ocr,
      text: aggregatedText,
      images,
    };
  } catch (error) {
    const message = 'Error uploading document to Mistral OCR API';
    logAxiosError({ error, message });
    throw new Error(message);
  }
};
|
||||
|
||||
module.exports = {
|
||||
uploadDocumentToMistral,
|
||||
uploadMistralOCR,
|
||||
getSignedUrl,
|
||||
performOCR,
|
||||
};
|
||||
737
api/server/services/Files/MistralOCR/crud.spec.js
Normal file
737
api/server/services/Files/MistralOCR/crud.spec.js
Normal file
|
|
@ -0,0 +1,737 @@
|
|||
const fs = require('fs');
|
||||
|
||||
/** Creates a jest mock for an HTTP verb that resolves to an empty response payload. */
const mockHttpMethod = () => jest.fn().mockResolvedValue({ data: {} });

/**
 * Stand-in for the axios module/instance used by the code under test.
 * Exposes mocked `get`/`post`/`put`/`delete`, a `create` mock returning a
 * separate instance-like object, and a `reset` helper that clears call history.
 */
const mockAxios = {
  interceptors: {
    request: { use: jest.fn(), eject: jest.fn() },
    response: { use: jest.fn(), eject: jest.fn() },
  },
  create: jest.fn().mockReturnValue({
    defaults: {
      proxy: null,
    },
    get: mockHttpMethod(),
    post: mockHttpMethod(),
    put: mockHttpMethod(),
    delete: mockHttpMethod(),
  }),
  get: mockHttpMethod(),
  post: mockHttpMethod(),
  put: mockHttpMethod(),
  delete: mockHttpMethod(),
  reset: jest.fn().mockImplementation(function () {
    // `this` is the object `reset` is invoked on; clear its top-level mocks in order.
    for (const member of ['get', 'post', 'put', 'delete', 'create']) {
      this[member].mockClear();
    }
  }),
};
|
||||
|
||||
jest.mock('axios', () => mockAxios);
|
||||
jest.mock('fs');
|
||||
jest.mock('~/utils', () => ({
|
||||
logAxiosError: jest.fn(),
|
||||
}));
|
||||
jest.mock('~/config', () => ({
|
||||
logger: {
|
||||
error: jest.fn(),
|
||||
},
|
||||
createAxiosInstance: () => mockAxios,
|
||||
}));
|
||||
jest.mock('~/server/services/Tools/credentials', () => ({
|
||||
loadAuthValues: jest.fn(),
|
||||
}));
|
||||
|
||||
const { uploadDocumentToMistral, uploadMistralOCR, getSignedUrl, performOCR } = require('./crud');
|
||||
|
||||
describe('MistralOCR Service', () => {
|
||||
afterEach(() => {
|
||||
mockAxios.reset();
|
||||
jest.clearAllMocks();
|
||||
});
|
||||
|
||||
describe('uploadDocumentToMistral', () => {
|
||||
beforeEach(() => {
|
||||
// Create a more complete mock for file streams that FormData can work with
|
||||
const mockReadStream = {
|
||||
on: jest.fn().mockImplementation(function (event, handler) {
|
||||
// Simulate immediate 'end' event to make FormData complete processing
|
||||
if (event === 'end') {
|
||||
handler();
|
||||
}
|
||||
return this;
|
||||
}),
|
||||
pipe: jest.fn().mockImplementation(function () {
|
||||
return this;
|
||||
}),
|
||||
pause: jest.fn(),
|
||||
resume: jest.fn(),
|
||||
emit: jest.fn(),
|
||||
once: jest.fn(),
|
||||
destroy: jest.fn(),
|
||||
};
|
||||
|
||||
fs.createReadStream = jest.fn().mockReturnValue(mockReadStream);
|
||||
|
||||
// Mock FormData's append to avoid actual stream processing
|
||||
jest.mock('form-data', () => {
|
||||
const mockFormData = function () {
|
||||
return {
|
||||
append: jest.fn(),
|
||||
getHeaders: jest
|
||||
.fn()
|
||||
.mockReturnValue({ 'content-type': 'multipart/form-data; boundary=---boundary' }),
|
||||
getBuffer: jest.fn().mockReturnValue(Buffer.from('mock-form-data')),
|
||||
getLength: jest.fn().mockReturnValue(100),
|
||||
};
|
||||
};
|
||||
return mockFormData;
|
||||
});
|
||||
});
|
||||
|
||||
it('should upload a document to Mistral API using file streaming', async () => {
|
||||
const mockResponse = { data: { id: 'file-123', purpose: 'ocr' } };
|
||||
mockAxios.post.mockResolvedValueOnce(mockResponse);
|
||||
|
||||
const result = await uploadDocumentToMistral({
|
||||
filePath: '/path/to/test.pdf',
|
||||
fileName: 'test.pdf',
|
||||
apiKey: 'test-api-key',
|
||||
});
|
||||
|
||||
// Check that createReadStream was called with the correct file path
|
||||
expect(fs.createReadStream).toHaveBeenCalledWith('/path/to/test.pdf');
|
||||
|
||||
// Since we're mocking FormData, we'll just check that axios was called correctly
|
||||
expect(mockAxios.post).toHaveBeenCalledWith(
|
||||
'https://api.mistral.ai/v1/files',
|
||||
expect.anything(),
|
||||
expect.objectContaining({
|
||||
headers: expect.objectContaining({
|
||||
Authorization: 'Bearer test-api-key',
|
||||
}),
|
||||
maxBodyLength: Infinity,
|
||||
maxContentLength: Infinity,
|
||||
}),
|
||||
);
|
||||
expect(result).toEqual(mockResponse.data);
|
||||
});
|
||||
|
||||
it('should handle errors during document upload', async () => {
|
||||
const errorMessage = 'API error';
|
||||
mockAxios.post.mockRejectedValueOnce(new Error(errorMessage));
|
||||
|
||||
await expect(
|
||||
uploadDocumentToMistral({
|
||||
filePath: '/path/to/test.pdf',
|
||||
fileName: 'test.pdf',
|
||||
apiKey: 'test-api-key',
|
||||
}),
|
||||
).rejects.toThrow();
|
||||
|
||||
const { logger } = require('~/config');
|
||||
expect(logger.error).toHaveBeenCalledWith(
|
||||
expect.stringContaining('Error uploading document to Mistral:'),
|
||||
expect.any(String),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('getSignedUrl', () => {
|
||||
it('should fetch signed URL from Mistral API', async () => {
|
||||
const mockResponse = { data: { url: 'https://document-url.com' } };
|
||||
mockAxios.get.mockResolvedValueOnce(mockResponse);
|
||||
|
||||
const result = await getSignedUrl({
|
||||
fileId: 'file-123',
|
||||
apiKey: 'test-api-key',
|
||||
});
|
||||
|
||||
expect(mockAxios.get).toHaveBeenCalledWith(
|
||||
'https://api.mistral.ai/v1/files/file-123/url?expiry=24',
|
||||
{
|
||||
headers: {
|
||||
Authorization: 'Bearer test-api-key',
|
||||
},
|
||||
},
|
||||
);
|
||||
expect(result).toEqual(mockResponse.data);
|
||||
});
|
||||
|
||||
it('should handle errors when fetching signed URL', async () => {
|
||||
const errorMessage = 'API error';
|
||||
mockAxios.get.mockRejectedValueOnce(new Error(errorMessage));
|
||||
|
||||
await expect(
|
||||
getSignedUrl({
|
||||
fileId: 'file-123',
|
||||
apiKey: 'test-api-key',
|
||||
}),
|
||||
).rejects.toThrow();
|
||||
|
||||
const { logger } = require('~/config');
|
||||
expect(logger.error).toHaveBeenCalledWith('Error fetching signed URL:', errorMessage);
|
||||
});
|
||||
});
|
||||
|
||||
describe('performOCR', () => {
|
||||
it('should perform OCR using Mistral API', async () => {
|
||||
const mockResponse = {
|
||||
data: {
|
||||
pages: [{ markdown: 'Page 1 content' }, { markdown: 'Page 2 content' }],
|
||||
},
|
||||
};
|
||||
mockAxios.post.mockResolvedValueOnce(mockResponse);
|
||||
|
||||
const result = await performOCR({
|
||||
apiKey: 'test-api-key',
|
||||
documentUrl: 'https://document-url.com',
|
||||
model: 'mistral-ocr-latest',
|
||||
});
|
||||
|
||||
expect(mockAxios.post).toHaveBeenCalledWith(
|
||||
'https://api.mistral.ai/v1/ocr',
|
||||
{
|
||||
model: 'mistral-ocr-latest',
|
||||
include_image_base64: false,
|
||||
document: {
|
||||
type: 'document_url',
|
||||
document_url: 'https://document-url.com',
|
||||
},
|
||||
},
|
||||
{
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
Authorization: 'Bearer test-api-key',
|
||||
},
|
||||
},
|
||||
);
|
||||
expect(result).toEqual(mockResponse.data);
|
||||
});
|
||||
|
||||
it('should handle errors during OCR processing', async () => {
|
||||
const errorMessage = 'OCR processing error';
|
||||
mockAxios.post.mockRejectedValueOnce(new Error(errorMessage));
|
||||
|
||||
await expect(
|
||||
performOCR({
|
||||
apiKey: 'test-api-key',
|
||||
documentUrl: 'https://document-url.com',
|
||||
}),
|
||||
).rejects.toThrow();
|
||||
|
||||
const { logger } = require('~/config');
|
||||
expect(logger.error).toHaveBeenCalledWith('Error performing OCR:', errorMessage);
|
||||
});
|
||||
});
|
||||
|
||||
describe('uploadMistralOCR', () => {
  /** Value of CUSTOM_MODEL before this suite ran, so it can be put back after each test. */
  const originalCustomModel = process.env.CUSTOM_MODEL;

  /** Builds a minimal Express-like request carrying the given OCR configuration. */
  const buildRequest = (ocr) => ({
    user: { id: 'user123' },
    app: { locals: { ocr } },
  });

  /** Builds the uploaded-file descriptor handed to `uploadMistralOCR`. */
  const buildFile = (originalname = 'document.pdf') => ({
    path: '/tmp/upload/file.pdf',
    originalname,
  });

  /** Invokes `uploadMistralOCR` with the identifiers shared by every test. */
  const runUpload = (req, file) =>
    uploadMistralOCR({
      req,
      file,
      file_id: 'file123',
      entity_id: 'entity123',
    });

  /** Queues the three successful API responses: file upload, signed URL, OCR result. */
  const mockOCRFlow = (pages) => {
    mockAxios.post.mockResolvedValueOnce({ data: { id: 'file-123', purpose: 'ocr' } });
    mockAxios.get.mockResolvedValueOnce({ data: { url: 'https://signed-url.com' } });
    mockAxios.post.mockResolvedValueOnce({ data: { pages } });
  };

  beforeEach(() => {
    // Chainable stand-in for a read stream that reports 'end' as soon as a listener is attached.
    const mockReadStream = {
      on: jest.fn().mockImplementation(function (event, handler) {
        if (event === 'end') {
          handler();
        }
        return this;
      }),
      pipe: jest.fn().mockImplementation(function () {
        return this;
      }),
      pause: jest.fn(),
      resume: jest.fn(),
      emit: jest.fn(),
      once: jest.fn(),
      destroy: jest.fn(),
    };

    fs.createReadStream = jest.fn().mockReturnValue(mockReadStream);
  });

  afterEach(() => {
    // One test sets CUSTOM_MODEL; restore it so the value cannot leak into other tests.
    // `delete` is required: assigning undefined would store the string 'undefined'.
    if (originalCustomModel === undefined) {
      delete process.env.CUSTOM_MODEL;
    } else {
      process.env.CUSTOM_MODEL = originalCustomModel;
    }
  });

  it('should process OCR for a file with standard configuration', async () => {
    const { loadAuthValues } = require('~/server/services/Tools/credentials');
    loadAuthValues.mockResolvedValue({
      OCR_API_KEY: 'test-api-key',
      OCR_BASEURL: 'https://api.mistral.ai/v1',
    });

    // OCR response with text and images on two pages
    mockOCRFlow([
      {
        markdown: 'Page 1 content',
        images: [{ image_base64: 'base64image1' }],
      },
      {
        markdown: 'Page 2 content',
        images: [{ image_base64: 'base64image2' }],
      },
    ]);

    const req = buildRequest({
      // Use environment variable syntax to ensure loadAuthValues is called
      apiKey: '${OCR_API_KEY}',
      baseURL: '${OCR_BASEURL}',
      mistralModel: 'mistral-medium',
    });

    const result = await runUpload(req, buildFile());

    expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');

    expect(loadAuthValues).toHaveBeenCalledWith({
      userId: 'user123',
      authFields: ['OCR_BASEURL', 'OCR_API_KEY'],
      optional: expect.any(Set),
    });

    // Verify OCR result
    expect(result).toEqual({
      filename: 'document.pdf',
      bytes: expect.any(Number),
      filepath: 'mistral_ocr',
      text: expect.stringContaining('# PAGE 1'),
      images: ['base64image1', 'base64image2'],
    });
  });

  it('should process variable references in configuration', async () => {
    const { loadAuthValues } = require('~/server/services/Tools/credentials');
    loadAuthValues.mockResolvedValue({
      CUSTOM_API_KEY: 'custom-api-key',
      CUSTOM_BASEURL: 'https://custom-api.mistral.ai/v1',
    });

    mockOCRFlow([{ markdown: 'Content from custom API' }]);

    const req = buildRequest({
      apiKey: '${CUSTOM_API_KEY}',
      baseURL: '${CUSTOM_BASEURL}',
      mistralModel: '${CUSTOM_MODEL}',
    });

    // Set environment variable for model (restored in afterEach)
    process.env.CUSTOM_MODEL = 'mistral-large';

    const result = await runUpload(req, buildFile());

    expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');

    // Verify that custom environment variables were extracted and used
    expect(loadAuthValues).toHaveBeenCalledWith({
      userId: 'user123',
      authFields: ['CUSTOM_BASEURL', 'CUSTOM_API_KEY'],
      optional: expect.any(Set),
    });

    // Check that mistral-large was used in the OCR API call
    expect(mockAxios.post).toHaveBeenCalledWith(
      expect.anything(),
      expect.objectContaining({
        model: 'mistral-large',
      }),
      expect.anything(),
    );

    expect(result.text).toEqual('Content from custom API\n\n');
  });

  it('should fall back to default values when variables are not properly formatted', async () => {
    const { loadAuthValues } = require('~/server/services/Tools/credentials');
    loadAuthValues.mockResolvedValue({
      OCR_API_KEY: 'default-api-key',
      OCR_BASEURL: undefined, // Testing optional parameter
    });

    mockOCRFlow([{ markdown: 'Default API result' }]);

    const req = buildRequest({
      // Use environment variable syntax to ensure loadAuthValues is called
      apiKey: '${INVALID_FORMAT}', // Using valid env var format but with an invalid name
      baseURL: '${OCR_BASEURL}', // Using valid env var format
      mistralModel: 'mistral-ocr-latest', // Plain string value
    });

    await runUpload(req, buildFile());

    expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');

    // The variable names taken from the configuration are what gets looked up
    expect(loadAuthValues).toHaveBeenCalledWith({
      userId: 'user123',
      authFields: ['OCR_BASEURL', 'INVALID_FORMAT'],
      optional: expect.any(Set),
    });

    // Should use the default model when not using environment variable format
    expect(mockAxios.post).toHaveBeenCalledWith(
      expect.anything(),
      expect.objectContaining({
        model: 'mistral-ocr-latest',
      }),
      expect.anything(),
    );
  });

  it('should handle API errors during OCR process', async () => {
    const { loadAuthValues } = require('~/server/services/Tools/credentials');
    loadAuthValues.mockResolvedValue({
      OCR_API_KEY: 'test-api-key',
    });

    // Mock file upload to fail
    mockAxios.post.mockRejectedValueOnce(new Error('Upload failed'));

    const req = buildRequest({
      apiKey: 'OCR_API_KEY',
      baseURL: 'OCR_BASEURL',
    });

    await expect(runUpload(req, buildFile())).rejects.toThrow(
      'Error uploading document to Mistral OCR API',
    );
    expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');

    const { logAxiosError } = require('~/utils');
    expect(logAxiosError).toHaveBeenCalled();
  });

  it('should handle single page documents without page numbering', async () => {
    const { loadAuthValues } = require('~/server/services/Tools/credentials');
    loadAuthValues.mockResolvedValue({
      OCR_API_KEY: 'test-api-key',
      OCR_BASEURL: 'https://api.mistral.ai/v1', // Make sure this is included
    });

    // Clear recorded calls from previous tests
    mockAxios.post.mockClear();
    mockAxios.get.mockClear();

    mockOCRFlow([{ markdown: 'Single page content' }]);

    const req = buildRequest({
      apiKey: 'OCR_API_KEY',
      baseURL: 'OCR_BASEURL',
      mistralModel: 'mistral-ocr-latest',
    });

    const result = await runUpload(req, buildFile('single-page.pdf'));

    expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');

    // Verify that single page documents don't include page numbering
    expect(result.text).not.toContain('# PAGE');
    expect(result.text).toEqual('Single page content\n\n');
  });

  it('should use literal values in configuration when provided directly', async () => {
    const { loadAuthValues } = require('~/server/services/Tools/credentials');
    // We'll still mock this but it should not be used for literal values
    loadAuthValues.mockResolvedValue({});

    // Clear recorded calls from previous tests
    mockAxios.post.mockClear();
    mockAxios.get.mockClear();

    mockOCRFlow([{ markdown: 'Processed with literal config values' }]);

    const req = buildRequest({
      // Direct values that should be used as-is, without variable substitution
      apiKey: 'actual-api-key-value',
      baseURL: 'https://direct-api-url.mistral.ai/v1',
      mistralModel: 'mistral-direct-model',
    });

    const result = await runUpload(req, buildFile('direct-values.pdf'));

    expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');

    // Verify the correct URL was used with the direct baseURL value
    expect(mockAxios.post).toHaveBeenCalledWith(
      'https://direct-api-url.mistral.ai/v1/files',
      expect.any(Object),
      expect.objectContaining({
        headers: expect.objectContaining({
          Authorization: 'Bearer actual-api-key-value',
        }),
      }),
    );

    // Check the OCR call was made with the direct model value
    expect(mockAxios.post).toHaveBeenCalledWith(
      'https://direct-api-url.mistral.ai/v1/ocr',
      expect.objectContaining({
        model: 'mistral-direct-model',
      }),
      expect.any(Object),
    );

    // Verify the result
    expect(result.text).toEqual('Processed with literal config values\n\n');

    // Verify loadAuthValues was never called since we used direct values
    expect(loadAuthValues).not.toHaveBeenCalled();
  });

  it('should handle empty configuration values and use defaults', async () => {
    const { loadAuthValues } = require('~/server/services/Tools/credentials');
    // Set up the mock values to be returned by loadAuthValues
    loadAuthValues.mockResolvedValue({
      OCR_API_KEY: 'default-from-env-key',
      OCR_BASEURL: 'https://default-from-env.mistral.ai/v1',
    });

    // Clear recorded calls from previous tests
    mockAxios.post.mockClear();
    mockAxios.get.mockClear();

    mockOCRFlow([{ markdown: 'Content from default configuration' }]);

    const req = buildRequest({
      // Empty string values - should fall back to defaults
      apiKey: '',
      baseURL: '',
      mistralModel: '',
    });

    const result = await runUpload(req, buildFile('empty-config.pdf'));

    expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');

    // Verify loadAuthValues was called with the default variable names
    expect(loadAuthValues).toHaveBeenCalledWith({
      userId: 'user123',
      authFields: ['OCR_BASEURL', 'OCR_API_KEY'],
      optional: expect.any(Set),
    });

    // Verify the API calls used the default values from loadAuthValues
    expect(mockAxios.post).toHaveBeenCalledWith(
      'https://default-from-env.mistral.ai/v1/files',
      expect.any(Object),
      expect.objectContaining({
        headers: expect.objectContaining({
          Authorization: 'Bearer default-from-env-key',
        }),
      }),
    );

    // Verify the OCR model defaulted to mistral-ocr-latest
    expect(mockAxios.post).toHaveBeenCalledWith(
      'https://default-from-env.mistral.ai/v1/ocr',
      expect.objectContaining({
        model: 'mistral-ocr-latest',
      }),
      expect.any(Object),
    );

    // Check result
    expect(result.text).toEqual('Content from default configuration\n\n');
  });
});
// Check that mistral-large was used in the OCR API call
|
||||
expect(mockAxios.post).toHaveBeenCalledWith(
|
||||
expect.anything(),
|
||||
expect.objectContaining({
|
||||
model: 'mistral-large',
|
||||
}),
|
||||
expect.anything(),
|
||||
);
|
||||
|
||||
expect(result.text).toEqual('Content from custom API\n\n');
|
||||
});
|
||||
|
||||
it('should fall back to default values when variables are not properly formatted', async () => {
|
||||
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
||||
loadAuthValues.mockResolvedValue({
|
||||
OCR_API_KEY: 'default-api-key',
|
||||
OCR_BASEURL: undefined, // Testing optional parameter
|
||||
});
|
||||
|
||||
mockAxios.post.mockResolvedValueOnce({
|
||||
data: { id: 'file-123', purpose: 'ocr' },
|
||||
});
|
||||
mockAxios.get.mockResolvedValueOnce({
|
||||
data: { url: 'https://signed-url.com' },
|
||||
});
|
||||
mockAxios.post.mockResolvedValueOnce({
|
||||
data: {
|
||||
pages: [{ markdown: 'Default API result' }],
|
||||
},
|
||||
});
|
||||
|
||||
const req = {
|
||||
user: { id: 'user123' },
|
||||
app: {
|
||||
locals: {
|
||||
ocr: {
|
||||
// Use environment variable syntax to ensure loadAuthValues is called
|
||||
apiKey: '${INVALID_FORMAT}', // Using valid env var format but with an invalid name
|
||||
baseURL: '${OCR_BASEURL}', // Using valid env var format
|
||||
mistralModel: 'mistral-ocr-latest', // Plain string value
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const file = {
|
||||
path: '/tmp/upload/file.pdf',
|
||||
originalname: 'document.pdf',
|
||||
};
|
||||
|
||||
await uploadMistralOCR({
|
||||
req,
|
||||
file,
|
||||
file_id: 'file123',
|
||||
entity_id: 'entity123',
|
||||
});
|
||||
|
||||
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
|
||||
|
||||
// Should use the default values
|
||||
expect(loadAuthValues).toHaveBeenCalledWith({
|
||||
userId: 'user123',
|
||||
authFields: ['OCR_BASEURL', 'INVALID_FORMAT'],
|
||||
optional: expect.any(Set),
|
||||
});
|
||||
|
||||
// Should use the default model when not using environment variable format
|
||||
expect(mockAxios.post).toHaveBeenCalledWith(
|
||||
expect.anything(),
|
||||
expect.objectContaining({
|
||||
model: 'mistral-ocr-latest',
|
||||
}),
|
||||
expect.anything(),
|
||||
);
|
||||
});
|
||||
|
||||
it('should handle API errors during OCR process', async () => {
|
||||
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
||||
loadAuthValues.mockResolvedValue({
|
||||
OCR_API_KEY: 'test-api-key',
|
||||
});
|
||||
|
||||
// Mock file upload to fail
|
||||
mockAxios.post.mockRejectedValueOnce(new Error('Upload failed'));
|
||||
|
||||
const req = {
|
||||
user: { id: 'user123' },
|
||||
app: {
|
||||
locals: {
|
||||
ocr: {
|
||||
apiKey: 'OCR_API_KEY',
|
||||
baseURL: 'OCR_BASEURL',
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const file = {
|
||||
path: '/tmp/upload/file.pdf',
|
||||
originalname: 'document.pdf',
|
||||
};
|
||||
|
||||
await expect(
|
||||
uploadMistralOCR({
|
||||
req,
|
||||
file,
|
||||
file_id: 'file123',
|
||||
entity_id: 'entity123',
|
||||
}),
|
||||
).rejects.toThrow('Error uploading document to Mistral OCR API');
|
||||
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
|
||||
|
||||
const { logAxiosError } = require('~/utils');
|
||||
expect(logAxiosError).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should handle single page documents without page numbering', async () => {
|
||||
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
||||
loadAuthValues.mockResolvedValue({
|
||||
OCR_API_KEY: 'test-api-key',
|
||||
OCR_BASEURL: 'https://api.mistral.ai/v1', // Make sure this is included
|
||||
});
|
||||
|
||||
// Clear all previous mocks
|
||||
mockAxios.post.mockClear();
|
||||
mockAxios.get.mockClear();
|
||||
|
||||
// 1. First mock: File upload response
|
||||
mockAxios.post.mockImplementationOnce(() =>
|
||||
Promise.resolve({ data: { id: 'file-123', purpose: 'ocr' } }),
|
||||
);
|
||||
|
||||
// 2. Second mock: Signed URL response
|
||||
mockAxios.get.mockImplementationOnce(() =>
|
||||
Promise.resolve({ data: { url: 'https://signed-url.com' } }),
|
||||
);
|
||||
|
||||
// 3. Third mock: OCR response
|
||||
mockAxios.post.mockImplementationOnce(() =>
|
||||
Promise.resolve({
|
||||
data: {
|
||||
pages: [{ markdown: 'Single page content' }],
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const req = {
|
||||
user: { id: 'user123' },
|
||||
app: {
|
||||
locals: {
|
||||
ocr: {
|
||||
apiKey: 'OCR_API_KEY',
|
||||
baseURL: 'OCR_BASEURL',
|
||||
mistralModel: 'mistral-ocr-latest',
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const file = {
|
||||
path: '/tmp/upload/file.pdf',
|
||||
originalname: 'single-page.pdf',
|
||||
};
|
||||
|
||||
const result = await uploadMistralOCR({
|
||||
req,
|
||||
file,
|
||||
file_id: 'file123',
|
||||
entity_id: 'entity123',
|
||||
});
|
||||
|
||||
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
|
||||
|
||||
// Verify that single page documents don't include page numbering
|
||||
expect(result.text).not.toContain('# PAGE');
|
||||
expect(result.text).toEqual('Single page content\n\n');
|
||||
});
|
||||
|
||||
it('should use literal values in configuration when provided directly', async () => {
|
||||
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
||||
// We'll still mock this but it should not be used for literal values
|
||||
loadAuthValues.mockResolvedValue({});
|
||||
|
||||
// Clear all previous mocks
|
||||
mockAxios.post.mockClear();
|
||||
mockAxios.get.mockClear();
|
||||
|
||||
// 1. First mock: File upload response
|
||||
mockAxios.post.mockImplementationOnce(() =>
|
||||
Promise.resolve({ data: { id: 'file-123', purpose: 'ocr' } }),
|
||||
);
|
||||
|
||||
// 2. Second mock: Signed URL response
|
||||
mockAxios.get.mockImplementationOnce(() =>
|
||||
Promise.resolve({ data: { url: 'https://signed-url.com' } }),
|
||||
);
|
||||
|
||||
// 3. Third mock: OCR response
|
||||
mockAxios.post.mockImplementationOnce(() =>
|
||||
Promise.resolve({
|
||||
data: {
|
||||
pages: [{ markdown: 'Processed with literal config values' }],
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const req = {
|
||||
user: { id: 'user123' },
|
||||
app: {
|
||||
locals: {
|
||||
ocr: {
|
||||
// Direct values that should be used as-is, without variable substitution
|
||||
apiKey: 'actual-api-key-value',
|
||||
baseURL: 'https://direct-api-url.mistral.ai/v1',
|
||||
mistralModel: 'mistral-direct-model',
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const file = {
|
||||
path: '/tmp/upload/file.pdf',
|
||||
originalname: 'direct-values.pdf',
|
||||
};
|
||||
|
||||
const result = await uploadMistralOCR({
|
||||
req,
|
||||
file,
|
||||
file_id: 'file123',
|
||||
entity_id: 'entity123',
|
||||
});
|
||||
|
||||
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
|
||||
|
||||
// Verify the correct URL was used with the direct baseURL value
|
||||
expect(mockAxios.post).toHaveBeenCalledWith(
|
||||
'https://direct-api-url.mistral.ai/v1/files',
|
||||
expect.any(Object),
|
||||
expect.objectContaining({
|
||||
headers: expect.objectContaining({
|
||||
Authorization: 'Bearer actual-api-key-value',
|
||||
}),
|
||||
}),
|
||||
);
|
||||
|
||||
// Check the OCR call was made with the direct model value
|
||||
expect(mockAxios.post).toHaveBeenCalledWith(
|
||||
'https://direct-api-url.mistral.ai/v1/ocr',
|
||||
expect.objectContaining({
|
||||
model: 'mistral-direct-model',
|
||||
}),
|
||||
expect.any(Object),
|
||||
);
|
||||
|
||||
// Verify the result
|
||||
expect(result.text).toEqual('Processed with literal config values\n\n');
|
||||
|
||||
// Verify loadAuthValues was never called since we used direct values
|
||||
expect(loadAuthValues).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should handle empty configuration values and use defaults', async () => {
|
||||
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
||||
// Set up the mock values to be returned by loadAuthValues
|
||||
loadAuthValues.mockResolvedValue({
|
||||
OCR_API_KEY: 'default-from-env-key',
|
||||
OCR_BASEURL: 'https://default-from-env.mistral.ai/v1',
|
||||
});
|
||||
|
||||
// Clear all previous mocks
|
||||
mockAxios.post.mockClear();
|
||||
mockAxios.get.mockClear();
|
||||
|
||||
// 1. First mock: File upload response
|
||||
mockAxios.post.mockImplementationOnce(() =>
|
||||
Promise.resolve({ data: { id: 'file-123', purpose: 'ocr' } }),
|
||||
);
|
||||
|
||||
// 2. Second mock: Signed URL response
|
||||
mockAxios.get.mockImplementationOnce(() =>
|
||||
Promise.resolve({ data: { url: 'https://signed-url.com' } }),
|
||||
);
|
||||
|
||||
// 3. Third mock: OCR response
|
||||
mockAxios.post.mockImplementationOnce(() =>
|
||||
Promise.resolve({
|
||||
data: {
|
||||
pages: [{ markdown: 'Content from default configuration' }],
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const req = {
|
||||
user: { id: 'user123' },
|
||||
app: {
|
||||
locals: {
|
||||
ocr: {
|
||||
// Empty string values - should fall back to defaults
|
||||
apiKey: '',
|
||||
baseURL: '',
|
||||
mistralModel: '',
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const file = {
|
||||
path: '/tmp/upload/file.pdf',
|
||||
originalname: 'empty-config.pdf',
|
||||
};
|
||||
|
||||
const result = await uploadMistralOCR({
|
||||
req,
|
||||
file,
|
||||
file_id: 'file123',
|
||||
entity_id: 'entity123',
|
||||
});
|
||||
|
||||
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
|
||||
|
||||
// Verify loadAuthValues was called with the default variable names
|
||||
expect(loadAuthValues).toHaveBeenCalledWith({
|
||||
userId: 'user123',
|
||||
authFields: ['OCR_BASEURL', 'OCR_API_KEY'],
|
||||
optional: expect.any(Set),
|
||||
});
|
||||
|
||||
// Verify the API calls used the default values from loadAuthValues
|
||||
expect(mockAxios.post).toHaveBeenCalledWith(
|
||||
'https://default-from-env.mistral.ai/v1/files',
|
||||
expect.any(Object),
|
||||
expect.objectContaining({
|
||||
headers: expect.objectContaining({
|
||||
Authorization: 'Bearer default-from-env-key',
|
||||
}),
|
||||
}),
|
||||
);
|
||||
|
||||
// Verify the OCR model defaulted to mistral-ocr-latest
|
||||
expect(mockAxios.post).toHaveBeenCalledWith(
|
||||
'https://default-from-env.mistral.ai/v1/ocr',
|
||||
expect.objectContaining({
|
||||
model: 'mistral-ocr-latest',
|
||||
}),
|
||||
expect.any(Object),
|
||||
);
|
||||
|
||||
// Check result
|
||||
expect(result.text).toEqual('Content from default configuration\n\n');
|
||||
});
|
||||
});
|
||||
});
|
||||
5
api/server/services/Files/MistralOCR/index.js
Normal file
5
api/server/services/Files/MistralOCR/index.js
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
const crud = require('./crud');
|
||||
|
||||
module.exports = {
|
||||
...crud,
|
||||
};
|
||||
|
|
@ -49,6 +49,7 @@ async function encodeAndFormat(req, files, endpoint, mode) {
|
|||
const promises = [];
|
||||
const encodingMethods = {};
|
||||
const result = {
|
||||
text: '',
|
||||
files: [],
|
||||
image_urls: [],
|
||||
};
|
||||
|
|
@ -59,6 +60,9 @@ async function encodeAndFormat(req, files, endpoint, mode) {
|
|||
|
||||
for (let file of files) {
|
||||
const source = file.source ?? FileSources.local;
|
||||
if (source === FileSources.text && file.text) {
|
||||
result.text += `${!result.text ? 'Attached document(s):\n```md' : '\n\n---\n\n'}# "${file.filename}"\n${file.text}\n`;
|
||||
}
|
||||
|
||||
if (!file.height) {
|
||||
promises.push([file, null]);
|
||||
|
|
@ -85,6 +89,10 @@ async function encodeAndFormat(req, files, endpoint, mode) {
|
|||
promises.push(preparePayload(req, file));
|
||||
}
|
||||
|
||||
if (result.text) {
|
||||
result.text += '\n```';
|
||||
}
|
||||
|
||||
const detail = req.body.imageDetail ?? ImageDetail.auto;
|
||||
|
||||
/** @type {Array<[MongoFile, string]>} */
|
||||
|
|
|
|||
|
|
@ -28,8 +28,8 @@ const { addResourceFileId, deleteResourceFileId } = require('~/server/controller
|
|||
const { addAgentResourceFile, removeAgentResourceFiles } = require('~/models/Agent');
|
||||
const { getOpenAIClient } = require('~/server/controllers/assistants/helpers');
|
||||
const { createFile, updateFileUsage, deleteFiles } = require('~/models/File');
|
||||
const { loadAuthValues } = require('~/server/services/Tools/credentials');
|
||||
const { getEndpointsConfig } = require('~/server/services/Config');
|
||||
const { loadAuthValues } = require('~/app/clients/tools/util');
|
||||
const { LB_QueueAsyncCall } = require('~/server/utils/queue');
|
||||
const { getStrategyFunctions } = require('./strategies');
|
||||
const { determineFileType } = require('~/server/utils');
|
||||
|
|
@ -162,7 +162,6 @@ const processDeleteRequest = async ({ req, files }) => {
|
|||
|
||||
for (const file of files) {
|
||||
const source = file.source ?? FileSources.local;
|
||||
|
||||
if (req.body.agent_id && req.body.tool_resource) {
|
||||
agentFiles.push({
|
||||
tool_resource: req.body.tool_resource,
|
||||
|
|
@ -170,6 +169,11 @@ const processDeleteRequest = async ({ req, files }) => {
|
|||
});
|
||||
}
|
||||
|
||||
if (source === FileSources.text) {
|
||||
resolvedFileIds.push(file.file_id);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (checkOpenAIStorage(source) && !client[source]) {
|
||||
await initializeClients();
|
||||
}
|
||||
|
|
@ -521,6 +525,52 @@ const processAgentFileUpload = async ({ req, res, metadata }) => {
|
|||
if (!isFileSearchEnabled) {
|
||||
throw new Error('File search is not enabled for Agents');
|
||||
}
|
||||
} else if (tool_resource === EToolResources.ocr) {
|
||||
const isOCREnabled = await checkCapability(req, AgentCapabilities.ocr);
|
||||
if (!isOCREnabled) {
|
||||
throw new Error('OCR capability is not enabled for Agents');
|
||||
}
|
||||
|
||||
const { handleFileUpload } = getStrategyFunctions(
|
||||
req.app.locals?.ocr?.strategy ?? FileSources.mistral_ocr,
|
||||
);
|
||||
const { file_id, temp_file_id } = metadata;
|
||||
|
||||
const {
|
||||
text,
|
||||
bytes,
|
||||
// TODO: OCR images support?
|
||||
images,
|
||||
filename,
|
||||
filepath: ocrFileURL,
|
||||
} = await handleFileUpload({ req, file, file_id, entity_id: agent_id });
|
||||
|
||||
const fileInfo = removeNullishValues({
|
||||
text,
|
||||
bytes,
|
||||
file_id,
|
||||
temp_file_id,
|
||||
user: req.user.id,
|
||||
type: file.mimetype,
|
||||
filepath: ocrFileURL,
|
||||
source: FileSources.text,
|
||||
filename: filename ?? file.originalname,
|
||||
model: messageAttachment ? undefined : req.body.model,
|
||||
context: messageAttachment ? FileContext.message_attachment : FileContext.agents,
|
||||
});
|
||||
|
||||
if (!messageAttachment && tool_resource) {
|
||||
await addAgentResourceFile({
|
||||
req,
|
||||
file_id,
|
||||
agent_id,
|
||||
tool_resource,
|
||||
});
|
||||
}
|
||||
const result = await createFile(fileInfo, true);
|
||||
return res
|
||||
.status(200)
|
||||
.json({ message: 'Agent file uploaded and processed successfully', ...result });
|
||||
}
|
||||
|
||||
const source =
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ const {
|
|||
const { uploadOpenAIFile, deleteOpenAIFile, getOpenAIFileStream } = require('./OpenAI');
|
||||
const { getCodeOutputDownloadStream, uploadCodeEnvFile } = require('./Code');
|
||||
const { uploadVectors, deleteVectors } = require('./VectorDB');
|
||||
const { uploadMistralOCR } = require('./MistralOCR');
|
||||
|
||||
/**
|
||||
* Firebase Storage Strategy Functions
|
||||
|
|
@ -127,6 +128,26 @@ const codeOutputStrategy = () => ({
|
|||
getDownloadStream: getCodeOutputDownloadStream,
|
||||
});
|
||||
|
||||
/**
 * Mistral OCR Strategy Functions
 *
 * Only `handleFileUpload` is implemented for this source; every other
 * strategy slot is explicitly `null`.
 */
const mistralOCRStrategy = () => {
  const strategy = {
    /** @type {typeof saveFileFromURL | null} */
    saveURL: null,
    /** @type {typeof getLocalFileURL | null} */
    getFileURL: null,
    /** @type {typeof saveLocalBuffer | null} */
    saveBuffer: null,
    /** @type {typeof processLocalAvatar | null} */
    processAvatar: null,
    /** @type {typeof uploadLocalImage | null} */
    handleImageUpload: null,
    /** @type {typeof prepareImagesLocal | null} */
    prepareImagePayload: null,
    /** @type {typeof deleteLocalFile | null} */
    deleteFile: null,
    /** @type {typeof getLocalFileStream | null} */
    getDownloadStream: null,
    // The single supported operation: upload a document and run OCR on it.
    handleFileUpload: uploadMistralOCR,
  };

  return strategy;
};
|
||||
|
||||
// Strategy Selector
|
||||
const getStrategyFunctions = (fileSource) => {
|
||||
if (fileSource === FileSources.firebase) {
|
||||
|
|
@ -141,6 +162,8 @@ const getStrategyFunctions = (fileSource) => {
|
|||
return vectorStrategy();
|
||||
} else if (fileSource === FileSources.execute_code) {
|
||||
return codeOutputStrategy();
|
||||
} else if (fileSource === FileSources.mistral_ocr) {
|
||||
return mistralOCRStrategy();
|
||||
} else {
|
||||
throw new Error('Invalid file source');
|
||||
}
|
||||
|
|
|
|||
56
api/server/services/Tools/credentials.js
Normal file
56
api/server/services/Tools/credentials.js
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
const { getUserPluginAuthValue } = require('~/server/services/PluginService');
|
||||
|
||||
/**
 * Resolves credential values for a list of authentication fields.
 *
 * Each entry of `authFields` may name several alternates separated by "||"; alternates are tried
 * left to right. For every candidate field, `process.env` is checked first and
 * `getUserPluginAuthValue` second. The first truthy value wins and is stored under the name of
 * the field that supplied it.
 *
 * @param {Object} params
 * @param {string} params.userId - User ID passed to `getUserPluginAuthValue`.
 * @param {string[]} params.authFields - Field names; alternates are delimited by "||".
 * @param {Set<string>} [params.optional] - Fields whose failed lookup is tolerated; such a field
 *   is reported with the value `undefined` instead of raising.
 * @param {boolean} [params.throwError=true] - Forwarded unchanged to `getUserPluginAuthValue`
 *   (presumably controls whether a missing value throws there - confirm against PluginService).
 * @returns {Promise<Record<string, string | undefined>>} Resolved values keyed by field name.
 *   Entries for which no alternate produced a value are omitted.
 * @throws Rethrows the lookup error raised for the last alternate of an entry when that field is
 *   not listed in `optional`.
 */
const loadAuthValues = async ({ userId, authFields, optional, throwError = true }) => {
  const authValues = {};

  /**
   * Finds the first non-empty value for the given authentication field, supporting alternate fields.
   * @param {string[]} fields Array of strings representing the authentication fields. Supports alternate fields delimited by "||".
   * @returns {Promise<{ authField: string, authValue: string} | null>} An object containing the authentication field and value, or null if not found.
   */
  const findAuthValue = async (fields) => {
    const lastField = fields[fields.length - 1];

    for (const field of fields) {
      // Environment configuration takes precedence over per-user stored values.
      const envValue = process.env[field];
      if (envValue) {
        return { authField: field, authValue: envValue };
      }

      let userValue;
      try {
        userValue = await getUserPluginAuthValue(userId, field, throwError);
      } catch (err) {
        if (optional?.has(field)) {
          return { authField: field, authValue: undefined };
        }
        // Errors on earlier alternates are ignored so the next one can be tried;
        // only a failure on the final alternate is surfaced to the caller.
        if (field === lastField) {
          throw err;
        }
      }

      if (userValue) {
        return { authField: field, authValue: userValue };
      }
    }

    return null;
  };

  // Entries are resolved sequentially, in the order given.
  for (const authField of authFields) {
    const result = await findAuthValue(authField.split('||'));
    if (result) {
      authValues[result.authField] = result.authValue;
    }
  }

  return authValues;
};
|
||||
|
||||
module.exports = {
|
||||
loadAuthValues,
|
||||
};
|
||||
|
|
@ -203,6 +203,7 @@ function generateConfig(key, baseURL, endpoint) {
|
|||
AgentCapabilities.artifacts,
|
||||
AgentCapabilities.actions,
|
||||
AgentCapabilities.tools,
|
||||
AgentCapabilities.ocr,
|
||||
];
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue