🔍 feat: Mistral OCR API / Upload Files as Text (#6274)

* refactor: move `loadAuthValues` to `~/services/Tools/credentials`

* feat: add createAxiosInstance function to configure axios with proxy support

* WIP: First pass mistral ocr

* refactor: replace getConvoFiles with getToolFiles for improved file retrieval logic

* refactor: improve document formatting in encodeAndFormat function

* refactor: remove unused resendFiles parameter from buildOptions function (this option comes from the agent config)

* fix: update getFiles call to include files with `text` property as well

* refactor: move file handling to `initializeAgentOptions`

* refactor: enhance addImageURLs method to handle OCR text and improve message formatting

* refactor: update message formatting to handle OCR text in various content types

* refactor: remove unused resendFiles property from compactAgentsSchema

* fix: add error handling for Mistral OCR document upload and logging

* refactor: integrate OCR capability into file upload options and configuration

* refactor: skip processing for text source files in delete request, as they are directly tied to database

* feat: add metadata field to ExtendedFile type and update PanelColumns and PanelTable components for localization and metadata handling

* fix: source icon styling

* wip: first pass, frontend file context agent resources

* refactor: add hover card with contextual information for File Context (OCR) in FileContext component

* feat: enhance file processing by integrating file retrieval for OCR resources in agent initialization

* feat: implement OCR config; fix: agent resource deletion for ocr files

* feat: enhance agent initialization by adding OCR capability check in resource priming

* ci: fix `~/config` module mock

* ci: add OCR property expectation in AppService tests

* refactor: simplify OCR config loading by removing environment variable extraction, to be done when OCR is actually performed

* ci: add unit test to ensure environment variable references are not parsed in OCR config

* refactor: disable base64 image inclusion in OCR request

* refactor: enhance OCR configuration handling by validating environment variables and providing defaults

* refactor: use file stream from disk for mistral ocr api
This commit is contained in:
Danny Avila 2025-03-10 17:23:46 -04:00 committed by GitHub
parent 9db00edfc4
commit ded3cd8876
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
48 changed files with 1621 additions and 131 deletions

View file

@ -0,0 +1,207 @@
// ~/server/services/Files/MistralOCR/crud.js
const fs = require('fs');
const path = require('path');
const FormData = require('form-data');
const { FileSources, envVarRegex, extractEnvVariable } = require('librechat-data-provider');
const { loadAuthValues } = require('~/server/services/Tools/credentials');
const { logger, createAxiosInstance } = require('~/config');
const { logAxiosError } = require('~/utils');
const axios = createAxiosInstance();
/**
* Uploads a document to Mistral API using file streaming to avoid loading the entire file into memory
*
* @param {Object} params Upload parameters
* @param {string} params.filePath The path to the file on disk
* @param {string} [params.fileName] Optional filename to use (defaults to the name from filePath)
* @param {string} params.apiKey Mistral API key
* @param {string} [params.baseURL=https://api.mistral.ai/v1] Mistral API base URL
* @returns {Promise<Object>} The response from Mistral API
*/
async function uploadDocumentToMistral({
filePath,
fileName = '',
apiKey,
baseURL = 'https://api.mistral.ai/v1',
}) {
const form = new FormData();
form.append('purpose', 'ocr');
const actualFileName = fileName || path.basename(filePath);
const fileStream = fs.createReadStream(filePath);
form.append('file', fileStream, { filename: actualFileName });
return axios
.post(`${baseURL}/files`, form, {
headers: {
Authorization: `Bearer ${apiKey}`,
...form.getHeaders(),
},
maxBodyLength: Infinity,
maxContentLength: Infinity,
})
.then((res) => res.data)
.catch((error) => {
logger.error('Error uploading document to Mistral:', error.message);
throw error;
});
}
async function getSignedUrl({
apiKey,
fileId,
expiry = 24,
baseURL = 'https://api.mistral.ai/v1',
}) {
return axios
.get(`${baseURL}/files/${fileId}/url?expiry=${expiry}`, {
headers: {
Authorization: `Bearer ${apiKey}`,
},
})
.then((res) => res.data)
.catch((error) => {
logger.error('Error fetching signed URL:', error.message);
throw error;
});
}
/**
* @param {Object} params
* @param {string} params.apiKey
* @param {string} params.documentUrl
* @param {string} [params.baseURL]
* @returns {Promise<OCRResult>}
*/
async function performOCR({
apiKey,
documentUrl,
model = 'mistral-ocr-latest',
baseURL = 'https://api.mistral.ai/v1',
}) {
return axios
.post(
`${baseURL}/ocr`,
{
model,
include_image_base64: false,
document: {
type: 'document_url',
document_url: documentUrl,
},
},
{
headers: {
'Content-Type': 'application/json',
Authorization: `Bearer ${apiKey}`,
},
},
)
.then((res) => res.data)
.catch((error) => {
logger.error('Error performing OCR:', error.message);
throw error;
});
}
function extractVariableName(str) {
const match = str.match(envVarRegex);
return match ? match[1] : null;
}
const uploadMistralOCR = async ({ req, file, file_id, entity_id }) => {
try {
/** @type {TCustomConfig['ocr']} */
const ocrConfig = req.app.locals?.ocr;
const apiKeyConfig = ocrConfig.apiKey || '';
const baseURLConfig = ocrConfig.baseURL || '';
const isApiKeyEnvVar = envVarRegex.test(apiKeyConfig);
const isBaseURLEnvVar = envVarRegex.test(baseURLConfig);
const isApiKeyEmpty = !apiKeyConfig.trim();
const isBaseURLEmpty = !baseURLConfig.trim();
let apiKey, baseURL;
if (isApiKeyEnvVar || isBaseURLEnvVar || isApiKeyEmpty || isBaseURLEmpty) {
const apiKeyVarName = isApiKeyEnvVar ? extractVariableName(apiKeyConfig) : 'OCR_API_KEY';
const baseURLVarName = isBaseURLEnvVar ? extractVariableName(baseURLConfig) : 'OCR_BASEURL';
const authValues = await loadAuthValues({
userId: req.user.id,
authFields: [baseURLVarName, apiKeyVarName],
optional: new Set([baseURLVarName]),
});
apiKey = authValues[apiKeyVarName];
baseURL = authValues[baseURLVarName];
} else {
apiKey = apiKeyConfig;
baseURL = baseURLConfig;
}
const mistralFile = await uploadDocumentToMistral({
filePath: file.path,
fileName: file.originalname,
apiKey,
baseURL,
});
const modelConfig = ocrConfig.mistralModel || '';
const model = envVarRegex.test(modelConfig)
? extractEnvVariable(modelConfig)
: modelConfig.trim() || 'mistral-ocr-latest';
const signedUrlResponse = await getSignedUrl({
apiKey,
baseURL,
fileId: mistralFile.id,
});
const ocrResult = await performOCR({
apiKey,
baseURL,
model,
documentUrl: signedUrlResponse.url,
});
let aggregatedText = '';
const images = [];
ocrResult.pages.forEach((page, index) => {
if (ocrResult.pages.length > 1) {
aggregatedText += `# PAGE ${index + 1}\n`;
}
aggregatedText += page.markdown + '\n\n';
if (page.images && page.images.length > 0) {
page.images.forEach((image) => {
if (image.image_base64) {
images.push(image.image_base64);
}
});
}
});
return {
filename: file.originalname,
bytes: aggregatedText.length * 4,
filepath: FileSources.mistral_ocr,
text: aggregatedText,
images,
};
} catch (error) {
const message = 'Error uploading document to Mistral OCR API';
logAxiosError({ error, message });
throw new Error(message);
}
};
module.exports = {
uploadDocumentToMistral,
uploadMistralOCR,
getSignedUrl,
performOCR,
};

View file

@ -0,0 +1,737 @@
const fs = require('fs');
const mockAxios = {
interceptors: {
request: { use: jest.fn(), eject: jest.fn() },
response: { use: jest.fn(), eject: jest.fn() },
},
create: jest.fn().mockReturnValue({
defaults: {
proxy: null,
},
get: jest.fn().mockResolvedValue({ data: {} }),
post: jest.fn().mockResolvedValue({ data: {} }),
put: jest.fn().mockResolvedValue({ data: {} }),
delete: jest.fn().mockResolvedValue({ data: {} }),
}),
get: jest.fn().mockResolvedValue({ data: {} }),
post: jest.fn().mockResolvedValue({ data: {} }),
put: jest.fn().mockResolvedValue({ data: {} }),
delete: jest.fn().mockResolvedValue({ data: {} }),
reset: jest.fn().mockImplementation(function () {
this.get.mockClear();
this.post.mockClear();
this.put.mockClear();
this.delete.mockClear();
this.create.mockClear();
}),
};
jest.mock('axios', () => mockAxios);
jest.mock('fs');
jest.mock('~/utils', () => ({
logAxiosError: jest.fn(),
}));
jest.mock('~/config', () => ({
logger: {
error: jest.fn(),
},
createAxiosInstance: () => mockAxios,
}));
jest.mock('~/server/services/Tools/credentials', () => ({
loadAuthValues: jest.fn(),
}));
const { uploadDocumentToMistral, uploadMistralOCR, getSignedUrl, performOCR } = require('./crud');
describe('MistralOCR Service', () => {
afterEach(() => {
mockAxios.reset();
jest.clearAllMocks();
});
describe('uploadDocumentToMistral', () => {
beforeEach(() => {
// Create a more complete mock for file streams that FormData can work with
const mockReadStream = {
on: jest.fn().mockImplementation(function (event, handler) {
// Simulate immediate 'end' event to make FormData complete processing
if (event === 'end') {
handler();
}
return this;
}),
pipe: jest.fn().mockImplementation(function () {
return this;
}),
pause: jest.fn(),
resume: jest.fn(),
emit: jest.fn(),
once: jest.fn(),
destroy: jest.fn(),
};
fs.createReadStream = jest.fn().mockReturnValue(mockReadStream);
// Mock FormData's append to avoid actual stream processing
jest.mock('form-data', () => {
const mockFormData = function () {
return {
append: jest.fn(),
getHeaders: jest
.fn()
.mockReturnValue({ 'content-type': 'multipart/form-data; boundary=---boundary' }),
getBuffer: jest.fn().mockReturnValue(Buffer.from('mock-form-data')),
getLength: jest.fn().mockReturnValue(100),
};
};
return mockFormData;
});
});
it('should upload a document to Mistral API using file streaming', async () => {
const mockResponse = { data: { id: 'file-123', purpose: 'ocr' } };
mockAxios.post.mockResolvedValueOnce(mockResponse);
const result = await uploadDocumentToMistral({
filePath: '/path/to/test.pdf',
fileName: 'test.pdf',
apiKey: 'test-api-key',
});
// Check that createReadStream was called with the correct file path
expect(fs.createReadStream).toHaveBeenCalledWith('/path/to/test.pdf');
// Since we're mocking FormData, we'll just check that axios was called correctly
expect(mockAxios.post).toHaveBeenCalledWith(
'https://api.mistral.ai/v1/files',
expect.anything(),
expect.objectContaining({
headers: expect.objectContaining({
Authorization: 'Bearer test-api-key',
}),
maxBodyLength: Infinity,
maxContentLength: Infinity,
}),
);
expect(result).toEqual(mockResponse.data);
});
it('should handle errors during document upload', async () => {
const errorMessage = 'API error';
mockAxios.post.mockRejectedValueOnce(new Error(errorMessage));
await expect(
uploadDocumentToMistral({
filePath: '/path/to/test.pdf',
fileName: 'test.pdf',
apiKey: 'test-api-key',
}),
).rejects.toThrow();
const { logger } = require('~/config');
expect(logger.error).toHaveBeenCalledWith(
expect.stringContaining('Error uploading document to Mistral:'),
expect.any(String),
);
});
});
describe('getSignedUrl', () => {
it('should fetch signed URL from Mistral API', async () => {
const mockResponse = { data: { url: 'https://document-url.com' } };
mockAxios.get.mockResolvedValueOnce(mockResponse);
const result = await getSignedUrl({
fileId: 'file-123',
apiKey: 'test-api-key',
});
expect(mockAxios.get).toHaveBeenCalledWith(
'https://api.mistral.ai/v1/files/file-123/url?expiry=24',
{
headers: {
Authorization: 'Bearer test-api-key',
},
},
);
expect(result).toEqual(mockResponse.data);
});
it('should handle errors when fetching signed URL', async () => {
const errorMessage = 'API error';
mockAxios.get.mockRejectedValueOnce(new Error(errorMessage));
await expect(
getSignedUrl({
fileId: 'file-123',
apiKey: 'test-api-key',
}),
).rejects.toThrow();
const { logger } = require('~/config');
expect(logger.error).toHaveBeenCalledWith('Error fetching signed URL:', errorMessage);
});
});
describe('performOCR', () => {
it('should perform OCR using Mistral API', async () => {
const mockResponse = {
data: {
pages: [{ markdown: 'Page 1 content' }, { markdown: 'Page 2 content' }],
},
};
mockAxios.post.mockResolvedValueOnce(mockResponse);
const result = await performOCR({
apiKey: 'test-api-key',
documentUrl: 'https://document-url.com',
model: 'mistral-ocr-latest',
});
expect(mockAxios.post).toHaveBeenCalledWith(
'https://api.mistral.ai/v1/ocr',
{
model: 'mistral-ocr-latest',
include_image_base64: false,
document: {
type: 'document_url',
document_url: 'https://document-url.com',
},
},
{
headers: {
'Content-Type': 'application/json',
Authorization: 'Bearer test-api-key',
},
},
);
expect(result).toEqual(mockResponse.data);
});
it('should handle errors during OCR processing', async () => {
const errorMessage = 'OCR processing error';
mockAxios.post.mockRejectedValueOnce(new Error(errorMessage));
await expect(
performOCR({
apiKey: 'test-api-key',
documentUrl: 'https://document-url.com',
}),
).rejects.toThrow();
const { logger } = require('~/config');
expect(logger.error).toHaveBeenCalledWith('Error performing OCR:', errorMessage);
});
});
describe('uploadMistralOCR', () => {
beforeEach(() => {
const mockReadStream = {
on: jest.fn().mockImplementation(function (event, handler) {
if (event === 'end') {
handler();
}
return this;
}),
pipe: jest.fn().mockImplementation(function () {
return this;
}),
pause: jest.fn(),
resume: jest.fn(),
emit: jest.fn(),
once: jest.fn(),
destroy: jest.fn(),
};
fs.createReadStream = jest.fn().mockReturnValue(mockReadStream);
});
it('should process OCR for a file with standard configuration', async () => {
// Setup mocks
const { loadAuthValues } = require('~/server/services/Tools/credentials');
loadAuthValues.mockResolvedValue({
OCR_API_KEY: 'test-api-key',
OCR_BASEURL: 'https://api.mistral.ai/v1',
});
// Mock file upload response
mockAxios.post.mockResolvedValueOnce({
data: { id: 'file-123', purpose: 'ocr' },
});
// Mock signed URL response
mockAxios.get.mockResolvedValueOnce({
data: { url: 'https://signed-url.com' },
});
// Mock OCR response with text and images
mockAxios.post.mockResolvedValueOnce({
data: {
pages: [
{
markdown: 'Page 1 content',
images: [{ image_base64: 'base64image1' }],
},
{
markdown: 'Page 2 content',
images: [{ image_base64: 'base64image2' }],
},
],
},
});
const req = {
user: { id: 'user123' },
app: {
locals: {
ocr: {
// Use environment variable syntax to ensure loadAuthValues is called
apiKey: '${OCR_API_KEY}',
baseURL: '${OCR_BASEURL}',
mistralModel: 'mistral-medium',
},
},
},
};
const file = {
path: '/tmp/upload/file.pdf',
originalname: 'document.pdf',
};
const result = await uploadMistralOCR({
req,
file,
file_id: 'file123',
entity_id: 'entity123',
});
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
expect(loadAuthValues).toHaveBeenCalledWith({
userId: 'user123',
authFields: ['OCR_BASEURL', 'OCR_API_KEY'],
optional: expect.any(Set),
});
// Verify OCR result
expect(result).toEqual({
filename: 'document.pdf',
bytes: expect.any(Number),
filepath: 'mistral_ocr',
text: expect.stringContaining('# PAGE 1'),
images: ['base64image1', 'base64image2'],
});
});
it('should process variable references in configuration', async () => {
// Setup mocks with environment variables
const { loadAuthValues } = require('~/server/services/Tools/credentials');
loadAuthValues.mockResolvedValue({
CUSTOM_API_KEY: 'custom-api-key',
CUSTOM_BASEURL: 'https://custom-api.mistral.ai/v1',
});
// Mock API responses
mockAxios.post.mockResolvedValueOnce({
data: { id: 'file-123', purpose: 'ocr' },
});
mockAxios.get.mockResolvedValueOnce({
data: { url: 'https://signed-url.com' },
});
mockAxios.post.mockResolvedValueOnce({
data: {
pages: [{ markdown: 'Content from custom API' }],
},
});
const req = {
user: { id: 'user123' },
app: {
locals: {
ocr: {
apiKey: '${CUSTOM_API_KEY}',
baseURL: '${CUSTOM_BASEURL}',
mistralModel: '${CUSTOM_MODEL}',
},
},
},
};
// Set environment variable for model
process.env.CUSTOM_MODEL = 'mistral-large';
const file = {
path: '/tmp/upload/file.pdf',
originalname: 'document.pdf',
};
const result = await uploadMistralOCR({
req,
file,
file_id: 'file123',
entity_id: 'entity123',
});
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
// Verify that custom environment variables were extracted and used
expect(loadAuthValues).toHaveBeenCalledWith({
userId: 'user123',
authFields: ['CUSTOM_BASEURL', 'CUSTOM_API_KEY'],
optional: expect.any(Set),
});
// Check that mistral-large was used in the OCR API call
expect(mockAxios.post).toHaveBeenCalledWith(
expect.anything(),
expect.objectContaining({
model: 'mistral-large',
}),
expect.anything(),
);
expect(result.text).toEqual('Content from custom API\n\n');
});
it('should fall back to default values when variables are not properly formatted', async () => {
const { loadAuthValues } = require('~/server/services/Tools/credentials');
loadAuthValues.mockResolvedValue({
OCR_API_KEY: 'default-api-key',
OCR_BASEURL: undefined, // Testing optional parameter
});
mockAxios.post.mockResolvedValueOnce({
data: { id: 'file-123', purpose: 'ocr' },
});
mockAxios.get.mockResolvedValueOnce({
data: { url: 'https://signed-url.com' },
});
mockAxios.post.mockResolvedValueOnce({
data: {
pages: [{ markdown: 'Default API result' }],
},
});
const req = {
user: { id: 'user123' },
app: {
locals: {
ocr: {
// Use environment variable syntax to ensure loadAuthValues is called
apiKey: '${INVALID_FORMAT}', // Using valid env var format but with an invalid name
baseURL: '${OCR_BASEURL}', // Using valid env var format
mistralModel: 'mistral-ocr-latest', // Plain string value
},
},
},
};
const file = {
path: '/tmp/upload/file.pdf',
originalname: 'document.pdf',
};
await uploadMistralOCR({
req,
file,
file_id: 'file123',
entity_id: 'entity123',
});
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
// Should use the default values
expect(loadAuthValues).toHaveBeenCalledWith({
userId: 'user123',
authFields: ['OCR_BASEURL', 'INVALID_FORMAT'],
optional: expect.any(Set),
});
// Should use the default model when not using environment variable format
expect(mockAxios.post).toHaveBeenCalledWith(
expect.anything(),
expect.objectContaining({
model: 'mistral-ocr-latest',
}),
expect.anything(),
);
});
it('should handle API errors during OCR process', async () => {
const { loadAuthValues } = require('~/server/services/Tools/credentials');
loadAuthValues.mockResolvedValue({
OCR_API_KEY: 'test-api-key',
});
// Mock file upload to fail
mockAxios.post.mockRejectedValueOnce(new Error('Upload failed'));
const req = {
user: { id: 'user123' },
app: {
locals: {
ocr: {
apiKey: 'OCR_API_KEY',
baseURL: 'OCR_BASEURL',
},
},
},
};
const file = {
path: '/tmp/upload/file.pdf',
originalname: 'document.pdf',
};
await expect(
uploadMistralOCR({
req,
file,
file_id: 'file123',
entity_id: 'entity123',
}),
).rejects.toThrow('Error uploading document to Mistral OCR API');
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
const { logAxiosError } = require('~/utils');
expect(logAxiosError).toHaveBeenCalled();
});
it('should handle single page documents without page numbering', async () => {
const { loadAuthValues } = require('~/server/services/Tools/credentials');
loadAuthValues.mockResolvedValue({
OCR_API_KEY: 'test-api-key',
OCR_BASEURL: 'https://api.mistral.ai/v1', // Make sure this is included
});
// Clear all previous mocks
mockAxios.post.mockClear();
mockAxios.get.mockClear();
// 1. First mock: File upload response
mockAxios.post.mockImplementationOnce(() =>
Promise.resolve({ data: { id: 'file-123', purpose: 'ocr' } }),
);
// 2. Second mock: Signed URL response
mockAxios.get.mockImplementationOnce(() =>
Promise.resolve({ data: { url: 'https://signed-url.com' } }),
);
// 3. Third mock: OCR response
mockAxios.post.mockImplementationOnce(() =>
Promise.resolve({
data: {
pages: [{ markdown: 'Single page content' }],
},
}),
);
const req = {
user: { id: 'user123' },
app: {
locals: {
ocr: {
apiKey: 'OCR_API_KEY',
baseURL: 'OCR_BASEURL',
mistralModel: 'mistral-ocr-latest',
},
},
},
};
const file = {
path: '/tmp/upload/file.pdf',
originalname: 'single-page.pdf',
};
const result = await uploadMistralOCR({
req,
file,
file_id: 'file123',
entity_id: 'entity123',
});
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
// Verify that single page documents don't include page numbering
expect(result.text).not.toContain('# PAGE');
expect(result.text).toEqual('Single page content\n\n');
});
it('should use literal values in configuration when provided directly', async () => {
const { loadAuthValues } = require('~/server/services/Tools/credentials');
// We'll still mock this but it should not be used for literal values
loadAuthValues.mockResolvedValue({});
// Clear all previous mocks
mockAxios.post.mockClear();
mockAxios.get.mockClear();
// 1. First mock: File upload response
mockAxios.post.mockImplementationOnce(() =>
Promise.resolve({ data: { id: 'file-123', purpose: 'ocr' } }),
);
// 2. Second mock: Signed URL response
mockAxios.get.mockImplementationOnce(() =>
Promise.resolve({ data: { url: 'https://signed-url.com' } }),
);
// 3. Third mock: OCR response
mockAxios.post.mockImplementationOnce(() =>
Promise.resolve({
data: {
pages: [{ markdown: 'Processed with literal config values' }],
},
}),
);
const req = {
user: { id: 'user123' },
app: {
locals: {
ocr: {
// Direct values that should be used as-is, without variable substitution
apiKey: 'actual-api-key-value',
baseURL: 'https://direct-api-url.mistral.ai/v1',
mistralModel: 'mistral-direct-model',
},
},
},
};
const file = {
path: '/tmp/upload/file.pdf',
originalname: 'direct-values.pdf',
};
const result = await uploadMistralOCR({
req,
file,
file_id: 'file123',
entity_id: 'entity123',
});
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
// Verify the correct URL was used with the direct baseURL value
expect(mockAxios.post).toHaveBeenCalledWith(
'https://direct-api-url.mistral.ai/v1/files',
expect.any(Object),
expect.objectContaining({
headers: expect.objectContaining({
Authorization: 'Bearer actual-api-key-value',
}),
}),
);
// Check the OCR call was made with the direct model value
expect(mockAxios.post).toHaveBeenCalledWith(
'https://direct-api-url.mistral.ai/v1/ocr',
expect.objectContaining({
model: 'mistral-direct-model',
}),
expect.any(Object),
);
// Verify the result
expect(result.text).toEqual('Processed with literal config values\n\n');
// Verify loadAuthValues was never called since we used direct values
expect(loadAuthValues).not.toHaveBeenCalled();
});
it('should handle empty configuration values and use defaults', async () => {
const { loadAuthValues } = require('~/server/services/Tools/credentials');
// Set up the mock values to be returned by loadAuthValues
loadAuthValues.mockResolvedValue({
OCR_API_KEY: 'default-from-env-key',
OCR_BASEURL: 'https://default-from-env.mistral.ai/v1',
});
// Clear all previous mocks
mockAxios.post.mockClear();
mockAxios.get.mockClear();
// 1. First mock: File upload response
mockAxios.post.mockImplementationOnce(() =>
Promise.resolve({ data: { id: 'file-123', purpose: 'ocr' } }),
);
// 2. Second mock: Signed URL response
mockAxios.get.mockImplementationOnce(() =>
Promise.resolve({ data: { url: 'https://signed-url.com' } }),
);
// 3. Third mock: OCR response
mockAxios.post.mockImplementationOnce(() =>
Promise.resolve({
data: {
pages: [{ markdown: 'Content from default configuration' }],
},
}),
);
const req = {
user: { id: 'user123' },
app: {
locals: {
ocr: {
// Empty string values - should fall back to defaults
apiKey: '',
baseURL: '',
mistralModel: '',
},
},
},
};
const file = {
path: '/tmp/upload/file.pdf',
originalname: 'empty-config.pdf',
};
const result = await uploadMistralOCR({
req,
file,
file_id: 'file123',
entity_id: 'entity123',
});
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
// Verify loadAuthValues was called with the default variable names
expect(loadAuthValues).toHaveBeenCalledWith({
userId: 'user123',
authFields: ['OCR_BASEURL', 'OCR_API_KEY'],
optional: expect.any(Set),
});
// Verify the API calls used the default values from loadAuthValues
expect(mockAxios.post).toHaveBeenCalledWith(
'https://default-from-env.mistral.ai/v1/files',
expect.any(Object),
expect.objectContaining({
headers: expect.objectContaining({
Authorization: 'Bearer default-from-env-key',
}),
}),
);
// Verify the OCR model defaulted to mistral-ocr-latest
expect(mockAxios.post).toHaveBeenCalledWith(
'https://default-from-env.mistral.ai/v1/ocr',
expect.objectContaining({
model: 'mistral-ocr-latest',
}),
expect.any(Object),
);
// Check result
expect(result.text).toEqual('Content from default configuration\n\n');
});
});
});

View file

@ -0,0 +1,5 @@
const crud = require('./crud');
module.exports = {
...crud,
};

View file

@ -49,6 +49,7 @@ async function encodeAndFormat(req, files, endpoint, mode) {
const promises = [];
const encodingMethods = {};
const result = {
text: '',
files: [],
image_urls: [],
};
@ -59,6 +60,9 @@ async function encodeAndFormat(req, files, endpoint, mode) {
for (let file of files) {
const source = file.source ?? FileSources.local;
if (source === FileSources.text && file.text) {
result.text += `${!result.text ? 'Attached document(s):\n```md' : '\n\n---\n\n'}# "${file.filename}"\n${file.text}\n`;
}
if (!file.height) {
promises.push([file, null]);
@ -85,6 +89,10 @@ async function encodeAndFormat(req, files, endpoint, mode) {
promises.push(preparePayload(req, file));
}
if (result.text) {
result.text += '\n```';
}
const detail = req.body.imageDetail ?? ImageDetail.auto;
/** @type {Array<[MongoFile, string]>} */

View file

@ -28,8 +28,8 @@ const { addResourceFileId, deleteResourceFileId } = require('~/server/controller
const { addAgentResourceFile, removeAgentResourceFiles } = require('~/models/Agent');
const { getOpenAIClient } = require('~/server/controllers/assistants/helpers');
const { createFile, updateFileUsage, deleteFiles } = require('~/models/File');
const { loadAuthValues } = require('~/server/services/Tools/credentials');
const { getEndpointsConfig } = require('~/server/services/Config');
const { loadAuthValues } = require('~/app/clients/tools/util');
const { LB_QueueAsyncCall } = require('~/server/utils/queue');
const { getStrategyFunctions } = require('./strategies');
const { determineFileType } = require('~/server/utils');
@ -162,7 +162,6 @@ const processDeleteRequest = async ({ req, files }) => {
for (const file of files) {
const source = file.source ?? FileSources.local;
if (req.body.agent_id && req.body.tool_resource) {
agentFiles.push({
tool_resource: req.body.tool_resource,
@ -170,6 +169,11 @@ const processDeleteRequest = async ({ req, files }) => {
});
}
if (source === FileSources.text) {
resolvedFileIds.push(file.file_id);
continue;
}
if (checkOpenAIStorage(source) && !client[source]) {
await initializeClients();
}
@ -521,6 +525,52 @@ const processAgentFileUpload = async ({ req, res, metadata }) => {
if (!isFileSearchEnabled) {
throw new Error('File search is not enabled for Agents');
}
} else if (tool_resource === EToolResources.ocr) {
const isOCREnabled = await checkCapability(req, AgentCapabilities.ocr);
if (!isOCREnabled) {
throw new Error('OCR capability is not enabled for Agents');
}
const { handleFileUpload } = getStrategyFunctions(
req.app.locals?.ocr?.strategy ?? FileSources.mistral_ocr,
);
const { file_id, temp_file_id } = metadata;
const {
text,
bytes,
// TODO: OCR images support?
images,
filename,
filepath: ocrFileURL,
} = await handleFileUpload({ req, file, file_id, entity_id: agent_id });
const fileInfo = removeNullishValues({
text,
bytes,
file_id,
temp_file_id,
user: req.user.id,
type: file.mimetype,
filepath: ocrFileURL,
source: FileSources.text,
filename: filename ?? file.originalname,
model: messageAttachment ? undefined : req.body.model,
context: messageAttachment ? FileContext.message_attachment : FileContext.agents,
});
if (!messageAttachment && tool_resource) {
await addAgentResourceFile({
req,
file_id,
agent_id,
tool_resource,
});
}
const result = await createFile(fileInfo, true);
return res
.status(200)
.json({ message: 'Agent file uploaded and processed successfully', ...result });
}
const source =

View file

@ -24,6 +24,7 @@ const {
const { uploadOpenAIFile, deleteOpenAIFile, getOpenAIFileStream } = require('./OpenAI');
const { getCodeOutputDownloadStream, uploadCodeEnvFile } = require('./Code');
const { uploadVectors, deleteVectors } = require('./VectorDB');
const { uploadMistralOCR } = require('./MistralOCR');
/**
* Firebase Storage Strategy Functions
@ -127,6 +128,26 @@ const codeOutputStrategy = () => ({
getDownloadStream: getCodeOutputDownloadStream,
});
const mistralOCRStrategy = () => ({
/** @type {typeof saveFileFromURL | null} */
saveURL: null,
/** @type {typeof getLocalFileURL | null} */
getFileURL: null,
/** @type {typeof saveLocalBuffer | null} */
saveBuffer: null,
/** @type {typeof processLocalAvatar | null} */
processAvatar: null,
/** @type {typeof uploadLocalImage | null} */
handleImageUpload: null,
/** @type {typeof prepareImagesLocal | null} */
prepareImagePayload: null,
/** @type {typeof deleteLocalFile | null} */
deleteFile: null,
/** @type {typeof getLocalFileStream | null} */
getDownloadStream: null,
handleFileUpload: uploadMistralOCR,
});
// Strategy Selector
const getStrategyFunctions = (fileSource) => {
if (fileSource === FileSources.firebase) {
@ -141,6 +162,8 @@ const getStrategyFunctions = (fileSource) => {
return vectorStrategy();
} else if (fileSource === FileSources.execute_code) {
return codeOutputStrategy();
} else if (fileSource === FileSources.mistral_ocr) {
return mistralOCRStrategy();
} else {
throw new Error('Invalid file source');
}