👁️ feat: Azure Mistral OCR Strategy (#7888)

* 👁️ feat: Add Azure Mistral OCR strategy and endpoint integration

This commit introduces a new OCR strategy named 'azure_mistral_ocr', allowing the use of a Mistral OCR endpoint deployed on Azure. The configuration, schemas, and file upload strategies have been updated to support this integration, enabling seamless OCR processing via Azure-hosted Mistral services.

* 🗑️ chore: Clean up .gitignore by removing commented-out uncommon directory name

* chore: remove unused vars

* refactor: Move createAxiosInstance to packages/api/utils and update imports

- Removed the createAxiosInstance function from the config module and relocated it to a new utils module for better organization.
- Updated import paths in relevant files to reflect the new location of createAxiosInstance.
- Added tests for createAxiosInstance to ensure proper functionality and proxy configuration handling.

* chore: move axios helpers to packages/api

- Added logAxiosError function to @librechat/api for centralized error logging.
- Updated imports across various files to use the new logAxiosError function.
- Removed the old axios.js utility file as it is no longer needed.

* chore: Update Jest moduleNameMapper for improved path resolution

- Added a new mapping for '~/' to resolve module paths in Jest configuration, enhancing import handling for the project.

* feat: Implement Mistral OCR API integration in TS

* chore: Update MistralOCR tests based on new imports

* fix: Enhance MistralOCR configuration handling and tests

- Introduced helper functions for resolving configuration values from environment variables or hardcoded settings.
- Updated the uploadMistralOCR and uploadAzureMistralOCR functions to utilize the new configuration resolution logic.
- Improved test cases to ensure correct behavior when mixing environment variables and hardcoded values.
- Mocked file upload and signed URL responses in tests to validate functionality without external dependencies.

* feat: Enhance MistralOCR functionality with improved configuration and error handling

- Introduced helper functions for loading authentication configuration and resolving values from environment variables.
- Updated uploadMistralOCR and uploadAzureMistralOCR functions to utilize the new configuration logic.
- Added utility functions for processing OCR results and creating error messages.
- Improved document type determination and result aggregation for better OCR processing.

* refactor: Reorganize OCR type imports in Mistral CRUD file

- Moved OCRResult, OCRResultPage, and OCRImage imports to a more logical grouping for better readability and maintainability.

* feat: Add file exports to API and create files index

* chore: Update OCR types for enhanced structure and clarity

- Redesigned OCRImage interface to include mandatory fields and improved naming conventions.
- Added PageDimensions interface for better representation of page metrics.
- Updated OCRResultPage to include dimensions and mandatory images array.
- Refined OCRResult to include document annotation and usage information.

* refactor: use TS counterpart of uploadOCR methods

* ci: Update MistralOCR tests to reflect new OCR result structure

* chore: Bump version of @librechat/api to 1.2.3 in package.json and package-lock.json

* chore: Update CONFIG_VERSION to 1.2.8

* chore: remove unused sendEvent function from config module (now imported from '@librechat/api')

* chore: remove MistralOCR service files and tests (now in '@librechat/api')

* ci: update logger import in ModelService tests to use @librechat/data-schemas

---------

Co-authored-by: arthurolivierfortin <arthurolivier.fortin@gmail.com>
This commit is contained in:
Danny Avila 2025-06-13 15:14:57 -04:00 committed by GitHub
parent 46ff008b07
commit 5f2d1c5dc9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
37 changed files with 2245 additions and 1235 deletions

View file

@ -1,10 +1,11 @@
const { z } = require('zod');
const axios = require('axios');
const { Ollama } = require('ollama');
const { sleep } = require('@librechat/agents');
const { logAxiosError } = require('@librechat/api');
const { logger } = require('@librechat/data-schemas');
const { Constants } = require('librechat-data-provider');
const { deriveBaseURL, logAxiosError } = require('~/utils');
const { sleep } = require('~/server/utils');
const { logger } = require('~/config');
const { deriveBaseURL } = require('~/utils');
const ollamaPayloadSchema = z.object({
mirostat: z.number().optional(),
@ -67,7 +68,7 @@ class OllamaClient {
return models;
} catch (error) {
const logMessage =
'Failed to fetch models from Ollama API. If you are not using Ollama directly, and instead, through some aggregator or reverse proxy that handles fetching via OpenAI spec, ensure the name of the endpoint doesn\'t start with `ollama` (case-insensitive).';
"Failed to fetch models from Ollama API. If you are not using Ollama directly, and instead, through some aggregator or reverse proxy that handles fetching via OpenAI spec, ensure the name of the endpoint doesn't start with `ollama` (case-insensitive).";
logAxiosError({ message: logMessage, error });
return [];
}

View file

@ -4,12 +4,13 @@ const { v4 } = require('uuid');
const OpenAI = require('openai');
const FormData = require('form-data');
const { tool } = require('@langchain/core/tools');
const { logAxiosError } = require('@librechat/api');
const { logger } = require('@librechat/data-schemas');
const { HttpsProxyAgent } = require('https-proxy-agent');
const { ContentTypes, EImageOutputType } = require('librechat-data-provider');
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
const { logAxiosError, extractBaseURL } = require('~/utils');
const { extractBaseURL } = require('~/utils');
const { getFiles } = require('~/models/File');
const { logger } = require('~/config');
/** Default descriptions for image generation tool */
const DEFAULT_IMAGE_GEN_DESCRIPTION = `

View file

@ -1,4 +1,3 @@
const axios = require('axios');
const { EventSource } = require('eventsource');
const { Time } = require('librechat-data-provider');
const { MCPManager, FlowStateManager } = require('@librechat/api');
@ -37,60 +36,8 @@ function getFlowStateManager(flowsCache) {
return flowManager;
}
/**
* Sends message data in Server Sent Events format.
* @param {ServerResponse} res - The server response.
* @param {{ data: string | Record<string, unknown>, event?: string }} event - The message event.
* @param {string} event.event - The type of event.
* @param {string} event.data - The message to be sent.
*/
const sendEvent = (res, event) => {
if (typeof event.data === 'string' && event.data.length === 0) {
return;
}
res.write(`event: message\ndata: ${JSON.stringify(event)}\n\n`);
};
/**
* Creates and configures an Axios instance with optional proxy settings.
*
* @typedef {import('axios').AxiosInstance} AxiosInstance
* @typedef {import('axios').AxiosProxyConfig} AxiosProxyConfig
*
* @returns {AxiosInstance} A configured Axios instance
* @throws {Error} If there's an issue creating the Axios instance or parsing the proxy URL
*/
function createAxiosInstance() {
const instance = axios.create();
if (process.env.proxy) {
try {
const url = new URL(process.env.proxy);
/** @type {AxiosProxyConfig} */
const proxyConfig = {
host: url.hostname.replace(/^\[|\]$/g, ''),
protocol: url.protocol.replace(':', ''),
};
if (url.port) {
proxyConfig.port = parseInt(url.port, 10);
}
instance.defaults.proxy = proxyConfig;
} catch (error) {
console.error('Error parsing proxy URL:', error);
throw new Error(`Invalid proxy URL: ${process.env.proxy}`);
}
}
return instance;
}
module.exports = {
logger,
sendEvent,
getMCPManager,
createAxiosInstance,
getFlowStateManager,
};

View file

@ -1,9 +1,9 @@
const jwt = require('jsonwebtoken');
const { nanoid } = require('nanoid');
const { sendEvent } = require('@librechat/api');
const { tool } = require('@langchain/core/tools');
const { logger } = require('@librechat/data-schemas');
const { GraphEvents, sleep } = require('@librechat/agents');
const { sendEvent, logAxiosError } = require('@librechat/api');
const {
Time,
CacheKeys,
@ -19,7 +19,6 @@ const { encryptV2, decryptV2 } = require('~/server/utils/crypto');
const { getActions, deleteActions } = require('~/models/Action');
const { deleteAssistant } = require('~/models/Assistant');
const { getFlowStateManager } = require('~/config');
const { logAxiosError } = require('~/utils');
const { getLogStores } = require('~/cache');
const { findToken } = require('~/models');

View file

@ -1,7 +1,6 @@
const FormData = require('form-data');
const { getCodeBaseURL } = require('@librechat/agents');
const { createAxiosInstance } = require('~/config');
const { logAxiosError } = require('~/utils');
const { createAxiosInstance, logAxiosError } = require('@librechat/api');
const axios = createAxiosInstance();

View file

@ -1,6 +1,8 @@
const path = require('path');
const { v4 } = require('uuid');
const axios = require('axios');
const { logAxiosError } = require('@librechat/api');
const { logger } = require('@librechat/data-schemas');
const { getCodeBaseURL } = require('@librechat/agents');
const {
Tools,
@ -12,8 +14,6 @@ const {
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
const { convertImage } = require('~/server/services/Files/images/convert');
const { createFile, getFiles, updateFile } = require('~/models/File');
const { logAxiosError } = require('~/utils');
const { logger } = require('~/config');
/**
* Process OpenAI image files, convert to target format, save and return file metadata.

View file

@ -1,238 +0,0 @@
// ~/server/services/Files/MistralOCR/crud.js
const fs = require('fs');
const path = require('path');
const FormData = require('form-data');
const {
FileSources,
envVarRegex,
extractEnvVariable,
extractVariableName,
} = require('librechat-data-provider');
const { loadAuthValues } = require('~/server/services/Tools/credentials');
const { logger, createAxiosInstance } = require('~/config');
const { logAxiosError } = require('~/utils/axios');
const axios = createAxiosInstance();
/**
* Uploads a document to Mistral API using file streaming to avoid loading the entire file into memory
*
* @param {Object} params Upload parameters
* @param {string} params.filePath The path to the file on disk
* @param {string} [params.fileName] Optional filename to use (defaults to the name from filePath)
* @param {string} params.apiKey Mistral API key
* @param {string} [params.baseURL=https://api.mistral.ai/v1] Mistral API base URL
* @returns {Promise<Object>} The response from Mistral API
*/
async function uploadDocumentToMistral({
filePath,
fileName = '',
apiKey,
baseURL = 'https://api.mistral.ai/v1',
}) {
const form = new FormData();
form.append('purpose', 'ocr');
const actualFileName = fileName || path.basename(filePath);
const fileStream = fs.createReadStream(filePath);
form.append('file', fileStream, { filename: actualFileName });
return axios
.post(`${baseURL}/files`, form, {
headers: {
Authorization: `Bearer ${apiKey}`,
...form.getHeaders(),
},
maxBodyLength: Infinity,
maxContentLength: Infinity,
})
.then((res) => res.data)
.catch((error) => {
throw error;
});
}
async function getSignedUrl({
apiKey,
fileId,
expiry = 24,
baseURL = 'https://api.mistral.ai/v1',
}) {
return axios
.get(`${baseURL}/files/${fileId}/url?expiry=${expiry}`, {
headers: {
Authorization: `Bearer ${apiKey}`,
},
})
.then((res) => res.data)
.catch((error) => {
logger.error('Error fetching signed URL:', error.message);
throw error;
});
}
/**
* @param {Object} params
* @param {string} params.apiKey
* @param {string} params.url - The document or image URL
* @param {string} [params.documentType='document_url'] - 'document_url' or 'image_url'
* @param {string} [params.model]
* @param {string} [params.baseURL]
* @returns {Promise<OCRResult>}
*/
async function performOCR({
apiKey,
url,
documentType = 'document_url',
model = 'mistral-ocr-latest',
baseURL = 'https://api.mistral.ai/v1',
}) {
const documentKey = documentType === 'image_url' ? 'image_url' : 'document_url';
return axios
.post(
`${baseURL}/ocr`,
{
model,
image_limit: 0,
include_image_base64: false,
document: {
type: documentType,
[documentKey]: url,
},
},
{
headers: {
'Content-Type': 'application/json',
Authorization: `Bearer ${apiKey}`,
},
},
)
.then((res) => res.data)
.catch((error) => {
logger.error('Error performing OCR:', error.message);
throw error;
});
}
/**
* Uploads a file to the Mistral OCR API and processes the OCR result.
*
* @param {Object} params - The params object.
* @param {ServerRequest} params.req - The request object from Express. It should have a `user` property with an `id`
* representing the user
* @param {Express.Multer.File} params.file - The file object, which is part of the request. The file object should
* have a `mimetype` property that tells us the file type
* @param {string} params.file_id - The file ID.
* @param {string} [params.entity_id] - The entity ID, not used here but passed for consistency.
* @returns {Promise<{ filepath: string, bytes: number }>} - The result object containing the processed `text` and `images` (not currently used),
* along with the `filename` and `bytes` properties.
*/
const uploadMistralOCR = async ({ req, file, file_id, entity_id }) => {
try {
/** @type {TCustomConfig['ocr']} */
const ocrConfig = req.app.locals?.ocr;
const apiKeyConfig = ocrConfig.apiKey || '';
const baseURLConfig = ocrConfig.baseURL || '';
const isApiKeyEnvVar = envVarRegex.test(apiKeyConfig);
const isBaseURLEnvVar = envVarRegex.test(baseURLConfig);
const isApiKeyEmpty = !apiKeyConfig.trim();
const isBaseURLEmpty = !baseURLConfig.trim();
let apiKey, baseURL;
if (isApiKeyEnvVar || isBaseURLEnvVar || isApiKeyEmpty || isBaseURLEmpty) {
const apiKeyVarName = isApiKeyEnvVar ? extractVariableName(apiKeyConfig) : 'OCR_API_KEY';
const baseURLVarName = isBaseURLEnvVar ? extractVariableName(baseURLConfig) : 'OCR_BASEURL';
const authValues = await loadAuthValues({
userId: req.user.id,
authFields: [baseURLVarName, apiKeyVarName],
optional: new Set([baseURLVarName]),
});
apiKey = authValues[apiKeyVarName];
baseURL = authValues[baseURLVarName];
} else {
apiKey = apiKeyConfig;
baseURL = baseURLConfig;
}
const mistralFile = await uploadDocumentToMistral({
filePath: file.path,
fileName: file.originalname,
apiKey,
baseURL,
});
const modelConfig = ocrConfig.mistralModel || '';
const model = envVarRegex.test(modelConfig)
? extractEnvVariable(modelConfig)
: modelConfig.trim() || 'mistral-ocr-latest';
const signedUrlResponse = await getSignedUrl({
apiKey,
baseURL,
fileId: mistralFile.id,
});
const mimetype = (file.mimetype || '').toLowerCase();
const originalname = file.originalname || '';
const isImage =
mimetype.startsWith('image') || /\.(png|jpe?g|gif|bmp|webp|tiff?)$/i.test(originalname);
const documentType = isImage ? 'image_url' : 'document_url';
const ocrResult = await performOCR({
apiKey,
baseURL,
model,
url: signedUrlResponse.url,
documentType,
});
let aggregatedText = '';
const images = [];
ocrResult.pages.forEach((page, index) => {
if (ocrResult.pages.length > 1) {
aggregatedText += `# PAGE ${index + 1}\n`;
}
aggregatedText += page.markdown + '\n\n';
if (page.images && page.images.length > 0) {
page.images.forEach((image) => {
if (image.image_base64) {
images.push(image.image_base64);
}
});
}
});
return {
filename: file.originalname,
bytes: aggregatedText.length * 4,
filepath: FileSources.mistral_ocr,
text: aggregatedText,
images,
};
} catch (error) {
let message = 'Error uploading document to Mistral OCR API';
const detail = error?.response?.data?.detail;
if (detail && detail !== '') {
message = detail;
}
const responseMessage = error?.response?.data?.message;
throw new Error(
`${logAxiosError({ error, message })}${responseMessage && responseMessage !== '' ? ` - ${responseMessage}` : ''}`,
);
}
};
module.exports = {
uploadDocumentToMistral,
uploadMistralOCR,
getSignedUrl,
performOCR,
};

View file

@ -1,848 +0,0 @@
const fs = require('fs');
const mockAxios = {
interceptors: {
request: { use: jest.fn(), eject: jest.fn() },
response: { use: jest.fn(), eject: jest.fn() },
},
create: jest.fn().mockReturnValue({
defaults: {
proxy: null,
},
get: jest.fn().mockResolvedValue({ data: {} }),
post: jest.fn().mockResolvedValue({ data: {} }),
put: jest.fn().mockResolvedValue({ data: {} }),
delete: jest.fn().mockResolvedValue({ data: {} }),
}),
get: jest.fn().mockResolvedValue({ data: {} }),
post: jest.fn().mockResolvedValue({ data: {} }),
put: jest.fn().mockResolvedValue({ data: {} }),
delete: jest.fn().mockResolvedValue({ data: {} }),
reset: jest.fn().mockImplementation(function () {
this.get.mockClear();
this.post.mockClear();
this.put.mockClear();
this.delete.mockClear();
this.create.mockClear();
}),
};
jest.mock('axios', () => mockAxios);
jest.mock('fs');
jest.mock('~/config', () => ({
logger: {
error: jest.fn(),
},
createAxiosInstance: () => mockAxios,
}));
jest.mock('~/server/services/Tools/credentials', () => ({
loadAuthValues: jest.fn(),
}));
const { uploadDocumentToMistral, uploadMistralOCR, getSignedUrl, performOCR } = require('./crud');
describe('MistralOCR Service', () => {
afterEach(() => {
mockAxios.reset();
jest.clearAllMocks();
});
describe('uploadDocumentToMistral', () => {
beforeEach(() => {
// Create a more complete mock for file streams that FormData can work with
const mockReadStream = {
on: jest.fn().mockImplementation(function (event, handler) {
// Simulate immediate 'end' event to make FormData complete processing
if (event === 'end') {
handler();
}
return this;
}),
pipe: jest.fn().mockImplementation(function () {
return this;
}),
pause: jest.fn(),
resume: jest.fn(),
emit: jest.fn(),
once: jest.fn(),
destroy: jest.fn(),
};
fs.createReadStream = jest.fn().mockReturnValue(mockReadStream);
// Mock FormData's append to avoid actual stream processing
jest.mock('form-data', () => {
const mockFormData = function () {
return {
append: jest.fn(),
getHeaders: jest
.fn()
.mockReturnValue({ 'content-type': 'multipart/form-data; boundary=---boundary' }),
getBuffer: jest.fn().mockReturnValue(Buffer.from('mock-form-data')),
getLength: jest.fn().mockReturnValue(100),
};
};
return mockFormData;
});
});
it('should upload a document to Mistral API using file streaming', async () => {
const mockResponse = { data: { id: 'file-123', purpose: 'ocr' } };
mockAxios.post.mockResolvedValueOnce(mockResponse);
const result = await uploadDocumentToMistral({
filePath: '/path/to/test.pdf',
fileName: 'test.pdf',
apiKey: 'test-api-key',
});
// Check that createReadStream was called with the correct file path
expect(fs.createReadStream).toHaveBeenCalledWith('/path/to/test.pdf');
// Since we're mocking FormData, we'll just check that axios was called correctly
expect(mockAxios.post).toHaveBeenCalledWith(
'https://api.mistral.ai/v1/files',
expect.anything(),
expect.objectContaining({
headers: expect.objectContaining({
Authorization: 'Bearer test-api-key',
}),
maxBodyLength: Infinity,
maxContentLength: Infinity,
}),
);
expect(result).toEqual(mockResponse.data);
});
it('should handle errors during document upload', async () => {
const errorMessage = 'API error';
mockAxios.post.mockRejectedValueOnce(new Error(errorMessage));
await expect(
uploadDocumentToMistral({
filePath: '/path/to/test.pdf',
fileName: 'test.pdf',
apiKey: 'test-api-key',
}),
).rejects.toThrow(errorMessage);
});
});
describe('getSignedUrl', () => {
it('should fetch signed URL from Mistral API', async () => {
const mockResponse = { data: { url: 'https://document-url.com' } };
mockAxios.get.mockResolvedValueOnce(mockResponse);
const result = await getSignedUrl({
fileId: 'file-123',
apiKey: 'test-api-key',
});
expect(mockAxios.get).toHaveBeenCalledWith(
'https://api.mistral.ai/v1/files/file-123/url?expiry=24',
{
headers: {
Authorization: 'Bearer test-api-key',
},
},
);
expect(result).toEqual(mockResponse.data);
});
it('should handle errors when fetching signed URL', async () => {
const errorMessage = 'API error';
mockAxios.get.mockRejectedValueOnce(new Error(errorMessage));
await expect(
getSignedUrl({
fileId: 'file-123',
apiKey: 'test-api-key',
}),
).rejects.toThrow();
const { logger } = require('~/config');
expect(logger.error).toHaveBeenCalledWith('Error fetching signed URL:', errorMessage);
});
});
describe('performOCR', () => {
it('should perform OCR using Mistral API (document_url)', async () => {
const mockResponse = {
data: {
pages: [{ markdown: 'Page 1 content' }, { markdown: 'Page 2 content' }],
},
};
mockAxios.post.mockResolvedValueOnce(mockResponse);
const result = await performOCR({
apiKey: 'test-api-key',
url: 'https://document-url.com',
model: 'mistral-ocr-latest',
documentType: 'document_url',
});
expect(mockAxios.post).toHaveBeenCalledWith(
'https://api.mistral.ai/v1/ocr',
{
model: 'mistral-ocr-latest',
include_image_base64: false,
image_limit: 0,
document: {
type: 'document_url',
document_url: 'https://document-url.com',
},
},
{
headers: {
'Content-Type': 'application/json',
Authorization: 'Bearer test-api-key',
},
},
);
expect(result).toEqual(mockResponse.data);
});
it('should perform OCR using Mistral API (image_url)', async () => {
const mockResponse = {
data: {
pages: [{ markdown: 'Image OCR content' }],
},
};
mockAxios.post.mockResolvedValueOnce(mockResponse);
const result = await performOCR({
apiKey: 'test-api-key',
url: 'https://image-url.com/image.png',
model: 'mistral-ocr-latest',
documentType: 'image_url',
});
expect(mockAxios.post).toHaveBeenCalledWith(
'https://api.mistral.ai/v1/ocr',
{
model: 'mistral-ocr-latest',
include_image_base64: false,
image_limit: 0,
document: {
type: 'image_url',
image_url: 'https://image-url.com/image.png',
},
},
{
headers: {
'Content-Type': 'application/json',
Authorization: 'Bearer test-api-key',
},
},
);
expect(result).toEqual(mockResponse.data);
});
it('should handle errors during OCR processing', async () => {
const errorMessage = 'OCR processing error';
mockAxios.post.mockRejectedValueOnce(new Error(errorMessage));
await expect(
performOCR({
apiKey: 'test-api-key',
url: 'https://document-url.com',
}),
).rejects.toThrow();
const { logger } = require('~/config');
expect(logger.error).toHaveBeenCalledWith('Error performing OCR:', errorMessage);
});
});
describe('uploadMistralOCR', () => {
beforeEach(() => {
const mockReadStream = {
on: jest.fn().mockImplementation(function (event, handler) {
if (event === 'end') {
handler();
}
return this;
}),
pipe: jest.fn().mockImplementation(function () {
return this;
}),
pause: jest.fn(),
resume: jest.fn(),
emit: jest.fn(),
once: jest.fn(),
destroy: jest.fn(),
};
fs.createReadStream = jest.fn().mockReturnValue(mockReadStream);
});
it('should process OCR for a file with standard configuration', async () => {
// Setup mocks
const { loadAuthValues } = require('~/server/services/Tools/credentials');
loadAuthValues.mockResolvedValue({
OCR_API_KEY: 'test-api-key',
OCR_BASEURL: 'https://api.mistral.ai/v1',
});
// Mock file upload response
mockAxios.post.mockResolvedValueOnce({
data: { id: 'file-123', purpose: 'ocr' },
});
// Mock signed URL response
mockAxios.get.mockResolvedValueOnce({
data: { url: 'https://signed-url.com' },
});
// Mock OCR response with text and images
mockAxios.post.mockResolvedValueOnce({
data: {
pages: [
{
markdown: 'Page 1 content',
images: [{ image_base64: 'base64image1' }],
},
{
markdown: 'Page 2 content',
images: [{ image_base64: 'base64image2' }],
},
],
},
});
const req = {
user: { id: 'user123' },
app: {
locals: {
ocr: {
// Use environment variable syntax to ensure loadAuthValues is called
apiKey: '${OCR_API_KEY}',
baseURL: '${OCR_BASEURL}',
mistralModel: 'mistral-medium',
},
},
},
};
const file = {
path: '/tmp/upload/file.pdf',
originalname: 'document.pdf',
mimetype: 'application/pdf',
};
const result = await uploadMistralOCR({
req,
file,
file_id: 'file123',
entity_id: 'entity123',
});
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
expect(loadAuthValues).toHaveBeenCalledWith({
userId: 'user123',
authFields: ['OCR_BASEURL', 'OCR_API_KEY'],
optional: expect.any(Set),
});
// Verify OCR result
expect(result).toEqual({
filename: 'document.pdf',
bytes: expect.any(Number),
filepath: 'mistral_ocr',
text: expect.stringContaining('# PAGE 1'),
images: ['base64image1', 'base64image2'],
});
});
it('should process OCR for an image file and use image_url type', async () => {
const { loadAuthValues } = require('~/server/services/Tools/credentials');
loadAuthValues.mockResolvedValue({
OCR_API_KEY: 'test-api-key',
OCR_BASEURL: 'https://api.mistral.ai/v1',
});
// Mock file upload response
mockAxios.post.mockResolvedValueOnce({
data: { id: 'file-456', purpose: 'ocr' },
});
// Mock signed URL response
mockAxios.get.mockResolvedValueOnce({
data: { url: 'https://signed-url.com/image.png' },
});
// Mock OCR response for image
mockAxios.post.mockResolvedValueOnce({
data: {
pages: [
{
markdown: 'Image OCR result',
images: [{ image_base64: 'imgbase64' }],
},
],
},
});
const req = {
user: { id: 'user456' },
app: {
locals: {
ocr: {
apiKey: '${OCR_API_KEY}',
baseURL: '${OCR_BASEURL}',
mistralModel: 'mistral-medium',
},
},
},
};
const file = {
path: '/tmp/upload/image.png',
originalname: 'image.png',
mimetype: 'image/png',
};
const result = await uploadMistralOCR({
req,
file,
file_id: 'file456',
entity_id: 'entity456',
});
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/image.png');
expect(loadAuthValues).toHaveBeenCalledWith({
userId: 'user456',
authFields: ['OCR_BASEURL', 'OCR_API_KEY'],
optional: expect.any(Set),
});
// Check that the OCR API was called with image_url type
expect(mockAxios.post).toHaveBeenCalledWith(
'https://api.mistral.ai/v1/ocr',
expect.objectContaining({
document: expect.objectContaining({
type: 'image_url',
image_url: 'https://signed-url.com/image.png',
}),
}),
expect.any(Object),
);
expect(result).toEqual({
filename: 'image.png',
bytes: expect.any(Number),
filepath: 'mistral_ocr',
text: expect.stringContaining('Image OCR result'),
images: ['imgbase64'],
});
});
it('should process variable references in configuration', async () => {
// Setup mocks with environment variables
const { loadAuthValues } = require('~/server/services/Tools/credentials');
loadAuthValues.mockResolvedValue({
CUSTOM_API_KEY: 'custom-api-key',
CUSTOM_BASEURL: 'https://custom-api.mistral.ai/v1',
});
// Mock API responses
mockAxios.post.mockResolvedValueOnce({
data: { id: 'file-123', purpose: 'ocr' },
});
mockAxios.get.mockResolvedValueOnce({
data: { url: 'https://signed-url.com' },
});
mockAxios.post.mockResolvedValueOnce({
data: {
pages: [{ markdown: 'Content from custom API' }],
},
});
const req = {
user: { id: 'user123' },
app: {
locals: {
ocr: {
apiKey: '${CUSTOM_API_KEY}',
baseURL: '${CUSTOM_BASEURL}',
mistralModel: '${CUSTOM_MODEL}',
},
},
},
};
// Set environment variable for model
process.env.CUSTOM_MODEL = 'mistral-large';
const file = {
path: '/tmp/upload/file.pdf',
originalname: 'document.pdf',
};
const result = await uploadMistralOCR({
req,
file,
file_id: 'file123',
entity_id: 'entity123',
});
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
// Verify that custom environment variables were extracted and used
expect(loadAuthValues).toHaveBeenCalledWith({
userId: 'user123',
authFields: ['CUSTOM_BASEURL', 'CUSTOM_API_KEY'],
optional: expect.any(Set),
});
// Check that mistral-large was used in the OCR API call
expect(mockAxios.post).toHaveBeenCalledWith(
expect.anything(),
expect.objectContaining({
model: 'mistral-large',
}),
expect.anything(),
);
expect(result.text).toEqual('Content from custom API\n\n');
});
it('should fall back to default values when variables are not properly formatted', async () => {
const { loadAuthValues } = require('~/server/services/Tools/credentials');
loadAuthValues.mockResolvedValue({
OCR_API_KEY: 'default-api-key',
OCR_BASEURL: undefined, // Testing optional parameter
});
mockAxios.post.mockResolvedValueOnce({
data: { id: 'file-123', purpose: 'ocr' },
});
mockAxios.get.mockResolvedValueOnce({
data: { url: 'https://signed-url.com' },
});
mockAxios.post.mockResolvedValueOnce({
data: {
pages: [{ markdown: 'Default API result' }],
},
});
const req = {
user: { id: 'user123' },
app: {
locals: {
ocr: {
// Use environment variable syntax to ensure loadAuthValues is called
apiKey: '${INVALID_FORMAT}', // Using valid env var format but with an invalid name
baseURL: '${OCR_BASEURL}', // Using valid env var format
mistralModel: 'mistral-ocr-latest', // Plain string value
},
},
},
};
const file = {
path: '/tmp/upload/file.pdf',
originalname: 'document.pdf',
};
await uploadMistralOCR({
req,
file,
file_id: 'file123',
entity_id: 'entity123',
});
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
// Should use the default values
expect(loadAuthValues).toHaveBeenCalledWith({
userId: 'user123',
authFields: ['OCR_BASEURL', 'INVALID_FORMAT'],
optional: expect.any(Set),
});
// Should use the default model when not using environment variable format
expect(mockAxios.post).toHaveBeenCalledWith(
expect.anything(),
expect.objectContaining({
model: 'mistral-ocr-latest',
}),
expect.anything(),
);
});
it('should handle API errors during OCR process', async () => {
const { loadAuthValues } = require('~/server/services/Tools/credentials');
loadAuthValues.mockResolvedValue({
OCR_API_KEY: 'test-api-key',
});
// Mock file upload to fail
mockAxios.post.mockRejectedValueOnce(new Error('Upload failed'));
const req = {
user: { id: 'user123' },
app: {
locals: {
ocr: {
apiKey: 'OCR_API_KEY',
baseURL: 'OCR_BASEURL',
},
},
},
};
const file = {
path: '/tmp/upload/file.pdf',
originalname: 'document.pdf',
};
await expect(
uploadMistralOCR({
req,
file,
file_id: 'file123',
entity_id: 'entity123',
}),
).rejects.toThrow('Error uploading document to Mistral OCR API');
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
});
it('should handle single page documents without page numbering', async () => {
const { loadAuthValues } = require('~/server/services/Tools/credentials');
loadAuthValues.mockResolvedValue({
OCR_API_KEY: 'test-api-key',
OCR_BASEURL: 'https://api.mistral.ai/v1', // Make sure this is included
});
// Clear all previous mocks
mockAxios.post.mockClear();
mockAxios.get.mockClear();
// 1. First mock: File upload response
mockAxios.post.mockImplementationOnce(() =>
Promise.resolve({ data: { id: 'file-123', purpose: 'ocr' } }),
);
// 2. Second mock: Signed URL response
mockAxios.get.mockImplementationOnce(() =>
Promise.resolve({ data: { url: 'https://signed-url.com' } }),
);
// 3. Third mock: OCR response
mockAxios.post.mockImplementationOnce(() =>
Promise.resolve({
data: {
pages: [{ markdown: 'Single page content' }],
},
}),
);
const req = {
user: { id: 'user123' },
app: {
locals: {
ocr: {
apiKey: 'OCR_API_KEY',
baseURL: 'OCR_BASEURL',
mistralModel: 'mistral-ocr-latest',
},
},
},
};
const file = {
path: '/tmp/upload/file.pdf',
originalname: 'single-page.pdf',
};
const result = await uploadMistralOCR({
req,
file,
file_id: 'file123',
entity_id: 'entity123',
});
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
// Verify that single page documents don't include page numbering
expect(result.text).not.toContain('# PAGE');
expect(result.text).toEqual('Single page content\n\n');
});
it('should use literal values in configuration when provided directly', async () => {
const { loadAuthValues } = require('~/server/services/Tools/credentials');
// We'll still mock this but it should not be used for literal values
loadAuthValues.mockResolvedValue({});
// Clear all previous mocks
mockAxios.post.mockClear();
mockAxios.get.mockClear();
// 1. First mock: File upload response
mockAxios.post.mockImplementationOnce(() =>
Promise.resolve({ data: { id: 'file-123', purpose: 'ocr' } }),
);
// 2. Second mock: Signed URL response
mockAxios.get.mockImplementationOnce(() =>
Promise.resolve({ data: { url: 'https://signed-url.com' } }),
);
// 3. Third mock: OCR response
mockAxios.post.mockImplementationOnce(() =>
Promise.resolve({
data: {
pages: [{ markdown: 'Processed with literal config values' }],
},
}),
);
const req = {
user: { id: 'user123' },
app: {
locals: {
ocr: {
// Direct values that should be used as-is, without variable substitution
apiKey: 'actual-api-key-value',
baseURL: 'https://direct-api-url.mistral.ai/v1',
mistralModel: 'mistral-direct-model',
},
},
},
};
const file = {
path: '/tmp/upload/file.pdf',
originalname: 'direct-values.pdf',
};
const result = await uploadMistralOCR({
req,
file,
file_id: 'file123',
entity_id: 'entity123',
});
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
// Verify the correct URL was used with the direct baseURL value
expect(mockAxios.post).toHaveBeenCalledWith(
'https://direct-api-url.mistral.ai/v1/files',
expect.any(Object),
expect.objectContaining({
headers: expect.objectContaining({
Authorization: 'Bearer actual-api-key-value',
}),
}),
);
// Check the OCR call was made with the direct model value
expect(mockAxios.post).toHaveBeenCalledWith(
'https://direct-api-url.mistral.ai/v1/ocr',
expect.objectContaining({
model: 'mistral-direct-model',
}),
expect.any(Object),
);
// Verify the result
expect(result.text).toEqual('Processed with literal config values\n\n');
// Verify loadAuthValues was never called since we used direct values
expect(loadAuthValues).not.toHaveBeenCalled();
});
it('should handle empty configuration values and use defaults', async () => {
const { loadAuthValues } = require('~/server/services/Tools/credentials');
// Set up the mock values to be returned by loadAuthValues
loadAuthValues.mockResolvedValue({
OCR_API_KEY: 'default-from-env-key',
OCR_BASEURL: 'https://default-from-env.mistral.ai/v1',
});
// Clear all previous mocks
mockAxios.post.mockClear();
mockAxios.get.mockClear();
// 1. First mock: File upload response
mockAxios.post.mockImplementationOnce(() =>
Promise.resolve({ data: { id: 'file-123', purpose: 'ocr' } }),
);
// 2. Second mock: Signed URL response
mockAxios.get.mockImplementationOnce(() =>
Promise.resolve({ data: { url: 'https://signed-url.com' } }),
);
// 3. Third mock: OCR response
mockAxios.post.mockImplementationOnce(() =>
Promise.resolve({
data: {
pages: [{ markdown: 'Content from default configuration' }],
},
}),
);
const req = {
user: { id: 'user123' },
app: {
locals: {
ocr: {
// Empty string values - should fall back to defaults
apiKey: '',
baseURL: '',
mistralModel: '',
},
},
},
};
const file = {
path: '/tmp/upload/file.pdf',
originalname: 'empty-config.pdf',
};
const result = await uploadMistralOCR({
req,
file,
file_id: 'file123',
entity_id: 'entity123',
});
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
// Verify loadAuthValues was called with the default variable names
expect(loadAuthValues).toHaveBeenCalledWith({
userId: 'user123',
authFields: ['OCR_BASEURL', 'OCR_API_KEY'],
optional: expect.any(Set),
});
// Verify the API calls used the default values from loadAuthValues
expect(mockAxios.post).toHaveBeenCalledWith(
'https://default-from-env.mistral.ai/v1/files',
expect.any(Object),
expect.objectContaining({
headers: expect.objectContaining({
Authorization: 'Bearer default-from-env-key',
}),
}),
);
// Verify the OCR model defaulted to mistral-ocr-latest
expect(mockAxios.post).toHaveBeenCalledWith(
'https://default-from-env.mistral.ai/v1/ocr',
expect.objectContaining({
model: 'mistral-ocr-latest',
}),
expect.any(Object),
);
// Check result
expect(result.text).toEqual('Content from default configuration\n\n');
});
});
});

View file

@ -1,5 +0,0 @@
const crud = require('./crud');
module.exports = {
...crud,
};

View file

@ -1,9 +1,9 @@
const fs = require('fs');
const axios = require('axios');
const FormData = require('form-data');
const { logAxiosError } = require('@librechat/api');
const { logger } = require('@librechat/data-schemas');
const { FileSources } = require('librechat-data-provider');
const { logAxiosError } = require('~/utils');
const { logger } = require('~/config');
/**
* Deletes a file from the vector database. This function takes a file object, constructs the full path, and

View file

@ -1,4 +1,5 @@
const axios = require('axios');
const { logAxiosError } = require('@librechat/api');
const {
FileSources,
VisionModes,
@ -7,8 +8,6 @@ const {
EModelEndpoint,
} = require('librechat-data-provider');
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
const { logAxiosError } = require('~/utils');
const { logger } = require('~/config');
/**
* Converts a readable stream to a base64 encoded string.

View file

@ -522,7 +522,7 @@ const processAgentFileUpload = async ({ req, res, metadata }) => {
throw new Error('OCR capability is not enabled for Agents');
}
const { handleFileUpload: uploadMistralOCR } = getStrategyFunctions(
const { handleFileUpload: uploadOCR } = getStrategyFunctions(
req.app.locals?.ocr?.strategy ?? FileSources.mistral_ocr,
);
const { file_id, temp_file_id } = metadata;
@ -534,7 +534,7 @@ const processAgentFileUpload = async ({ req, res, metadata }) => {
images,
filename,
filepath: ocrFileURL,
} = await uploadMistralOCR({ req, file, file_id, entity_id: agent_id, basePath });
} = await uploadOCR({ req, file, loadAuthValues });
const fileInfo = removeNullishValues({
text,

View file

@ -1,4 +1,5 @@
const { FileSources } = require('librechat-data-provider');
const { uploadMistralOCR, uploadAzureMistralOCR } = require('@librechat/api');
const {
getFirebaseURL,
prepareImageURL,
@ -46,7 +47,6 @@ const {
const { uploadOpenAIFile, deleteOpenAIFile, getOpenAIFileStream } = require('./OpenAI');
const { getCodeOutputDownloadStream, uploadCodeEnvFile } = require('./Code');
const { uploadVectors, deleteVectors } = require('./VectorDB');
const { uploadMistralOCR } = require('./MistralOCR');
/**
* Firebase Storage Strategy Functions
@ -202,6 +202,26 @@ const mistralOCRStrategy = () => ({
handleFileUpload: uploadMistralOCR,
});
const azureMistralOCRStrategy = () => ({
/** @type {typeof saveFileFromURL | null} */
saveURL: null,
/** @type {typeof getLocalFileURL | null} */
getFileURL: null,
/** @type {typeof saveLocalBuffer | null} */
saveBuffer: null,
/** @type {typeof processLocalAvatar | null} */
processAvatar: null,
/** @type {typeof uploadLocalImage | null} */
handleImageUpload: null,
/** @type {typeof prepareImagesLocal | null} */
prepareImagePayload: null,
/** @type {typeof deleteLocalFile | null} */
deleteFile: null,
/** @type {typeof getLocalFileStream | null} */
getDownloadStream: null,
handleFileUpload: uploadAzureMistralOCR,
});
// Strategy Selector
const getStrategyFunctions = (fileSource) => {
if (fileSource === FileSources.firebase) {
@ -222,6 +242,8 @@ const getStrategyFunctions = (fileSource) => {
return codeOutputStrategy();
} else if (fileSource === FileSources.mistral_ocr) {
return mistralOCRStrategy();
} else if (fileSource === FileSources.azure_mistral_ocr) {
return azureMistralOCRStrategy();
} else {
throw new Error('Invalid file source');
}

View file

@ -1,12 +1,13 @@
const axios = require('axios');
const { Providers } = require('@librechat/agents');
const { logAxiosError } = require('@librechat/api');
const { logger } = require('@librechat/data-schemas');
const { HttpsProxyAgent } = require('https-proxy-agent');
const { EModelEndpoint, defaultModels, CacheKeys } = require('librechat-data-provider');
const { inputSchema, logAxiosError, extractBaseURL, processModelData } = require('~/utils');
const { inputSchema, extractBaseURL, processModelData } = require('~/utils');
const { OllamaClient } = require('~/app/clients/OllamaClient');
const { isUserProvided } = require('~/server/utils');
const getLogStores = require('~/cache/getLogStores');
const { logger } = require('~/config');
/**
* Splits a string by commas and trims each resulting value.

View file

@ -1,6 +1,6 @@
const axios = require('axios');
const { logger } = require('@librechat/data-schemas');
const { EModelEndpoint, defaultModels } = require('librechat-data-provider');
const { logger } = require('~/config');
const {
fetchModels,
@ -28,7 +28,8 @@ jest.mock('~/cache/getLogStores', () =>
set: jest.fn().mockResolvedValue(true),
})),
);
jest.mock('~/config', () => ({
jest.mock('@librechat/data-schemas', () => ({
...jest.requireActual('@librechat/data-schemas'),
logger: {
error: jest.fn(),
},

View file

@ -1,6 +1,6 @@
const axios = require('axios');
const { logAxiosError } = require('@librechat/api');
const { EModelEndpoint } = require('librechat-data-provider');
const { logAxiosError } = require('~/utils');
/**
* @typedef {Object} RetrieveOptions

View file

@ -1,9 +1,9 @@
const axios = require('axios');
const { logAxiosError } = require('@librechat/api');
const { logger } = require('@librechat/data-schemas');
const { TokenExchangeMethodEnum } = require('librechat-data-provider');
const { handleOAuthToken } = require('~/models/Token');
const { decryptV2 } = require('~/server/utils/crypto');
const { logAxiosError } = require('~/utils');
const { logger } = require('~/config');
/**
* Processes the access tokens and stores them in the database.

View file

@ -4,9 +4,9 @@ const axios = require('axios');
const FormData = require('form-data');
const nodemailer = require('nodemailer');
const handlebars = require('handlebars');
const { logAxiosError } = require('@librechat/api');
const { logger } = require('@librechat/data-schemas');
const { isEnabled } = require('~/server/utils/handleText');
const { logAxiosError } = require('~/utils');
const logger = require('~/config/winston');
/**
* Sends an email using Mailgun API.

View file

@ -41,10 +41,7 @@ jest.mock('winston-daily-rotate-file', () => {
});
jest.mock('~/config', () => {
const actualModule = jest.requireActual('~/config');
return {
sendEvent: actualModule.sendEvent,
createAxiosInstance: actualModule.createAxiosInstance,
logger: {
info: jest.fn(),
warn: jest.fn(),

View file

@ -1,46 +0,0 @@
const { logger } = require('~/config');
/**
* Logs Axios errors based on the error object and a custom message.
*
* @param {Object} options - The options object.
* @param {string} options.message - The custom message to be logged.
* @param {import('axios').AxiosError} options.error - The Axios error object.
* @returns {string} The log message.
*/
const logAxiosError = ({ message, error }) => {
let logMessage = message;
try {
const stack = error.stack || 'No stack trace available';
if (error.response?.status) {
const { status, headers, data } = error.response;
logMessage = `${message} The server responded with status ${status}: ${error.message}`;
logger.error(logMessage, {
status,
headers,
data,
stack,
});
} else if (error.request) {
const { method, url } = error.config || {};
logMessage = `${message} No response received for ${method ? method.toUpperCase() : ''} ${url || ''}: ${error.message}`;
logger.error(logMessage, {
requestInfo: { method, url },
stack,
});
} else if (error?.message?.includes("Cannot read properties of undefined (reading 'status')")) {
logMessage = `${message} It appears the request timed out or was unsuccessful: ${error.message}`;
logger.error(logMessage, { stack });
} else {
logMessage = `${message} An error occurred while setting up the request: ${error.message}`;
logger.error(logMessage, { stack });
}
} catch (err) {
logMessage = `Error in logAxiosError: ${err.message}`;
logger.error(logMessage, { stack: err.stack || 'No stack trace available' });
}
return logMessage;
};
module.exports = { logAxiosError };

View file

@ -1,5 +1,4 @@
const loadYaml = require('./loadYaml');
const axiosHelpers = require('./axios');
const tokenHelpers = require('./tokens');
const deriveBaseURL = require('./deriveBaseURL');
const extractBaseURL = require('./extractBaseURL');
@ -9,7 +8,6 @@ module.exports = {
loadYaml,
deriveBaseURL,
extractBaseURL,
...axiosHelpers,
...tokenHelpers,
findMessageContent,
};

14
package-lock.json generated
View file

@ -25447,6 +25447,16 @@
"resolved": "https://registry.npmjs.org/@types/ms/-/ms-0.7.34.tgz",
"integrity": "sha512-nG96G3Wp6acyAgJqGasjODb+acrI7KltPiRxzHPXnP3NgI28bpQDRv53olbqGXbfcgF5aiiHmO3xpwEpS5Ld9g=="
},
"node_modules/@types/multer": {
"version": "1.4.13",
"resolved": "https://registry.npmjs.org/@types/multer/-/multer-1.4.13.tgz",
"integrity": "sha512-bhhdtPw7JqCiEfC9Jimx5LqX9BDIPJEh2q/fQ4bqbBPtyEZYr3cvF22NwG0DmPZNYA0CAf2CnqDB4KIGGpJcaw==",
"dev": true,
"license": "MIT",
"dependencies": {
"@types/express": "*"
}
},
"node_modules/@types/node": {
"version": "20.11.16",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.11.16.tgz",
@ -46052,7 +46062,7 @@
},
"packages/api": {
"name": "@librechat/api",
"version": "1.2.2",
"version": "1.2.3",
"license": "ISC",
"devDependencies": {
"@babel/preset-env": "^7.21.5",
@ -46069,6 +46079,7 @@
"@types/diff": "^6.0.0",
"@types/express": "^5.0.0",
"@types/jest": "^29.5.2",
"@types/multer": "^1.4.13",
"@types/node": "^20.3.0",
"@types/react": "^18.2.18",
"@types/winston": "^2.4.4",
@ -46086,6 +46097,7 @@
"@librechat/agents": "^2.4.37",
"@librechat/data-schemas": "*",
"@modelcontextprotocol/sdk": "^1.11.2",
"axios": "^1.8.2",
"diff": "^7.0.0",
"eventsource": "^3.0.2",
"express": "^4.21.2",

View file

@ -5,6 +5,7 @@ export default {
testResultsProcessor: 'jest-junit',
moduleNameMapper: {
'^@src/(.*)$': '<rootDir>/src/$1',
'~/(.*)': '<rootDir>/src/$1',
},
// coverageThreshold: {
// global: {

View file

@ -1,6 +1,6 @@
{
"name": "@librechat/api",
"version": "1.2.2",
"version": "1.2.3",
"type": "commonjs",
"description": "MCP services for LibreChat",
"main": "dist/index.js",
@ -51,6 +51,7 @@
"@types/diff": "^6.0.0",
"@types/express": "^5.0.0",
"@types/jest": "^29.5.2",
"@types/multer": "^1.4.13",
"@types/node": "^20.3.0",
"@types/react": "^18.2.18",
"@types/winston": "^2.4.4",
@ -70,14 +71,15 @@
"peerDependencies": {
"@librechat/agents": "^2.4.37",
"@librechat/data-schemas": "*",
"librechat-data-provider": "*",
"@modelcontextprotocol/sdk": "^1.11.2",
"axios": "^1.8.2",
"diff": "^7.0.0",
"eventsource": "^3.0.2",
"express": "^4.21.2",
"node-fetch": "2.7.0",
"keyv": "^5.3.2",
"zod": "^3.22.4",
"tiktoken": "^1.0.15"
"librechat-data-provider": "*",
"node-fetch": "2.7.0",
"tiktoken": "^1.0.15",
"zod": "^3.22.4"
}
}

View file

@ -1,5 +1,6 @@
// rollup.config.js
import { readFileSync } from 'fs';
import json from '@rollup/plugin-json';
import terser from '@rollup/plugin-terser';
import replace from '@rollup/plugin-replace';
import commonjs from '@rollup/plugin-commonjs';
@ -29,6 +30,7 @@ const plugins = [
inlineSourceMap: true,
}),
terser(),
json(),
];
const cjsBuild = {

View file

@ -0,0 +1 @@
export * from './mistral/crud';

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,424 @@
import * as fs from 'fs';
import * as path from 'path';
import FormData from 'form-data';
import { logger } from '@librechat/data-schemas';
import {
FileSources,
envVarRegex,
extractEnvVariable,
extractVariableName,
} from 'librechat-data-provider';
import type { TCustomConfig } from 'librechat-data-provider';
import type { Request as ServerRequest } from 'express';
import type { AxiosError } from 'axios';
import type {
MistralFileUploadResponse,
MistralSignedUrlResponse,
MistralOCRUploadResult,
MistralOCRError,
OCRResultPage,
OCRResult,
OCRImage,
} from '~/types';
import { logAxiosError, createAxiosInstance } from '~/utils/axios';
const axios = createAxiosInstance();
const DEFAULT_MISTRAL_BASE_URL = 'https://api.mistral.ai/v1';
const DEFAULT_MISTRAL_MODEL = 'mistral-ocr-latest';
/** Helper type for auth configuration */
interface AuthConfig {
apiKey: string;
baseURL: string;
}
/** Helper type for OCR request context */
interface OCRContext {
req: Pick<ServerRequest, 'user' | 'app'> & {
user?: { id: string };
app: {
locals?: {
ocr?: TCustomConfig['ocr'];
};
};
};
file: Express.Multer.File;
loadAuthValues: (params: {
userId: string;
authFields: string[];
optional?: Set<string>;
}) => Promise<Record<string, string | undefined>>;
}
/**
* Uploads a document to Mistral API using file streaming to avoid loading the entire file into memory
* @param params Upload parameters
* @param params.filePath The path to the file on disk
* @param params.fileName Optional filename to use (defaults to the name from filePath)
* @param params.apiKey Mistral API key
* @param params.baseURL Mistral API base URL
* @returns The response from Mistral API
*/
export async function uploadDocumentToMistral({
apiKey,
filePath,
baseURL = DEFAULT_MISTRAL_BASE_URL,
fileName = '',
}: {
apiKey: string;
filePath: string;
baseURL?: string;
fileName?: string;
}): Promise<MistralFileUploadResponse> {
const form = new FormData();
form.append('purpose', 'ocr');
const actualFileName = fileName || path.basename(filePath);
const fileStream = fs.createReadStream(filePath);
form.append('file', fileStream, { filename: actualFileName });
return axios
.post(`${baseURL}/files`, form, {
headers: {
Authorization: `Bearer ${apiKey}`,
...form.getHeaders(),
},
maxBodyLength: Infinity,
maxContentLength: Infinity,
})
.then((res) => res.data)
.catch((error) => {
throw error;
});
}
export async function getSignedUrl({
apiKey,
fileId,
expiry = 24,
baseURL = DEFAULT_MISTRAL_BASE_URL,
}: {
apiKey: string;
fileId: string;
expiry?: number;
baseURL?: string;
}): Promise<MistralSignedUrlResponse> {
return axios
.get(`${baseURL}/files/${fileId}/url?expiry=${expiry}`, {
headers: {
Authorization: `Bearer ${apiKey}`,
},
})
.then((res) => res.data)
.catch((error) => {
logger.error('Error fetching signed URL:', error.message);
throw error;
});
}
/**
* @param {Object} params
* @param {string} params.apiKey
* @param {string} params.url - The document or image URL
* @param {string} [params.documentType='document_url'] - 'document_url' or 'image_url'
* @param {string} [params.model]
* @param {string} [params.baseURL]
* @returns {Promise<OCRResult>}
*/
export async function performOCR({
url,
apiKey,
model = DEFAULT_MISTRAL_MODEL,
baseURL = DEFAULT_MISTRAL_BASE_URL,
documentType = 'document_url',
}: {
url: string;
apiKey: string;
model?: string;
baseURL?: string;
documentType?: 'document_url' | 'image_url';
}): Promise<OCRResult> {
const documentKey = documentType === 'image_url' ? 'image_url' : 'document_url';
return axios
.post(
`${baseURL}/ocr`,
{
model,
image_limit: 0,
include_image_base64: false,
document: {
type: documentType,
[documentKey]: url,
},
},
{
headers: {
'Content-Type': 'application/json',
Authorization: `Bearer ${apiKey}`,
},
},
)
.then((res) => res.data)
.catch((error) => {
logger.error('Error performing OCR:', error.message);
throw error;
});
}
/**
* Determines if a value needs to be loaded from environment
*/
function needsEnvLoad(value: string): boolean {
return envVarRegex.test(value) || !value.trim();
}
/**
* Gets the environment variable name for a config value
*/
function getEnvVarName(configValue: string, defaultName: string): string {
if (!envVarRegex.test(configValue)) {
return defaultName;
}
return extractVariableName(configValue) || defaultName;
}
/**
* Resolves a configuration value from either hardcoded or environment
*/
async function resolveConfigValue(
configValue: string,
defaultEnvName: string,
authValues: Record<string, string | undefined>,
defaultValue?: string,
): Promise<string> {
// If it's a hardcoded value (not env var and not empty), use it directly
if (!needsEnvLoad(configValue)) {
return configValue;
}
// Otherwise, get from auth values
const envVarName = getEnvVarName(configValue, defaultEnvName);
return authValues[envVarName] || defaultValue || '';
}
/**
* Loads authentication configuration from OCR config
*/
async function loadAuthConfig(context: OCRContext): Promise<AuthConfig> {
const ocrConfig = context.req.app.locals?.ocr;
const apiKeyConfig = ocrConfig?.apiKey || '';
const baseURLConfig = ocrConfig?.baseURL || '';
// If both are hardcoded, return them directly
if (!needsEnvLoad(apiKeyConfig) && !needsEnvLoad(baseURLConfig)) {
return {
apiKey: apiKeyConfig,
baseURL: baseURLConfig,
};
}
// Build auth fields array
const authFields: string[] = [];
if (needsEnvLoad(baseURLConfig)) {
authFields.push(getEnvVarName(baseURLConfig, 'OCR_BASEURL'));
}
if (needsEnvLoad(apiKeyConfig)) {
authFields.push(getEnvVarName(apiKeyConfig, 'OCR_API_KEY'));
}
// Load auth values
const authValues = await context.loadAuthValues({
userId: context.req.user?.id || '',
authFields,
optional: new Set(['OCR_BASEURL']),
});
// Resolve each value
const apiKey = await resolveConfigValue(apiKeyConfig, 'OCR_API_KEY', authValues);
const baseURL = await resolveConfigValue(
baseURLConfig,
'OCR_BASEURL',
authValues,
DEFAULT_MISTRAL_BASE_URL,
);
return { apiKey, baseURL };
}
/**
* Gets the model configuration
*/
function getModelConfig(ocrConfig: TCustomConfig['ocr']): string {
const modelConfig = ocrConfig?.mistralModel || '';
if (!modelConfig.trim()) {
return DEFAULT_MISTRAL_MODEL;
}
if (envVarRegex.test(modelConfig)) {
return extractEnvVariable(modelConfig) || DEFAULT_MISTRAL_MODEL;
}
return modelConfig.trim();
}
/**
* Determines document type based on file
*/
function getDocumentType(file: Express.Multer.File): 'image_url' | 'document_url' {
const mimetype = (file.mimetype || '').toLowerCase();
const originalname = file.originalname || '';
const isImage =
mimetype.startsWith('image') || /\.(png|jpe?g|gif|bmp|webp|tiff?)$/i.test(originalname);
return isImage ? 'image_url' : 'document_url';
}
/**
* Processes OCR result pages into aggregated text and images
*/
function processOCRResult(ocrResult: OCRResult): { text: string; images: string[] } {
let aggregatedText = '';
const images: string[] = [];
ocrResult.pages.forEach((page: OCRResultPage, index: number) => {
if (ocrResult.pages.length > 1) {
aggregatedText += `# PAGE ${index + 1}\n`;
}
aggregatedText += page.markdown + '\n\n';
if (!page.images || page.images.length === 0) {
return;
}
page.images.forEach((image: OCRImage) => {
if (image.image_base64) {
images.push(image.image_base64);
}
});
});
return { text: aggregatedText, images };
}
/**
* Creates an error message for OCR operations
*/
function createOCRError(error: unknown, baseMessage: string): Error {
const axiosError = error as AxiosError<MistralOCRError>;
const detail = axiosError?.response?.data?.detail;
const message = detail || baseMessage;
const responseMessage = axiosError?.response?.data?.message;
const errorLog = logAxiosError({ error: axiosError, message });
const fullMessage = responseMessage ? `${errorLog} - ${responseMessage}` : errorLog;
return new Error(fullMessage);
}
/**
* Uploads a file to the Mistral OCR API and processes the OCR result.
*
* @param params - The params object.
* @param params.req - The request object from Express. It should have a `user` property with an `id`
* representing the user
* @param params.file - The file object, which is part of the request. The file object should
* have a `mimetype` property that tells us the file type
* @param params.loadAuthValues - Function to load authentication values
* @returns - The result object containing the processed `text` and `images` (not currently used),
* along with the `filename` and `bytes` properties.
*/
export const uploadMistralOCR = async (context: OCRContext): Promise<MistralOCRUploadResult> => {
try {
const { apiKey, baseURL } = await loadAuthConfig(context);
const model = getModelConfig(context.req.app.locals?.ocr);
// Upload file
const mistralFile = await uploadDocumentToMistral({
filePath: context.file.path,
fileName: context.file.originalname,
apiKey,
baseURL,
});
// Get signed URL
const signedUrlResponse = await getSignedUrl({
apiKey,
baseURL,
fileId: mistralFile.id,
});
// Perform OCR
const documentType = getDocumentType(context.file);
const ocrResult = await performOCR({
apiKey,
baseURL,
model,
url: signedUrlResponse.url,
documentType,
});
// Process result
const { text, images } = processOCRResult(ocrResult);
return {
filename: context.file.originalname,
bytes: text.length * 4,
filepath: FileSources.mistral_ocr,
text,
images,
};
} catch (error) {
throw createOCRError(error, 'Error uploading document to Mistral OCR API');
}
};
/**
* Use Azure Mistral OCR API to processe the OCR result.
*
* @param params - The params object.
* @param params.req - The request object from Express. It should have a `user` property with an `id`
* representing the user
* @param params.file - The file object, which is part of the request. The file object should
* have a `mimetype` property that tells us the file type
* @param params.loadAuthValues - Function to load authentication values
* @returns - The result object containing the processed `text` and `images` (not currently used),
* along with the `filename` and `bytes` properties.
*/
export const uploadAzureMistralOCR = async (
context: OCRContext,
): Promise<MistralOCRUploadResult> => {
try {
const { apiKey, baseURL } = await loadAuthConfig(context);
const model = getModelConfig(context.req.app.locals?.ocr);
// Read file as base64
const buffer = fs.readFileSync(context.file.path);
const base64 = buffer.toString('base64');
// Perform OCR directly with base64
const documentType = getDocumentType(context.file);
const ocrResult = await performOCR({
apiKey,
baseURL,
model,
url: `data:image/jpeg;base64,${base64}`,
documentType,
});
// Process result
const { text, images } = processOCRResult(ocrResult);
return {
filename: context.file.originalname,
bytes: text.length * 4,
filepath: FileSources.azure_mistral_ocr,
text,
images,
};
} catch (error) {
throw createOCRError(error, 'Error uploading document to Azure Mistral OCR API');
}
};

View file

@ -9,6 +9,8 @@ export * from './flow/manager';
export * from './agents';
/* Endpoints */
export * from './endpoints';
/* Files */
export * from './files';
/* types */
export type * from './mcp/types';
export type * from './flow/types';

View file

@ -88,7 +88,7 @@ export class MCPConnection extends EventEmitter {
this.client = new Client(
{
name: '@librechat/api-client',
version: '1.2.2',
version: '1.2.3',
},
{
capabilities: {},

View file

@ -1,4 +1,5 @@
export * from './azure';
export * from './events';
export * from './mistral';
export * from './openai';
export * from './run';

View file

@ -0,0 +1,82 @@
/**
* Mistral OCR API Types
* Based on https://docs.mistral.ai/api/#tag/ocr/operation/ocr_v1_ocr_post
*/
export interface MistralFileUploadResponse {
id: string;
object: string;
bytes: number;
created_at: number;
filename: string;
purpose: string;
}
export interface MistralSignedUrlResponse {
url: string;
expires_at: number;
}
export interface OCRImage {
id: string;
top_left_x: number;
top_left_y: number;
bottom_right_x: number;
bottom_right_y: number;
image_base64: string;
image_annotation?: string;
}
export interface PageDimensions {
dpi: number;
height: number;
width: number;
}
export interface OCRResultPage {
index: number;
markdown: string;
images: OCRImage[];
dimensions: PageDimensions;
}
export interface OCRUsageInfo {
pages_processed: number;
doc_size_bytes: number;
}
export interface OCRResult {
pages: OCRResultPage[];
model: string;
document_annotation?: string | null;
usage_info: OCRUsageInfo;
}
export interface MistralOCRRequest {
model: string;
image_limit?: number;
include_image_base64?: boolean;
document: {
type: 'document_url' | 'image_url';
document_url?: string;
image_url?: string;
};
}
export interface MistralOCRError {
detail?: string;
message?: string;
error?: {
message?: string;
type?: string;
code?: string;
};
}
export interface MistralOCRUploadResult {
filename: string;
bytes: number;
filepath: string;
text: string;
images: string[];
}

View file

@ -1,7 +1,6 @@
const axios = require('axios');
const { createAxiosInstance } = require('./index');
import axios from 'axios';
import { createAxiosInstance } from './axios';
// Mock axios
jest.mock('axios', () => ({
interceptors: {
request: { use: jest.fn(), eject: jest.fn() },
@ -20,7 +19,13 @@ jest.mock('axios', () => ({
post: jest.fn().mockResolvedValue({ data: {} }),
put: jest.fn().mockResolvedValue({ data: {} }),
delete: jest.fn().mockResolvedValue({ data: {} }),
reset: jest.fn().mockImplementation(function () {
reset: jest.fn().mockImplementation(function (this: {
get: jest.Mock;
post: jest.Mock;
put: jest.Mock;
delete: jest.Mock;
create: jest.Mock;
}) {
this.get.mockClear();
this.post.mockClear();
this.put.mockClear();

View file

@ -0,0 +1,77 @@
import axios from 'axios';
import { logger } from '@librechat/data-schemas';
import type { AxiosInstance, AxiosProxyConfig, AxiosError } from 'axios';
/**
* Logs Axios errors based on the error object and a custom message.
* @param options - The options object.
* @param options.message - The custom message to be logged.
* @param options.error - The Axios error object.
* @returns The log message.
*/
export const logAxiosError = ({ message, error }: { message: string; error: AxiosError }) => {
let logMessage = message;
try {
const stack = error.stack || 'No stack trace available';
if (error.response?.status) {
const { status, headers, data } = error.response;
logMessage = `${message} The server responded with status ${status}: ${error.message}`;
logger.error(logMessage, {
status,
headers,
data,
stack,
});
} else if (error.request) {
const { method, url } = error.config || {};
logMessage = `${message} No response received for ${method ? method.toUpperCase() : ''} ${url || ''}: ${error.message}`;
logger.error(logMessage, {
requestInfo: { method, url },
stack,
});
} else if (error?.message?.includes("Cannot read properties of undefined (reading 'status')")) {
logMessage = `${message} It appears the request timed out or was unsuccessful: ${error.message}`;
logger.error(logMessage, { stack });
} else {
logMessage = `${message} An error occurred while setting up the request: ${error.message}`;
logger.error(logMessage, { stack });
}
} catch (err: unknown) {
logMessage = `Error in logAxiosError: ${(err as Error).message}`;
logger.error(logMessage, { stack: (err as Error).stack || 'No stack trace available' });
}
return logMessage;
};
/**
* Creates and configures an Axios instance with optional proxy settings.
* @returns A configured Axios instance
* @throws If there's an issue creating the Axios instance or parsing the proxy URL
*/
export function createAxiosInstance(): AxiosInstance {
const instance = axios.create();
if (process.env.proxy) {
try {
const url = new URL(process.env.proxy);
const proxyConfig: Partial<AxiosProxyConfig> = {
host: url.hostname.replace(/^\[|\]$/g, ''),
protocol: url.protocol.replace(':', ''),
};
if (url.port) {
proxyConfig.port = parseInt(url.port, 10);
}
instance.defaults.proxy = proxyConfig as AxiosProxyConfig;
} catch (error) {
console.error('Error parsing proxy URL:', error);
throw new Error(`Invalid proxy URL: ${process.env.proxy}`);
}
}
return instance;
}

View file

@ -1,3 +1,4 @@
export * from './axios';
export * from './azure';
export * from './common';
export * from './events';

View file

@ -593,6 +593,7 @@ export type TStartupConfig = {
export enum OCRStrategy {
MISTRAL_OCR = 'mistral_ocr',
CUSTOM_OCR = 'custom_ocr',
AZURE_MISTRAL_OCR = 'azure_mistral_ocr',
}
export enum SearchCategories {
@ -1363,7 +1364,7 @@ export enum Constants {
/** Key for the app's version. */
VERSION = 'v0.7.8',
/** Key for the Custom Config's version (librechat.yaml). */
CONFIG_VERSION = '1.2.6',
CONFIG_VERSION = '1.2.8',
/** Standard value for the first message's `parentMessageId` value, to indicate no parent exists. */
NO_PARENT = '00000000-0000-0000-0000-000000000000',
/** Standard value for the initial conversationId before a request is sent */

View file

@ -10,6 +10,7 @@ export enum FileSources {
vectordb = 'vectordb',
execute_code = 'execute_code',
mistral_ocr = 'mistral_ocr',
azure_mistral_ocr = 'azure_mistral_ocr',
text = 'text',
}