👁️ feat: Azure Mistral OCR Strategy (#7888)

* 👁️ feat: Add Azure Mistral OCR strategy and endpoint integration

This commit introduces a new OCR strategy named 'azure_mistral_ocr', allowing the use of a Mistral OCR endpoint deployed on Azure. The configuration, schemas, and file upload strategies have been updated to support this integration, enabling seamless OCR processing via Azure-hosted Mistral services.

* 🗑️ chore: Clean up .gitignore by removing commented-out uncommon directory name

* chore: remove unused vars

* refactor: Move createAxiosInstance to packages/api/utils and update imports

- Removed the createAxiosInstance function from the config module and relocated it to a new utils module for better organization.
- Updated import paths in relevant files to reflect the new location of createAxiosInstance.
- Added tests for createAxiosInstance to ensure proper functionality and proxy configuration handling.

* chore: move axios helpers to packages/api

- Added logAxiosError function to @librechat/api for centralized error logging.
- Updated imports across various files to use the new logAxiosError function.
- Removed the old axios.js utility file as it is no longer needed.

* chore: Update Jest moduleNameMapper for improved path resolution

- Added a new mapping for '~/' to resolve module paths in Jest configuration, enhancing import handling for the project.

* feat: Implement Mistral OCR API integration in TS

* chore: Update MistralOCR tests based on new imports

* fix: Enhance MistralOCR configuration handling and tests

- Introduced helper functions for resolving configuration values from environment variables or hardcoded settings.
- Updated the uploadMistralOCR and uploadAzureMistralOCR functions to utilize the new configuration resolution logic.
- Improved test cases to ensure correct behavior when mixing environment variables and hardcoded values.
- Mocked file upload and signed URL responses in tests to validate functionality without external dependencies.

* feat: Enhance MistralOCR functionality with improved configuration and error handling

- Introduced helper functions for loading authentication configuration and resolving values from environment variables.
- Updated uploadMistralOCR and uploadAzureMistralOCR functions to utilize the new configuration logic.
- Added utility functions for processing OCR results and creating error messages.
- Improved document type determination and result aggregation for better OCR processing.

* refactor: Reorganize OCR type imports in Mistral CRUD file

- Moved OCRResult, OCRResultPage, and OCRImage imports to a more logical grouping for better readability and maintainability.

* feat: Add file exports to API and create files index

* chore: Update OCR types for enhanced structure and clarity

- Redesigned OCRImage interface to include mandatory fields and improved naming conventions.
- Added PageDimensions interface for better representation of page metrics.
- Updated OCRResultPage to include dimensions and mandatory images array.
- Refined OCRResult to include document annotation and usage information.

* refactor: use TS counterpart of uploadOCR methods

* ci: Update MistralOCR tests to reflect new OCR result structure

* chore: Bump version of @librechat/api to 1.2.3 in package.json and package-lock.json

* chore: Update CONFIG_VERSION to 1.2.8

* chore: remove unused sendEvent function from config module (now imported from '@librechat/api')

* chore: remove MistralOCR service files and tests (now in '@librechat/api')

* ci: update logger import in ModelService tests to use @librechat/data-schemas

---------

Co-authored-by: arthurolivierfortin <arthurolivier.fortin@gmail.com>
This commit is contained in:
Danny Avila 2025-06-13 15:14:57 -04:00 committed by GitHub
parent 46ff008b07
commit 5f2d1c5dc9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
37 changed files with 2245 additions and 1235 deletions

View file

@ -0,0 +1,131 @@
import axios from 'axios';
import { createAxiosInstance } from './axios';
jest.mock('axios', () => ({
interceptors: {
request: { use: jest.fn(), eject: jest.fn() },
response: { use: jest.fn(), eject: jest.fn() },
},
create: jest.fn().mockReturnValue({
defaults: {
proxy: null,
},
get: jest.fn().mockResolvedValue({ data: {} }),
post: jest.fn().mockResolvedValue({ data: {} }),
put: jest.fn().mockResolvedValue({ data: {} }),
delete: jest.fn().mockResolvedValue({ data: {} }),
}),
get: jest.fn().mockResolvedValue({ data: {} }),
post: jest.fn().mockResolvedValue({ data: {} }),
put: jest.fn().mockResolvedValue({ data: {} }),
delete: jest.fn().mockResolvedValue({ data: {} }),
reset: jest.fn().mockImplementation(function (this: {
get: jest.Mock;
post: jest.Mock;
put: jest.Mock;
delete: jest.Mock;
create: jest.Mock;
}) {
this.get.mockClear();
this.post.mockClear();
this.put.mockClear();
this.delete.mockClear();
this.create.mockClear();
}),
}));
describe('createAxiosInstance', () => {
const originalEnv = process.env;
beforeEach(() => {
// Reset mocks
jest.clearAllMocks();
// Create a clean copy of process.env
process.env = { ...originalEnv };
// Default: no proxy
delete process.env.proxy;
});
afterAll(() => {
// Restore original process.env
process.env = originalEnv;
});
test('creates an axios instance without proxy when no proxy env is set', () => {
const instance = createAxiosInstance();
expect(axios.create).toHaveBeenCalledTimes(1);
expect(instance.defaults.proxy).toBeNull();
});
test('configures proxy correctly with hostname and protocol', () => {
process.env.proxy = 'http://example.com';
const instance = createAxiosInstance();
expect(axios.create).toHaveBeenCalledTimes(1);
expect(instance.defaults.proxy).toEqual({
host: 'example.com',
protocol: 'http',
});
});
test('configures proxy correctly with hostname, protocol and port', () => {
process.env.proxy = 'https://proxy.example.com:8080';
const instance = createAxiosInstance();
expect(axios.create).toHaveBeenCalledTimes(1);
expect(instance.defaults.proxy).toEqual({
host: 'proxy.example.com',
protocol: 'https',
port: 8080,
});
});
test('handles proxy URLs with authentication', () => {
process.env.proxy = 'http://user:pass@proxy.example.com:3128';
const instance = createAxiosInstance();
expect(axios.create).toHaveBeenCalledTimes(1);
expect(instance.defaults.proxy).toEqual({
host: 'proxy.example.com',
protocol: 'http',
port: 3128,
// Note: The current implementation doesn't handle auth - if needed, add this functionality
});
});
test('throws error when proxy URL is invalid', () => {
process.env.proxy = 'invalid-url';
expect(() => createAxiosInstance()).toThrow('Invalid proxy URL');
expect(axios.create).toHaveBeenCalledTimes(1);
});
// If you want to test the actual URL parsing more thoroughly
test('handles edge case proxy URLs correctly', () => {
// IPv6 address
process.env.proxy = 'http://[::1]:8080';
let instance = createAxiosInstance();
expect(instance.defaults.proxy).toEqual({
host: '::1',
protocol: 'http',
port: 8080,
});
// URL with path (which should be ignored for proxy config)
process.env.proxy = 'http://proxy.example.com:8080/some/path';
instance = createAxiosInstance();
expect(instance.defaults.proxy).toEqual({
host: 'proxy.example.com',
protocol: 'http',
port: 8080,
});
});
});

View file

@ -0,0 +1,77 @@
import axios from 'axios';
import { logger } from '@librechat/data-schemas';
import type { AxiosInstance, AxiosProxyConfig, AxiosError } from 'axios';
/**
* Logs Axios errors based on the error object and a custom message.
* @param options - The options object.
* @param options.message - The custom message to be logged.
* @param options.error - The Axios error object.
* @returns The log message.
*/
export const logAxiosError = ({ message, error }: { message: string; error: AxiosError }) => {
let logMessage = message;
try {
const stack = error.stack || 'No stack trace available';
if (error.response?.status) {
const { status, headers, data } = error.response;
logMessage = `${message} The server responded with status ${status}: ${error.message}`;
logger.error(logMessage, {
status,
headers,
data,
stack,
});
} else if (error.request) {
const { method, url } = error.config || {};
logMessage = `${message} No response received for ${method ? method.toUpperCase() : ''} ${url || ''}: ${error.message}`;
logger.error(logMessage, {
requestInfo: { method, url },
stack,
});
} else if (error?.message?.includes("Cannot read properties of undefined (reading 'status')")) {
logMessage = `${message} It appears the request timed out or was unsuccessful: ${error.message}`;
logger.error(logMessage, { stack });
} else {
logMessage = `${message} An error occurred while setting up the request: ${error.message}`;
logger.error(logMessage, { stack });
}
} catch (err: unknown) {
logMessage = `Error in logAxiosError: ${(err as Error).message}`;
logger.error(logMessage, { stack: (err as Error).stack || 'No stack trace available' });
}
return logMessage;
};
/**
* Creates and configures an Axios instance with optional proxy settings.
* @returns A configured Axios instance
* @throws If there's an issue creating the Axios instance or parsing the proxy URL
*/
export function createAxiosInstance(): AxiosInstance {
const instance = axios.create();
if (process.env.proxy) {
try {
const url = new URL(process.env.proxy);
const proxyConfig: Partial<AxiosProxyConfig> = {
host: url.hostname.replace(/^\[|\]$/g, ''),
protocol: url.protocol.replace(':', ''),
};
if (url.port) {
proxyConfig.port = parseInt(url.port, 10);
}
instance.defaults.proxy = proxyConfig as AxiosProxyConfig;
} catch (error) {
console.error('Error parsing proxy URL:', error);
throw new Error(`Invalid proxy URL: ${process.env.proxy}`);
}
}
return instance;
}

View file

@ -1,3 +1,4 @@
export * from './axios';
export * from './azure';
export * from './common';
export * from './events';