💻 feat: Add Proxy Config for Mistral OCR API (#9629)

* 💻 feat: Add proxy configuration support for Mistral OCR API requests

* refactor: Implement proxy support for Mistral API requests using HttpsProxyAgent
This commit is contained in:
Danny Avila 2025-09-14 18:50:41 -04:00 committed by GitHub
parent 2ce8f1f686
commit 5bfb06b417
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 480 additions and 37 deletions

View file

@ -10,6 +10,9 @@ jest.mock('form-data', () => {
getLength: jest.fn().mockReturnValue(100),
}));
});
// Stub out the proxy agent: the fake constructor simply echoes the URL it was
// given as `proxyUrl`, so tests can assert which proxy URL reached the request.
jest.mock('https-proxy-agent', () => {
  const HttpsProxyAgent = jest.fn().mockImplementation((url) => ({ proxyUrl: url }));
  return { HttpsProxyAgent };
});
jest.mock('axios', () => {
const mockAxiosInstance = {
get: jest.fn().mockResolvedValue({ data: {} }),
@ -44,6 +47,7 @@ jest.mock('~/utils/axios', () => ({
import * as fs from 'fs';
import axios from 'axios';
import { HttpsProxyAgent } from 'https-proxy-agent';
import type { Readable } from 'stream';
import type {
MistralFileUploadResponse,
@ -1182,6 +1186,8 @@ describe('MistralOCR Service', () => {
describe('Mixed env var and hardcoded configuration', () => {
beforeEach(() => {
// Clean up any PROXY env var from previous tests
delete process.env.PROXY;
const mockReadStream: MockReadStream = {
on: jest.fn().mockImplementation(function (
this: MockReadStream,
@ -1708,9 +1714,403 @@ describe('MistralOCR Service', () => {
});
});
describe('Proxy Configuration', () => {
const originalProxy = process.env.PROXY;
beforeEach(() => {
  // A test may have overridden the agent stub; put the URL-echoing default back.
  const agentMock = HttpsProxyAgent as unknown as jest.Mock;
  agentMock.mockImplementation((url) => ({ proxyUrl: url }));

  // Start every test with an empty call history on the axios mocks.
  [mockAxios.post!, mockAxios.get!, mockAxios.delete!].forEach((mock) => mock.mockClear());
});
afterEach(() => {
  // Put PROXY back exactly as it was before this suite touched it. The check is
  // against `undefined` (not truthiness) so that an originally empty-string
  // value is restored rather than deleted.
  if (originalProxy !== undefined) {
    process.env.PROXY = originalProxy;
  } else {
    delete process.env.PROXY;
  }

  // Clear recorded calls after each test so they do not leak into the next one.
  mockAxios.post!.mockClear();
  mockAxios.get!.mockClear();
  mockAxios.delete!.mockClear();
});
describe('uploadDocumentToMistral with proxy', () => {
  /** Builds the canned upload response that the mocked `axios.post` resolves with. */
  const buildUploadResponse = (id: string): { data: MistralFileUploadResponse } => ({
    data: {
      id,
      object: 'file',
      bytes: 1024,
      created_at: Date.now(),
      filename: 'test.pdf',
      purpose: 'ocr',
    },
  });

  /** Uploads the fixture document using the test API key. */
  const uploadTestDocument = () =>
    uploadDocumentToMistral({
      filePath: '/path/to/test.pdf',
      fileName: 'test.pdf',
      apiKey: 'test-api-key',
    });

  /** Asserts the upload request carried an agent constructed from `proxyUrl`. */
  const expectUploadViaProxy = (proxyUrl: string) => {
    expect(mockAxios.post).toHaveBeenCalledWith(
      'https://api.mistral.ai/v1/files',
      expect.anything(),
      expect.objectContaining({
        httpsAgent: expect.objectContaining({ proxyUrl }),
      }),
    );
  };

  beforeEach(() => {
    // Fake read stream: invokes an 'end' handler as soon as it is registered
    // and returns itself from `on` / `pipe` so calls can be chained.
    const fakeStream: MockReadStream = {
      on: jest.fn().mockImplementation(function (
        this: MockReadStream,
        event: string,
        handler: () => void,
      ) {
        if (event === 'end') {
          handler();
        }
        return this;
      }),
      pipe: jest.fn().mockImplementation(function (this: MockReadStream) {
        return this;
      }),
      pause: jest.fn(),
      resume: jest.fn(),
      emit: jest.fn(),
      once: jest.fn(),
      destroy: jest.fn(),
      path: '/path/to/test.pdf',
      fd: 1,
      flags: 'r',
      mode: 0o666,
      autoClose: true,
      bytesRead: 0,
      closed: false,
      pending: false,
    };
    (jest.mocked(fs).createReadStream as jest.Mock).mockReturnValue(fakeStream);
  });

  it('should use proxy configuration when PROXY env var is set', async () => {
    process.env.PROXY = 'http://proxy.example.com:8080';
    mockAxios.post!.mockResolvedValueOnce(buildUploadResponse('file-proxy-123'));

    await uploadTestDocument();

    expectUploadViaProxy('http://proxy.example.com:8080');
  });

  it('should handle proxy URL with authentication', async () => {
    process.env.PROXY = 'http://user:pass@proxy.example.com:8080';
    mockAxios.post!.mockResolvedValueOnce(buildUploadResponse('file-proxy-auth-123'));

    await uploadTestDocument();

    expectUploadViaProxy('http://user:pass@proxy.example.com:8080');
  });

  it('should handle IPv6 proxy addresses', async () => {
    process.env.PROXY = 'http://[::1]:8080';
    mockAxios.post!.mockResolvedValueOnce(buildUploadResponse('file-proxy-ipv6-123'));

    await uploadTestDocument();

    expectUploadViaProxy('http://[::1]:8080');
  });

  it('should not use proxy when PROXY env var is not set', async () => {
    delete process.env.PROXY;
    mockAxios.post!.mockResolvedValueOnce(buildUploadResponse('file-no-proxy-123'));

    await uploadTestDocument();

    // Without PROXY, the request config must not carry any `httpsAgent`.
    expect(mockAxios.post).toHaveBeenCalledWith(
      'https://api.mistral.ai/v1/files',
      expect.anything(),
      expect.not.objectContaining({
        httpsAgent: expect.anything(),
      }),
    );
  });
});
describe('performOCR with proxy', () => {
  it('should use proxy configuration when PROXY env var is set', async () => {
    process.env.PROXY = 'http://proxy.example.com:3128';

    const mockResponse: { data: OCRResult } = {
      data: {
        model: 'mistral-ocr-latest',
        pages: [
          {
            index: 0,
            markdown: 'Proxy test content',
            images: [],
            dimensions: { dpi: 300, height: 1100, width: 850 },
          },
        ],
        document_annotation: '',
        usage_info: {
          pages_processed: 1,
          doc_size_bytes: 1024,
        },
      },
    };
    mockAxios.post!.mockResolvedValueOnce(mockResponse);

    await performOCR({
      apiKey: 'test-api-key',
      url: 'https://document-url.com',
      model: 'mistral-ocr-latest',
      documentType: 'document_url',
    });

    // The stubbed agent echoes its constructor argument as `proxyUrl`.
    expect(mockAxios.post).toHaveBeenCalledWith(
      'https://api.mistral.ai/v1/ocr',
      expect.anything(),
      expect.objectContaining({
        httpsAgent: expect.objectContaining({
          proxyUrl: 'http://proxy.example.com:3128',
        }),
      }),
    );
  });

  it('should handle malformed proxy URLs gracefully', async () => {
    // Simulate the agent constructor rejecting the configured proxy URL.
    (HttpsProxyAgent as unknown as jest.Mock).mockImplementationOnce(() => {
      throw new Error('Invalid URL');
    });
    process.env.PROXY = 'not-a-valid-url';

    // Deliberately no `mockResolvedValueOnce` here: the agent is constructed
    // before the request is issued, so a queued response would never be
    // consumed. `mockClear()` does not discard queued once-values, so it would
    // leak into whichever test calls `axios.post` next.
    await expect(
      performOCR({
        apiKey: 'test-api-key',
        url: 'https://document-url.com',
      }),
    ).rejects.toThrow('Invalid URL');

    // The failure happens while building the request config, before any HTTP call.
    expect(mockAxios.post).not.toHaveBeenCalled();
  });
});
describe('Azure Mistral OCR with proxy', () => {
  beforeEach(() => {
    // The mocked `readFileSync` hands back fixed bytes instead of touching disk.
    const fileContents = Buffer.from('mock-file-content');
    (jest.mocked(fs).readFileSync as jest.Mock).mockReturnValue(fileContents);
  });

  it('should use proxy for Azure Mistral OCR requests', async () => {
    process.env.PROXY = 'http://proxy.example.com:8080';

    // Values the auth loader resolves for the `${...}` placeholders in the config.
    mockLoadAuthValues.mockResolvedValue({
      OCR_API_KEY: 'azure-api-key',
      OCR_BASEURL: 'https://azure.mistral.ai/v1',
    });

    const ocrPage = {
      index: 0,
      markdown: 'Azure OCR with proxy',
      images: [],
      dimensions: { dpi: 300, height: 1100, width: 850 },
    };
    mockAxios.post!.mockResolvedValueOnce({
      data: {
        model: 'mistral-ocr-latest',
        pages: [ocrPage],
        document_annotation: '',
        usage_info: {
          pages_processed: 1,
          doc_size_bytes: 1024,
        },
      },
    });

    const ocrConfig = {
      apiKey: '${OCR_API_KEY}',
      baseURL: '${OCR_BASEURL}',
      mistralModel: 'mistral-ocr-latest',
    };
    const req = {
      user: { id: 'user123' },
      config: { ocr: ocrConfig },
    } as unknown as ServerRequest;

    const file = {
      path: '/tmp/upload/azure-file.pdf',
      originalname: 'azure-document.pdf',
      mimetype: 'application/pdf',
    } as Express.Multer.File;

    await uploadAzureMistralOCR({ req, file, loadAuthValues: mockLoadAuthValues });

    // The OCR request must target the resolved base URL and carry the proxy agent.
    const expectedAgent = expect.objectContaining({
      proxyUrl: 'http://proxy.example.com:8080',
    });
    expect(mockAxios.post).toHaveBeenCalledWith(
      'https://azure.mistral.ai/v1/ocr',
      expect.anything(),
      expect.objectContaining({ httpsAgent: expectedAgent }),
    );
  });
});
describe('getSignedUrl with proxy', () => {
  it('should use proxy configuration when PROXY env var is set', async () => {
    const proxyUrl = 'https://secure-proxy.example.com:443';
    process.env.PROXY = proxyUrl;

    const signedUrlResponse: { data: MistralSignedUrlResponse } = {
      data: {
        url: 'https://signed-url.com',
        expires_at: Date.now() + 86400000,
      },
    };
    mockAxios.get!.mockResolvedValueOnce(signedUrlResponse);

    await getSignedUrl({ fileId: 'file-123', apiKey: 'test-api-key' });

    // The GET request config must carry an agent built from the PROXY value.
    const expectedConfig = expect.objectContaining({
      httpsAgent: expect.objectContaining({ proxyUrl }),
    });
    expect(mockAxios.get).toHaveBeenCalledWith(
      'https://api.mistral.ai/v1/files/file-123/url?expiry=24',
      expectedConfig,
    );
  });
});
describe('deleteMistralFile with proxy', () => {
  it('should use proxy configuration when PROXY env var is set', async () => {
    // NOTE(review): the agent is mocked in this file, so this only checks that
    // the URL is passed through unchanged — confirm the real agent accepts
    // socks5:// proxies before relying on that scheme.
    const proxyUrl = 'socks5://proxy.example.com:1080';
    process.env.PROXY = proxyUrl;
    mockAxios.delete!.mockResolvedValueOnce({ data: {} });

    await deleteMistralFile({ fileId: 'file-123', apiKey: 'test-api-key' });

    const expectedConfig = expect.objectContaining({
      httpsAgent: expect.objectContaining({ proxyUrl }),
    });
    expect(mockAxios.delete).toHaveBeenCalledWith(
      'https://api.mistral.ai/v1/files/file-123',
      expectedConfig,
    );
  });
});
});
describe('uploadAzureMistralOCR', () => {
beforeEach(() => {
  // Make sure no PROXY value set by an earlier test is still in effect.
  delete process.env.PROXY;

  // File reads return fixed bytes.
  const fileContents = Buffer.from('mock-file-content');
  (jest.mocked(fs).readFileSync as jest.Mock).mockReturnValue(fileContents);

  // Put the URL-echoing default implementation back on the agent stub.
  const agentMock = HttpsProxyAgent as unknown as jest.Mock;
  agentMock.mockImplementation((url) => ({ proxyUrl: url }));

  // Fully reset each axios mock (this also drops queued one-shot responses),
  // then give it back a default resolved value.
  mockAxios.post!.mockReset();
  mockAxios.post!.mockResolvedValue({ data: {} });

  mockAxios.get!.mockReset();
  mockAxios.get!.mockResolvedValue({ data: {} });

  mockAxios.delete!.mockReset();
  mockAxios.delete!.mockResolvedValue({ data: {} });
});
it('should process OCR using Azure Mistral with base64 encoding', async () => {
@ -1796,6 +2196,11 @@ describe('MistralOCR Service', () => {
});
describe('Mixed env var and hardcoded configuration', () => {
beforeEach(() => {
  // These tests expect to run without a proxy: drop any PROXY left by earlier tests.
  Reflect.deleteProperty(process.env, 'PROXY');
});
it('should preserve hardcoded baseURL when only apiKey is an env var', async () => {
// This test demonstrates the current bug
mockLoadAuthValues.mockResolvedValue({

View file

@ -2,6 +2,7 @@ import * as fs from 'fs';
import * as path from 'path';
import FormData from 'form-data';
import { logger } from '@librechat/data-schemas';
import { HttpsProxyAgent } from 'https-proxy-agent';
import {
FileSources,
envVarRegex,
@ -9,7 +10,7 @@ import {
extractVariableName,
} from 'librechat-data-provider';
import type { TCustomConfig } from 'librechat-data-provider';
import type { AxiosError } from 'axios';
import type { AxiosError, AxiosRequestConfig } from 'axios';
import type {
MistralFileUploadResponse,
MistralSignedUrlResponse,
@ -77,15 +78,21 @@ export async function uploadDocumentToMistral({
const fileStream = fs.createReadStream(filePath);
form.append('file', fileStream, { filename: actualFileName });
const config: AxiosRequestConfig = {
headers: {
Authorization: `Bearer ${apiKey}`,
...form.getHeaders(),
},
maxBodyLength: Infinity,
maxContentLength: Infinity,
};
if (process.env.PROXY) {
config.httpsAgent = new HttpsProxyAgent(process.env.PROXY);
}
return axios
.post(`${baseURL}/files`, form, {
headers: {
Authorization: `Bearer ${apiKey}`,
...form.getHeaders(),
},
maxBodyLength: Infinity,
maxContentLength: Infinity,
})
.post(`${baseURL}/files`, form, config)
.then((res) => res.data)
.catch((error) => {
throw error;
@ -103,12 +110,18 @@ export async function getSignedUrl({
expiry?: number;
baseURL?: string;
}): Promise<MistralSignedUrlResponse> {
const config: AxiosRequestConfig = {
headers: {
Authorization: `Bearer ${apiKey}`,
},
};
if (process.env.PROXY) {
config.httpsAgent = new HttpsProxyAgent(process.env.PROXY);
}
return axios
.get(`${baseURL}/files/${fileId}/url?expiry=${expiry}`, {
headers: {
Authorization: `Bearer ${apiKey}`,
},
})
.get(`${baseURL}/files/${fileId}/url?expiry=${expiry}`, config)
.then((res) => res.data)
.catch((error) => {
logger.error('Error fetching signed URL:', error.message);
@ -139,6 +152,18 @@ export async function performOCR({
documentType?: 'document_url' | 'image_url';
}): Promise<OCRResult> {
const documentKey = documentType === 'image_url' ? 'image_url' : 'document_url';
const config: AxiosRequestConfig = {
headers: {
'Content-Type': 'application/json',
Authorization: `Bearer ${apiKey}`,
},
};
if (process.env.PROXY) {
config.httpsAgent = new HttpsProxyAgent(process.env.PROXY);
}
return axios
.post(
`${baseURL}/ocr`,
@ -151,12 +176,7 @@ export async function performOCR({
[documentKey]: url,
},
},
{
headers: {
'Content-Type': 'application/json',
Authorization: `Bearer ${apiKey}`,
},
},
config,
)
.then((res) => res.data)
.catch((error) => {
@ -182,12 +202,18 @@ export async function deleteMistralFile({
apiKey: string;
baseURL?: string;
}): Promise<void> {
const config: AxiosRequestConfig = {
headers: {
Authorization: `Bearer ${apiKey}`,
},
};
if (process.env.PROXY) {
config.httpsAgent = new HttpsProxyAgent(process.env.PROXY);
}
try {
const result = await axios.delete(`${baseURL}/files/${fileId}`, {
headers: {
Authorization: `Bearer ${apiKey}`,
},
});
const result = await axios.delete(`${baseURL}/files/${fileId}`, config);
logger.debug(`Mistral file ${fileId} deleted successfully:`, result.data);
} catch (error) {
logger.error(`Error deleting Mistral file ${fileId}:`, error);
@ -543,17 +569,23 @@ async function createJWT(serviceKey: GoogleServiceAccount): Promise<string> {
* Exchanges JWT for access token
*/
async function exchangeJWTForAccessToken(jwt: string): Promise<string> {
const config: AxiosRequestConfig = {
headers: {
'Content-Type': 'application/x-www-form-urlencoded',
},
};
if (process.env.PROXY) {
config.httpsAgent = new HttpsProxyAgent(process.env.PROXY);
}
const response = await axios.post(
'https://oauth2.googleapis.com/token',
new URLSearchParams({
grant_type: 'urn:ietf:params:oauth:grant-type:jwt-bearer',
assertion: jwt,
}),
{
headers: {
'Content-Type': 'application/x-www-form-urlencoded',
},
},
config,
);
if (!response.data?.access_token) {
@ -608,14 +640,20 @@ async function performGoogleVertexOCR({
},
});
const config: AxiosRequestConfig = {
headers: {
'Content-Type': 'application/json',
Authorization: `Bearer ${accessToken}`,
Accept: 'application/json',
},
};
if (process.env.PROXY) {
config.httpsAgent = new HttpsProxyAgent(process.env.PROXY);
}
return axios
.post(baseURL, requestBody, {
headers: {
'Content-Type': 'application/json',
Authorization: `Bearer ${accessToken}`,
Accept: 'application/json',
},
})
.post(baseURL, requestBody, config)
.then((res) => {
logger.debug('Google Vertex AI response received');
return res.data;