// Mock setup must be hoisted
jest.mock('fs');
jest.mock('form-data', () => {
  return jest.fn().mockImplementation(() => ({
    append: jest.fn(),
    getHeaders: jest
      .fn()
      .mockReturnValue({ 'content-type': 'multipart/form-data; boundary=---boundary' }),
    getBuffer: jest.fn().mockReturnValue(Buffer.from('mock-form-data')),
    getLength: jest.fn().mockReturnValue(100),
  }));
});
jest.mock('axios', () => {
  const mockAxiosInstance = {
    get: jest.fn().mockResolvedValue({ data: {} }),
    post: jest.fn().mockResolvedValue({ data: {} }),
    put: jest.fn().mockResolvedValue({ data: {} }),
    delete: jest.fn().mockResolvedValue({ data: {} }),
    interceptors: {
      request: { use: jest.fn(), eject: jest.fn(), clear: jest.fn() },
      response: { use: jest.fn(), eject: jest.fn(), clear: jest.fn() },
    },
    defaults: {
      proxy: null,
    },
  };
  return {
    ...mockAxiosInstance,
    create: jest.fn().mockReturnValue(mockAxiosInstance),
  };
});
jest.mock('@librechat/data-schemas', () => ({
  logger: {
    error: jest.fn(),
  },
}));
jest.mock('~/utils/axios', () => ({
  // Hand back the mocked axios module itself so tests can assert on `mockAxios.get/post`.
  createAxiosInstance: () => jest.requireMock('axios'),
  logAxiosError: jest.fn(({ message }) => message || 'Error'),
}));

import * as fs from 'fs';
import axios from 'axios';
import type { Request as ExpressRequest } from 'express';
import type { Readable } from 'stream';
import type { MistralFileUploadResponse, MistralSignedUrlResponse, OCRResult } from '~/types';
import { logger as mockLogger } from '@librechat/data-schemas';
import {
  uploadDocumentToMistral,
  uploadMistralOCR,
  uploadAzureMistralOCR,
  getSignedUrl,
  performOCR,
} from './crud';

/**
 * Minimal stand-in for the stream returned by `fs.createReadStream`.
 * Only the members populated by `createMockReadStream` are provided.
 */
interface MockReadStream extends Partial<Readable> {
  on: jest.Mock;
  pipe: jest.Mock;
  pause: jest.Mock;
  resume: jest.Mock;
  emit: jest.Mock;
  once: jest.Mock;
  destroy: jest.Mock;
  path?: string;
  fd?: number;
  flags?: string;
  mode?: number;
  autoClose?: boolean;
  bytesRead?: number;
  closed?: boolean;
  pending?: boolean;
}

const mockAxios = jest.mocked(axios);
const mockLoadAuthValues = jest.fn();

/**
 * Creates a fake read stream whose `on('end', handler)` invokes the handler
 * immediately, so any consumer waiting for the end of the stream completes
 * synchronously.
 *
 * @param path - Value exposed as the stream's `path` property.
 */
function createMockReadStream(path: string): MockReadStream {
  return {
    on: jest.fn().mockImplementation(function (
      this: MockReadStream,
      event: string,
      handler: () => void,
    ) {
      // Simulate immediate 'end' event to make FormData complete processing
      if (event === 'end') {
        handler();
      }
      return this;
    }),
    pipe: jest.fn().mockImplementation(function (this: MockReadStream) {
      return this;
    }),
    pause: jest.fn(),
    resume: jest.fn(),
    emit: jest.fn(),
    once: jest.fn(),
    destroy: jest.fn(),
    path,
    fd: 1,
    flags: 'r',
    mode: 0o666,
    autoClose: true,
    bytesRead: 0,
    closed: false,
    pending: false,
  };
}

/** Makes the mocked `fs.createReadStream` return a fake stream for `path`. */
function stubReadStream(path: string): void {
  (jest.mocked(fs).createReadStream as jest.Mock).mockReturnValue(createMockReadStream(path));
}

/**
 * Builds a request carrying only the fields these tests supply:
 * `user.id` and `app.locals.ocr`. The double assertion is deliberate, since a
 * full Express request is not constructed here.
 */
function buildRequest(userId: string, ocr: Record<string, string>): ExpressRequest {
  return {
    user: { id: userId },
    app: { locals: { ocr } },
  } as unknown as ExpressRequest;
}

/** Builds a partial Multer file with the three fields these tests supply. */
function buildFile(path: string, originalname: string, mimetype: string): Express.Multer.File {
  return { path, originalname, mimetype } as Express.Multer.File;
}

/** Axios-style response for the file upload endpoint. */
function buildUploadResponse(
  filename: string,
  id = 'file-123',
  bytes = 1024,
): { data: MistralFileUploadResponse } {
  return {
    data: {
      id,
      object: 'file',
      bytes,
      created_at: Date.now(),
      filename,
      purpose: 'ocr',
    },
  };
}

/** Axios-style response for the signed URL endpoint (expiry set 24h ahead). */
function buildSignedUrlResponse(url = 'https://signed-url.com'): {
  data: MistralSignedUrlResponse;
} {
  return {
    data: {
      url,
      expires_at: Date.now() + 86400000,
    },
  };
}

/** One extracted image entry with a fixed 100x100 bounding box. */
function buildImage(id: string, imageBase64: string) {
  return {
    id,
    top_left_x: 0,
    top_left_y: 0,
    bottom_right_x: 100,
    bottom_right_y: 100,
    image_base64: imageBase64,
    image_annotation: '',
  };
}

/** One OCR page with fixed dimensions. */
function buildPage(
  index: number,
  markdown: string,
  images: ReturnType<typeof buildImage>[] = [],
) {
  return {
    index,
    markdown,
    images,
    dimensions: { dpi: 300, height: 1100, width: 850 },
  };
}

/**
 * Axios-style response for the OCR endpoint.
 * `usage_info.pages_processed` is derived from the number of pages supplied.
 */
function buildOcrResponse(
  model: string,
  pages: ReturnType<typeof buildPage>[],
  docSizeBytes = 1024,
) {
  return {
    data: {
      model,
      pages,
      document_annotation: '',
      usage_info: {
        pages_processed: pages.length,
        doc_size_bytes: docSizeBytes,
      },
    },
  };
}

describe('MistralOCR Service', () => {
  // Clears recorded calls after every test. Implementations configured by the
  // module mocks above are left intact (clearAllMocks does not reset them).
  afterEach(() => {
    jest.clearAllMocks();
  });

  describe('uploadDocumentToMistral', () => {
    beforeEach(() => {
      stubReadStream('/path/to/test.pdf');
    });

    it('should upload a document to Mistral API using file streaming', async () => {
      const mockResponse: { data: MistralFileUploadResponse } = {
        data: {
          id: 'file-123',
          object: 'file',
          bytes: 1024,
          created_at: Date.now(),
          filename: 'test.pdf',
          purpose: 'ocr',
        },
      };
      mockAxios.post!.mockResolvedValueOnce(mockResponse);

      const result = await uploadDocumentToMistral({
        filePath: '/path/to/test.pdf',
        fileName: 'test.pdf',
        apiKey: 'test-api-key',
      });

      // Check that createReadStream was called with the correct file path
      expect(jest.mocked(fs).createReadStream).toHaveBeenCalledWith('/path/to/test.pdf');

      // Since we're mocking FormData, we'll just check that axios was called correctly
      expect(mockAxios.post).toHaveBeenCalledWith(
        'https://api.mistral.ai/v1/files',
        expect.anything(),
        expect.objectContaining({
          headers: expect.objectContaining({
            Authorization: 'Bearer test-api-key',
          }),
          maxBodyLength: Infinity,
          maxContentLength: Infinity,
        }),
      );
      expect(result).toEqual(mockResponse.data);
    });

    it('should handle errors during document upload', async () => {
      const errorMessage = 'API error';
      mockAxios.post!.mockRejectedValueOnce(new Error(errorMessage));

      await expect(
        uploadDocumentToMistral({
          filePath: '/path/to/test.pdf',
          fileName: 'test.pdf',
          apiKey: 'test-api-key',
        }),
      ).rejects.toThrow(errorMessage);
    });
  });

  describe('getSignedUrl', () => {
    it('should fetch signed URL from Mistral API', async () => {
      const mockResponse: { data: MistralSignedUrlResponse } = {
        data: {
          url: 'https://document-url.com',
          expires_at: Date.now() + 86400000,
        },
      };
      mockAxios.get!.mockResolvedValueOnce(mockResponse);

      const result = await getSignedUrl({
        fileId: 'file-123',
        apiKey: 'test-api-key',
      });

      expect(mockAxios.get).toHaveBeenCalledWith(
        'https://api.mistral.ai/v1/files/file-123/url?expiry=24',
        {
          headers: {
            Authorization: 'Bearer test-api-key',
          },
        },
      );
      expect(result).toEqual(mockResponse.data);
    });

    it('should handle errors when fetching signed URL', async () => {
      const errorMessage = 'API error';
      mockAxios.get!.mockRejectedValueOnce(new Error(errorMessage));

      await expect(
        getSignedUrl({
          fileId: 'file-123',
          apiKey: 'test-api-key',
        }),
      ).rejects.toThrow();
      // The mocked logAxiosError returns the error's message, which is what gets logged.
      expect(mockLogger.error).toHaveBeenCalledWith('Error fetching signed URL:', errorMessage);
    });
  });

  describe('performOCR', () => {
    it('should perform OCR using Mistral API (document_url)', async () => {
      const mockResponse: { data: OCRResult } = {
        data: {
          model: 'mistral-ocr-latest',
          pages: [
            {
              index: 0,
              markdown: 'Page 1 content',
              images: [],
              dimensions: { dpi: 300, height: 1100, width: 850 },
            },
            {
              index: 1,
              markdown: 'Page 2 content',
              images: [],
              dimensions: { dpi: 300, height: 1100, width: 850 },
            },
          ],
          document_annotation: '',
          usage_info: {
            pages_processed: 2,
            doc_size_bytes: 1024,
          },
        },
      };
      mockAxios.post!.mockResolvedValueOnce(mockResponse);

      const result = await performOCR({
        apiKey: 'test-api-key',
        url: 'https://document-url.com',
        model: 'mistral-ocr-latest',
        documentType: 'document_url',
      });

      expect(mockAxios.post).toHaveBeenCalledWith(
        'https://api.mistral.ai/v1/ocr',
        {
          model: 'mistral-ocr-latest',
          include_image_base64: false,
          image_limit: 0,
          document: {
            type: 'document_url',
            document_url: 'https://document-url.com',
          },
        },
        {
          headers: {
            'Content-Type': 'application/json',
            Authorization: 'Bearer test-api-key',
          },
        },
      );
      expect(result).toEqual(mockResponse.data);
    });

    it('should perform OCR using Mistral API (image_url)', async () => {
      const mockResponse: { data: OCRResult } = {
        data: {
          model: 'mistral-ocr-latest',
          pages: [
            {
              index: 0,
              markdown: 'Image OCR content',
              images: [],
              dimensions: { dpi: 300, height: 1100, width: 850 },
            },
          ],
          document_annotation: '',
          usage_info: {
            pages_processed: 1,
            doc_size_bytes: 2048,
          },
        },
      };
      mockAxios.post!.mockResolvedValueOnce(mockResponse);

      const result = await performOCR({
        apiKey: 'test-api-key',
        url: 'https://image-url.com/image.png',
        model: 'mistral-ocr-latest',
        documentType: 'image_url',
      });

      expect(mockAxios.post).toHaveBeenCalledWith(
        'https://api.mistral.ai/v1/ocr',
        {
          model: 'mistral-ocr-latest',
          include_image_base64: false,
          image_limit: 0,
          document: {
            type: 'image_url',
            image_url: 'https://image-url.com/image.png',
          },
        },
        {
          headers: {
            'Content-Type': 'application/json',
            Authorization: 'Bearer test-api-key',
          },
        },
      );
      expect(result).toEqual(mockResponse.data);
    });

    it('should handle errors during OCR processing', async () => {
      const errorMessage = 'OCR processing error';
      mockAxios.post!.mockRejectedValueOnce(new Error(errorMessage));

      await expect(
        performOCR({
          apiKey: 'test-api-key',
          url: 'https://document-url.com',
        }),
      ).rejects.toThrow();
      expect(mockLogger.error).toHaveBeenCalledWith('Error performing OCR:', errorMessage);
    });
  });

  describe('uploadMistralOCR', () => {
    // Runs for the nested describe below as well.
    beforeEach(() => {
      stubReadStream('/tmp/upload/file.pdf');
    });

    it('should process OCR for a file with standard configuration', async () => {
      mockLoadAuthValues.mockResolvedValue({
        OCR_API_KEY: 'test-api-key',
        OCR_BASEURL: 'https://api.mistral.ai/v1',
      });

      // Queue, in call order: file upload, signed URL, OCR result with text and images
      mockAxios.post!.mockResolvedValueOnce(buildUploadResponse('document.pdf'));
      mockAxios.get!.mockResolvedValueOnce(buildSignedUrlResponse());
      mockAxios.post!.mockResolvedValueOnce(
        buildOcrResponse('mistral-medium', [
          buildPage(0, 'Page 1 content', [buildImage('img1', 'base64image1')]),
          buildPage(1, 'Page 2 content', [buildImage('img2', 'base64image2')]),
        ]),
      );

      // Use environment variable syntax to ensure loadAuthValues is called
      const req = buildRequest('user123', {
        apiKey: '${OCR_API_KEY}',
        baseURL: '${OCR_BASEURL}',
        mistralModel: 'mistral-medium',
      });
      const file = buildFile('/tmp/upload/file.pdf', 'document.pdf', 'application/pdf');

      const result = await uploadMistralOCR({
        req,
        file,
        loadAuthValues: mockLoadAuthValues,
      });

      expect(jest.mocked(fs).createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
      expect(mockLoadAuthValues).toHaveBeenCalledWith({
        userId: 'user123',
        authFields: ['OCR_BASEURL', 'OCR_API_KEY'],
        optional: expect.any(Set),
      });

      // Verify OCR result
      expect(result).toEqual({
        filename: 'document.pdf',
        bytes: expect.any(Number),
        filepath: 'mistral_ocr',
        text: expect.stringContaining('# PAGE 1'),
        images: ['base64image1', 'base64image2'],
      });
    });

    it('should process OCR for an image file and use image_url type', async () => {
      mockLoadAuthValues.mockResolvedValue({
        OCR_API_KEY: 'test-api-key',
        OCR_BASEURL: 'https://api.mistral.ai/v1',
      });

      // Queue, in call order: file upload, signed URL, OCR result for the image
      mockAxios.post!.mockResolvedValueOnce(buildUploadResponse('image.png', 'file-456', 2048));
      mockAxios.get!.mockResolvedValueOnce(
        buildSignedUrlResponse('https://signed-url.com/image.png'),
      );
      mockAxios.post!.mockResolvedValueOnce(
        buildOcrResponse(
          'mistral-medium',
          [buildPage(0, 'Image OCR result', [buildImage('img1', 'imgbase64')])],
          2048,
        ),
      );

      const req = buildRequest('user456', {
        apiKey: '${OCR_API_KEY}',
        baseURL: '${OCR_BASEURL}',
        mistralModel: 'mistral-medium',
      });
      const file = buildFile('/tmp/upload/image.png', 'image.png', 'image/png');

      const result = await uploadMistralOCR({
        req,
        file,
        loadAuthValues: mockLoadAuthValues,
      });

      expect(jest.mocked(fs).createReadStream).toHaveBeenCalledWith('/tmp/upload/image.png');
      expect(mockLoadAuthValues).toHaveBeenCalledWith({
        userId: 'user456',
        authFields: ['OCR_BASEURL', 'OCR_API_KEY'],
        optional: expect.any(Set),
      });

      // Check that the OCR API was called with image_url type
      expect(mockAxios.post).toHaveBeenCalledWith(
        'https://api.mistral.ai/v1/ocr',
        expect.objectContaining({
          document: expect.objectContaining({
            type: 'image_url',
            image_url: 'https://signed-url.com/image.png',
          }),
        }),
        expect.any(Object),
      );
      expect(result).toEqual({
        filename: 'image.png',
        bytes: expect.any(Number),
        filepath: 'mistral_ocr',
        text: expect.stringContaining('Image OCR result'),
        images: ['imgbase64'],
      });
    });

    it('should process variable references in configuration', async () => {
      mockLoadAuthValues.mockResolvedValue({
        CUSTOM_API_KEY: 'custom-api-key',
        CUSTOM_BASEURL: 'https://custom-api.mistral.ai/v1',
      });

      mockAxios.post!.mockResolvedValueOnce(buildUploadResponse('document.pdf'));
      mockAxios.get!.mockResolvedValueOnce(buildSignedUrlResponse());
      mockAxios.post!.mockResolvedValueOnce(
        buildOcrResponse('mistral-large', [buildPage(0, 'Content from custom API')]),
      );

      const req = buildRequest('user123', {
        apiKey: '${CUSTOM_API_KEY}',
        baseURL: '${CUSTOM_BASEURL}',
        mistralModel: '${CUSTOM_MODEL}',
      });
      const file = buildFile('/tmp/upload/file.pdf', 'document.pdf', 'application/pdf');

      // Set the environment variable for the model, restoring the previous
      // value afterwards so it cannot leak into other tests.
      const previousModel = process.env.CUSTOM_MODEL;
      process.env.CUSTOM_MODEL = 'mistral-large';
      try {
        const result = await uploadMistralOCR({
          req,
          file,
          loadAuthValues: mockLoadAuthValues,
        });

        expect(jest.mocked(fs).createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');

        // Verify that custom environment variables were extracted and used
        expect(mockLoadAuthValues).toHaveBeenCalledWith({
          userId: 'user123',
          authFields: ['CUSTOM_BASEURL', 'CUSTOM_API_KEY'],
          optional: expect.any(Set),
        });

        // Check that mistral-large was used in the OCR API call
        expect(mockAxios.post).toHaveBeenCalledWith(
          expect.anything(),
          expect.objectContaining({
            model: 'mistral-large',
          }),
          expect.anything(),
        );
        expect(result.text).toEqual('Content from custom API\n\n');
      } finally {
        if (previousModel === undefined) {
          delete process.env.CUSTOM_MODEL;
        } else {
          process.env.CUSTOM_MODEL = previousModel;
        }
      }
    });

    it('should fall back to default values when variables are not properly formatted', async () => {
      mockLoadAuthValues.mockResolvedValue({
        OCR_API_KEY: 'default-api-key',
        OCR_BASEURL: undefined, // Testing optional parameter
      });

      mockAxios.post!.mockResolvedValueOnce(buildUploadResponse('document.pdf'));
      mockAxios.get!.mockResolvedValueOnce(buildSignedUrlResponse());
      mockAxios.post!.mockResolvedValueOnce(
        buildOcrResponse('mistral-ocr-latest', [buildPage(0, 'Default API result')]),
      );

      // Use environment variable syntax to ensure loadAuthValues is called
      const req = buildRequest('user123', {
        apiKey: '${INVALID_FORMAT}', // Valid env var format, but a name loadAuthValues does not return
        baseURL: '${OCR_BASEURL}', // Valid env var format
        mistralModel: 'mistral-ocr-latest', // Plain string value
      });
      const file = buildFile('/tmp/upload/file.pdf', 'document.pdf', 'application/pdf');

      await uploadMistralOCR({
        req,
        file,
        loadAuthValues: mockLoadAuthValues,
      });

      expect(jest.mocked(fs).createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');

      // Both referenced variable names are requested from loadAuthValues
      expect(mockLoadAuthValues).toHaveBeenCalledWith({
        userId: 'user123',
        authFields: ['OCR_BASEURL', 'INVALID_FORMAT'],
        optional: expect.any(Set),
      });

      // The plain-string model is passed through to the OCR call
      expect(mockAxios.post).toHaveBeenCalledWith(
        expect.anything(),
        expect.objectContaining({
          model: 'mistral-ocr-latest',
        }),
        expect.anything(),
      );
    });

    it('should handle API errors during OCR process', async () => {
      mockLoadAuthValues.mockResolvedValue({
        OCR_API_KEY: 'test-api-key',
      });

      // Mock file upload to fail
      mockAxios.post!.mockRejectedValueOnce(new Error('Upload failed'));

      const req = buildRequest('user123', {
        apiKey: 'OCR_API_KEY',
        baseURL: 'OCR_BASEURL',
      });
      const file = buildFile('/tmp/upload/file.pdf', 'document.pdf', 'application/pdf');

      await expect(
        uploadMistralOCR({
          req,
          file,
          loadAuthValues: mockLoadAuthValues,
        }),
      ).rejects.toThrow('Error uploading document to Mistral OCR API');

      expect(jest.mocked(fs).createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
    });

    it('should handle single page documents without page numbering', async () => {
      mockLoadAuthValues.mockResolvedValue({
        OCR_API_KEY: 'test-api-key',
        OCR_BASEURL: 'https://api.mistral.ai/v1', // Make sure this is included
      });

      // Queue, in call order: file upload, signed URL, OCR result
      mockAxios.post!.mockResolvedValueOnce(buildUploadResponse('single-page.pdf'));
      mockAxios.get!.mockResolvedValueOnce(buildSignedUrlResponse());
      mockAxios.post!.mockResolvedValueOnce(
        buildOcrResponse('mistral-ocr-latest', [buildPage(0, 'Single page content')]),
      );

      const req = buildRequest('user123', {
        apiKey: 'OCR_API_KEY',
        baseURL: 'OCR_BASEURL',
        mistralModel: 'mistral-ocr-latest',
      });
      const file = buildFile('/tmp/upload/file.pdf', 'single-page.pdf', 'application/pdf');

      const result = await uploadMistralOCR({
        req,
        file,
        loadAuthValues: mockLoadAuthValues,
      });

      expect(jest.mocked(fs).createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');

      // Verify that single page documents don't include page numbering
      expect(result.text).not.toContain('# PAGE');
      expect(result.text).toEqual('Single page content\n\n');
    });

    it('should use literal values in configuration when provided directly', async () => {
      // We'll still mock this but it should not be used for literal values
      mockLoadAuthValues.mockResolvedValue({});

      // Queue, in call order: file upload, signed URL, OCR result
      mockAxios.post!.mockResolvedValueOnce(buildUploadResponse('direct-values.pdf'));
      mockAxios.get!.mockResolvedValueOnce(buildSignedUrlResponse());
      mockAxios.post!.mockResolvedValueOnce(
        buildOcrResponse('mistral-direct-model', [
          buildPage(0, 'Processed with literal config values'),
        ]),
      );

      // Direct values that should be used as-is, without variable substitution
      const req = buildRequest('user123', {
        apiKey: 'actual-api-key-value',
        baseURL: 'https://direct-api-url.mistral.ai/v1',
        mistralModel: 'mistral-direct-model',
      });
      const file = buildFile('/tmp/upload/file.pdf', 'direct-values.pdf', 'application/pdf');

      const result = await uploadMistralOCR({
        req,
        file,
        loadAuthValues: mockLoadAuthValues,
      });

      expect(jest.mocked(fs).createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');

      // Verify the correct URL was used with the direct baseURL value
      expect(mockAxios.post).toHaveBeenCalledWith(
        'https://direct-api-url.mistral.ai/v1/files',
        expect.any(Object),
        expect.objectContaining({
          headers: expect.objectContaining({
            Authorization: 'Bearer actual-api-key-value',
          }),
        }),
      );

      // Check the OCR call was made with the direct model value
      expect(mockAxios.post).toHaveBeenCalledWith(
        'https://direct-api-url.mistral.ai/v1/ocr',
        expect.objectContaining({
          model: 'mistral-direct-model',
        }),
        expect.any(Object),
      );

      // Verify the result
      expect(result.text).toEqual('Processed with literal config values\n\n');

      // Verify loadAuthValues was never called since we used direct values
      expect(mockLoadAuthValues).not.toHaveBeenCalled();
    });

    it('should handle empty configuration values and use defaults', async () => {
      // Set up the mock values to be returned by loadAuthValues
      mockLoadAuthValues.mockResolvedValue({
        OCR_API_KEY: 'default-from-env-key',
        OCR_BASEURL: 'https://default-from-env.mistral.ai/v1',
      });

      // Queue, in call order: file upload, signed URL, OCR result
      mockAxios.post!.mockResolvedValueOnce(buildUploadResponse('empty-config.pdf'));
      mockAxios.get!.mockResolvedValueOnce(buildSignedUrlResponse());
      mockAxios.post!.mockResolvedValueOnce(
        buildOcrResponse('mistral-ocr-latest', [
          buildPage(0, 'Content from default configuration'),
        ]),
      );

      // Empty string values - should fall back to defaults
      const req = buildRequest('user123', {
        apiKey: '',
        baseURL: '',
        mistralModel: '',
      });
      const file = buildFile('/tmp/upload/file.pdf', 'empty-config.pdf', 'application/pdf');

      const result = await uploadMistralOCR({
        req,
        file,
        loadAuthValues: mockLoadAuthValues,
      });

      expect(jest.mocked(fs).createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');

      // Verify loadAuthValues was called with the default variable names
      expect(mockLoadAuthValues).toHaveBeenCalledWith({
        userId: 'user123',
        authFields: ['OCR_BASEURL', 'OCR_API_KEY'],
        optional: expect.any(Set),
      });

      // Verify the API calls used the default values from loadAuthValues
      expect(mockAxios.post).toHaveBeenCalledWith(
        'https://default-from-env.mistral.ai/v1/files',
        expect.any(Object),
        expect.objectContaining({
          headers: expect.objectContaining({
            Authorization: 'Bearer default-from-env-key',
          }),
        }),
      );

      // Verify the OCR model defaulted to mistral-ocr-latest
      expect(mockAxios.post).toHaveBeenCalledWith(
        'https://default-from-env.mistral.ai/v1/ocr',
        expect.objectContaining({
          model: 'mistral-ocr-latest',
        }),
        expect.any(Object),
      );

      // Check result
      expect(result.text).toEqual('Content from default configuration\n\n');
    });

    // The read stream stub is installed by the enclosing describe's beforeEach.
    describe('Mixed env var and hardcoded configuration', () => {
      it('should preserve hardcoded baseURL when only apiKey is an env var', async () => {
        // Regression: a hardcoded baseURL must survive when only apiKey is an env var reference.
        mockLoadAuthValues.mockResolvedValue({
          AZURE_MISTRAL_OCR_API_KEY: 'test-api-key-from-env',
          // Note: OCR_BASEURL is not returned, simulating it not being set
        });

        mockAxios.post!.mockResolvedValueOnce(buildUploadResponse('document.pdf'));
        mockAxios.get!.mockResolvedValueOnce(buildSignedUrlResponse());
        mockAxios.post!.mockResolvedValueOnce(
          buildOcrResponse('mistral-ocr-2503', [buildPage(0, 'Test content')]),
        );

        const req = buildRequest('user123', {
          apiKey: '${AZURE_MISTRAL_OCR_API_KEY}',
          baseURL: 'https://endpoint.models.ai.azure.com/v1',
          mistralModel: 'mistral-ocr-2503',
        });
        const file = buildFile('/tmp/upload/file.pdf', 'document.pdf', 'application/pdf');

        await uploadMistralOCR({
          req,
          file,
          loadAuthValues: mockLoadAuthValues,
        });

        // Check that loadAuthValues was called only with the env var field
        expect(mockLoadAuthValues).toHaveBeenCalledWith({
          userId: 'user123',
          authFields: ['AZURE_MISTRAL_OCR_API_KEY'],
          optional: expect.any(Set),
        });

        // The first POST is the upload; it must target the hardcoded baseURL
        const uploadCall = mockAxios.post!.mock.calls[0];
        expect(uploadCall[0]).toBe('https://endpoint.models.ai.azure.com/v1/files');
      });

      it('should preserve hardcoded apiKey when only baseURL is an env var', async () => {
        // Regression: a hardcoded apiKey must survive when only baseURL is an env var reference.
        mockLoadAuthValues.mockResolvedValue({
          CUSTOM_OCR_BASEURL: 'https://custom-ocr-endpoint.com/v1',
          // Note: OCR_API_KEY is not returned, simulating it not being set
        });

        mockAxios.post!.mockResolvedValueOnce(buildUploadResponse('document.pdf', 'file-456'));
        mockAxios.get!.mockResolvedValueOnce(buildSignedUrlResponse());
        mockAxios.post!.mockResolvedValueOnce(
          buildOcrResponse('mistral-ocr-latest', [buildPage(0, 'Test content')]),
        );

        const req = buildRequest('user456', {
          apiKey: 'hardcoded-api-key-12345',
          baseURL: '${CUSTOM_OCR_BASEURL}',
          mistralModel: 'mistral-ocr-latest',
        });
        const file = buildFile('/tmp/upload/file.pdf', 'document.pdf', 'application/pdf');

        await uploadMistralOCR({
          req,
          file,
          loadAuthValues: mockLoadAuthValues,
        });

        // Check that loadAuthValues was called only with the env var field
        expect(mockLoadAuthValues).toHaveBeenCalledWith({
          userId: 'user456',
          authFields: ['CUSTOM_OCR_BASEURL'],
          optional: expect.any(Set),
        });

        // The first POST is the upload; it must carry the hardcoded apiKey
        const uploadCall = mockAxios.post!.mock.calls[0];
        const authHeader = uploadCall[2]?.headers?.Authorization;
        expect(authHeader).toBe('Bearer hardcoded-api-key-12345');
      });
    });
  });

  describe('uploadAzureMistralOCR', () => {
    // Runs for the nested describe below as well.
    beforeEach(() => {
      (jest.mocked(fs).readFileSync as jest.Mock).mockReturnValue(Buffer.from('mock-file-content'));
    });

    it('should process OCR using Azure Mistral with base64 encoding', async () => {
      mockLoadAuthValues.mockResolvedValue({
        OCR_API_KEY: 'azure-api-key',
        OCR_BASEURL: 'https://azure.mistral.ai/v1',
      });

      // Only the OCR response is queued for this flow
      mockAxios.post!.mockResolvedValueOnce(
        buildOcrResponse('mistral-ocr-latest', [
          buildPage(0, 'Azure OCR content', [buildImage('azure1', 'azure-base64')]),
        ]),
      );

      const req = buildRequest('user123', {
        apiKey: '${OCR_API_KEY}',
        baseURL: '${OCR_BASEURL}',
        mistralModel: 'mistral-ocr-latest',
      });
      const file = buildFile(
        '/tmp/upload/azure-file.pdf',
        'azure-document.pdf',
        'application/pdf',
      );

      const result = await uploadAzureMistralOCR({
        req,
        file,
        loadAuthValues: mockLoadAuthValues,
      });

      expect(jest.mocked(fs).readFileSync).toHaveBeenCalledWith('/tmp/upload/azure-file.pdf');

      // Verify OCR was called with base64 data URL
      expect(mockAxios.post).toHaveBeenCalledWith(
        'https://azure.mistral.ai/v1/ocr',
        expect.objectContaining({
          document: expect.objectContaining({
            type: 'document_url',
            document_url: expect.stringMatching(/^data:application\/pdf;base64,/),
          }),
        }),
        expect.any(Object),
      );
      expect(result).toEqual({
        filename: 'azure-document.pdf',
        bytes: expect.any(Number),
        filepath: 'azure_mistral_ocr',
        text: 'Azure OCR content\n\n',
        images: ['azure-base64'],
      });
    });

    describe('Mixed env var and hardcoded configuration', () => {
      it('should preserve hardcoded baseURL when only apiKey is an env var', async () => {
        // Regression: a hardcoded baseURL must survive when only apiKey is an env var reference.
        mockLoadAuthValues.mockResolvedValue({
          AZURE_MISTRAL_OCR_API_KEY: 'test-api-key-from-env',
          // Note: OCR_BASEURL is not returned, simulating it not being set
        });

        mockAxios.post!.mockResolvedValueOnce(
          buildOcrResponse('mistral-ocr-2503', [buildPage(0, 'Test content')]),
        );

        const req = buildRequest('user123', {
          apiKey: '${AZURE_MISTRAL_OCR_API_KEY}',
          baseURL: 'https://endpoint.models.ai.azure.com/v1',
          mistralModel: 'mistral-ocr-2503',
        });
        const file = buildFile('/tmp/upload/file.pdf', 'document.pdf', 'application/pdf');

        await uploadAzureMistralOCR({
          req,
          file,
          loadAuthValues: mockLoadAuthValues,
        });

        // Check that loadAuthValues was called only with the env var field
        expect(mockLoadAuthValues).toHaveBeenCalledWith({
          userId: 'user123',
          authFields: ['AZURE_MISTRAL_OCR_API_KEY'],
          optional: expect.any(Set),
        });

        // The first POST is the OCR call; it must target the hardcoded baseURL
        const ocrCall = mockAxios.post!.mock.calls[0];
        expect(ocrCall[0]).toBe('https://endpoint.models.ai.azure.com/v1/ocr');
      });

      it('should preserve hardcoded apiKey when only baseURL is an env var', async () => {
        // Regression: a hardcoded apiKey must survive when only baseURL is an env var reference.
        mockLoadAuthValues.mockResolvedValue({
          CUSTOM_OCR_BASEURL: 'https://custom-ocr-endpoint.com/v1',
          // Note: OCR_API_KEY is not returned, simulating it not being set
        });

        mockAxios.post!.mockResolvedValueOnce(
          buildOcrResponse('mistral-ocr-latest', [buildPage(0, 'Test content')]),
        );

        const req = buildRequest('user456', {
          apiKey: 'hardcoded-api-key-12345',
          baseURL: '${CUSTOM_OCR_BASEURL}',
          mistralModel: 'mistral-ocr-latest',
        });
        const file = buildFile('/tmp/upload/file.pdf', 'document.pdf', 'application/pdf');

        await uploadAzureMistralOCR({
          req,
          file,
          loadAuthValues: mockLoadAuthValues,
        });

        // Check that loadAuthValues was called only with the env var field
        expect(mockLoadAuthValues).toHaveBeenCalledWith({
          userId: 'user456',
          authFields: ['CUSTOM_OCR_BASEURL'],
          optional: expect.any(Set),
        });

        // The first POST is the OCR call; it must carry the hardcoded apiKey
        const ocrCall = mockAxios.post!.mock.calls[0];
        const authHeader = ocrCall[2]?.headers?.Authorization;
        expect(authHeader).toBe('Bearer hardcoded-api-key-12345');
      });
    });
  });
});