mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-01-23 18:56:12 +01:00
* 🧹 chore: Remove Comments and Cleanup base64 handling for Azure Mistral OCR
* chore: Remove unnecessary await from MCP instructions formatting in AgentClient
* ci: Update document_url regex in MistralOCR tests to support PDF format
1570 lines
45 KiB
TypeScript
1570 lines
45 KiB
TypeScript
// Mock setup must be hoisted
|
|
// Auto-mock Node's `fs` module (no factory given) so tests can stub `fs.createReadStream`.
jest.mock('fs');
|
|
jest.mock('form-data', () => {
  // Produces the stub object returned each time the mocked constructor is invoked.
  const buildFormDataStub = () => {
    const headers = { 'content-type': 'multipart/form-data; boundary=---boundary' };
    const payload = Buffer.from('mock-form-data');

    return {
      append: jest.fn(),
      getHeaders: jest.fn().mockReturnValue(headers),
      getBuffer: jest.fn().mockReturnValue(payload),
      getLength: jest.fn().mockReturnValue(100),
    };
  };

  // The module export itself is the mocked constructor function.
  return jest.fn().mockImplementation(buildFormDataStub);
});
|
|
jest.mock('axios', () => {
  // Every HTTP verb resolves to an empty payload unless a test queues another response.
  const emptyResponder = () => jest.fn().mockResolvedValue({ data: {} });
  // Interceptor managers only need to accept calls; nothing is recorded beyond the mock itself.
  const interceptorStub = () => ({ use: jest.fn(), eject: jest.fn(), clear: jest.fn() });

  const mockAxiosInstance = {
    get: emptyResponder(),
    post: emptyResponder(),
    put: emptyResponder(),
    delete: emptyResponder(),
    interceptors: {
      request: interceptorStub(),
      response: interceptorStub(),
    },
    defaults: {
      proxy: null,
    },
  };

  // `create()` hands back the same instance whose methods are also spread onto the module,
  // so stubbing `axios.post` affects instances obtained through `axios.create()` as well.
  const create = jest.fn().mockReturnValue(mockAxiosInstance);

  return { ...mockAxiosInstance, create };
});
|
|
|
|
jest.mock('@librechat/data-schemas', () => {
  // Only `logger.error` is provided; it is a Jest mock so tests can assert on logged errors.
  const logger = { error: jest.fn() };

  return { logger };
});
|
|
|
|
jest.mock('~/utils/axios', () => {
  // Hands out the mocked `axios` module in place of a real configured instance.
  const createAxiosInstance = () => jest.requireMock('axios');

  // Returns the `message` of its argument, or 'Error' when that message is falsy.
  const logAxiosError = jest.fn((details) => details.message || 'Error');

  return { createAxiosInstance, logAxiosError };
});
|
|
|
|
import * as fs from 'fs';
|
|
import axios from 'axios';
|
|
import type { Request as ExpressRequest } from 'express';
|
|
import type { Readable } from 'stream';
|
|
import type { MistralFileUploadResponse, MistralSignedUrlResponse, OCRResult } from '~/types';
|
|
import { logger as mockLogger } from '@librechat/data-schemas';
|
|
import {
|
|
uploadDocumentToMistral,
|
|
uploadMistralOCR,
|
|
uploadAzureMistralOCR,
|
|
getSignedUrl,
|
|
performOCR,
|
|
} from './crud';
|
|
|
|
/**
 * Shape of the fake stream that the mocked `fs.createReadStream` returns in these tests.
 * Extends `Partial<Readable>` so only the members the tests care about need to be supplied.
 */
interface MockReadStream extends Partial<Readable> {
  /* Event-emitter style members, each replaced by a Jest mock. */
  on: jest.Mock;
  once: jest.Mock;
  emit: jest.Mock;

  /* Flow-control and teardown members, each replaced by a Jest mock. */
  pipe: jest.Mock;
  pause: jest.Mock;
  resume: jest.Mock;
  destroy: jest.Mock;

  /* Optional descriptive fields; the tests populate them with fixed values. */
  path?: string;
  fd?: number;
  flags?: string;
  mode?: number;
  autoClose?: boolean;
  bytesRead?: number;
  closed?: boolean;
  pending?: boolean;
}
|
|
|
|
// Typed view of the mocked `axios` module, used by the tests to queue responses and inspect calls.
const mockAxios = jest.mocked(axios);
|
|
|
|
// Stub passed as the `loadAuthValues` argument of `uploadMistralOCR`; each test sets what it resolves to.
const mockLoadAuthValues = jest.fn();
|
|
|
|
describe('MistralOCR Service', () => {
|
|
// Wipe recorded calls/results on every mock after each test so assertions do not see earlier calls.
afterEach(function clearRecordedMockCalls() {
  jest.clearAllMocks();
});
|
|
|
|
/**
 * Tests for `uploadDocumentToMistral`: the happy path (file streamed to the files endpoint)
 * and propagation of an error raised by the HTTP client.
 */
describe('uploadDocumentToMistral', () => {
  beforeEach(() => {
    // Stand-in for the stream returned by the mocked `fs.createReadStream`.
    const mockReadStream: MockReadStream = {
      on: jest.fn().mockImplementation(function (
        this: MockReadStream,
        event: string,
        handler: () => void,
      ) {
        // Invoke 'end' listeners immediately so a consumer waiting for end-of-stream can finish.
        if (event === 'end') {
          handler();
        }
        return this;
      }),
      pipe: jest.fn().mockImplementation(function (this: MockReadStream) {
        return this;
      }),
      pause: jest.fn(),
      resume: jest.fn(),
      emit: jest.fn(),
      once: jest.fn(),
      destroy: jest.fn(),
      path: '/path/to/test.pdf',
      fd: 1,
      flags: 'r',
      mode: 0o666,
      autoClose: true,
      bytesRead: 0,
      closed: false,
      pending: false,
    };

    (jest.mocked(fs).createReadStream as jest.Mock).mockReturnValue(mockReadStream);
  });

  it('should upload a document to Mistral API using file streaming', async () => {
    const mockResponse: { data: MistralFileUploadResponse } = {
      data: {
        id: 'file-123',
        object: 'file',
        bytes: 1024,
        created_at: Date.now(),
        filename: 'test.pdf',
        purpose: 'ocr',
      },
    };
    mockAxios.post!.mockResolvedValueOnce(mockResponse);

    // No try/catch here: a rejection or failed expectation already fails the test,
    // and Jest reports the error itself, so logging and rethrowing adds only noise.
    const result = await uploadDocumentToMistral({
      filePath: '/path/to/test.pdf',
      fileName: 'test.pdf',
      apiKey: 'test-api-key',
    });

    // The file must be opened as a stream from the given path
    expect(jest.mocked(fs).createReadStream).toHaveBeenCalledWith('/path/to/test.pdf');

    // FormData is mocked, so only the URL and request options are checked
    expect(mockAxios.post).toHaveBeenCalledWith(
      'https://api.mistral.ai/v1/files',
      expect.anything(),
      expect.objectContaining({
        headers: expect.objectContaining({
          Authorization: 'Bearer test-api-key',
        }),
        maxBodyLength: Infinity,
        maxContentLength: Infinity,
      }),
    );
    expect(result).toEqual(mockResponse.data);
  });

  it('should handle errors during document upload', async () => {
    const errorMessage = 'API error';
    mockAxios.post!.mockRejectedValueOnce(new Error(errorMessage));

    await expect(
      uploadDocumentToMistral({
        filePath: '/path/to/test.pdf',
        fileName: 'test.pdf',
        apiKey: 'test-api-key',
      }),
    ).rejects.toThrow(errorMessage);
  });
});
|
|
|
|
/** Tests for `getSignedUrl`: successful lookup and the logged-error path. */
describe('getSignedUrl', () => {
  it('should fetch signed URL from Mistral API', async () => {
    const signedUrlPayload: MistralSignedUrlResponse = {
      url: 'https://document-url.com',
      expires_at: Date.now() + 86400000,
    };
    const mockResponse: { data: MistralSignedUrlResponse } = { data: signedUrlPayload };
    mockAxios.get!.mockResolvedValueOnce(mockResponse);

    const lookup = { fileId: 'file-123', apiKey: 'test-api-key' };
    const result = await getSignedUrl(lookup);

    // Exact URL (including the expiry query) and exact request config are expected
    const expectedConfig = {
      headers: {
        Authorization: 'Bearer test-api-key',
      },
    };
    expect(mockAxios.get).toHaveBeenCalledWith(
      'https://api.mistral.ai/v1/files/file-123/url?expiry=24',
      expectedConfig,
    );
    expect(result).toEqual(mockResponse.data);
  });

  it('should handle errors when fetching signed URL', async () => {
    const failure = new Error('API error');
    mockAxios.get!.mockRejectedValueOnce(failure);

    const pending = getSignedUrl({
      fileId: 'file-123',
      apiKey: 'test-api-key',
    });
    await expect(pending).rejects.toThrow();

    // The failure message must be forwarded to the logger with the signed-URL prefix
    expect(mockLogger.error).toHaveBeenCalledWith('Error fetching signed URL:', failure.message);
  });
});
|
|
|
|
/** Tests for `performOCR`: both document types plus the logged-error path. */
describe('performOCR', () => {
  // Builds one OCR page fixture with no images and fixed dimensions.
  const pageFixture = (index: number, markdown: string) => ({
    index,
    markdown,
    images: [],
    dimensions: { dpi: 300, height: 1100, width: 850 },
  });

  // Request config every successful OCR call in this suite is expected to carry.
  const expectedRequestConfig = {
    headers: {
      'Content-Type': 'application/json',
      Authorization: 'Bearer test-api-key',
    },
  };

  it('should perform OCR using Mistral API (document_url)', async () => {
    const mockResponse: { data: OCRResult } = {
      data: {
        model: 'mistral-ocr-latest',
        pages: [pageFixture(0, 'Page 1 content'), pageFixture(1, 'Page 2 content')],
        document_annotation: '',
        usage_info: {
          pages_processed: 2,
          doc_size_bytes: 1024,
        },
      },
    };
    mockAxios.post!.mockResolvedValueOnce(mockResponse);

    const result = await performOCR({
      apiKey: 'test-api-key',
      url: 'https://document-url.com',
      model: 'mistral-ocr-latest',
      documentType: 'document_url',
    });

    const expectedBody = {
      model: 'mistral-ocr-latest',
      include_image_base64: false,
      image_limit: 0,
      document: {
        type: 'document_url',
        document_url: 'https://document-url.com',
      },
    };
    expect(mockAxios.post).toHaveBeenCalledWith(
      'https://api.mistral.ai/v1/ocr',
      expectedBody,
      expectedRequestConfig,
    );
    expect(result).toEqual(mockResponse.data);
  });

  it('should perform OCR using Mistral API (image_url)', async () => {
    const mockResponse: { data: OCRResult } = {
      data: {
        model: 'mistral-ocr-latest',
        pages: [pageFixture(0, 'Image OCR content')],
        document_annotation: '',
        usage_info: {
          pages_processed: 1,
          doc_size_bytes: 2048,
        },
      },
    };
    mockAxios.post!.mockResolvedValueOnce(mockResponse);

    const result = await performOCR({
      apiKey: 'test-api-key',
      url: 'https://image-url.com/image.png',
      model: 'mistral-ocr-latest',
      documentType: 'image_url',
    });

    const expectedBody = {
      model: 'mistral-ocr-latest',
      include_image_base64: false,
      image_limit: 0,
      document: {
        type: 'image_url',
        image_url: 'https://image-url.com/image.png',
      },
    };
    expect(mockAxios.post).toHaveBeenCalledWith(
      'https://api.mistral.ai/v1/ocr',
      expectedBody,
      expectedRequestConfig,
    );
    expect(result).toEqual(mockResponse.data);
  });

  it('should handle errors during OCR processing', async () => {
    const failure = new Error('OCR processing error');
    mockAxios.post!.mockRejectedValueOnce(failure);

    const pending = performOCR({
      apiKey: 'test-api-key',
      url: 'https://document-url.com',
    });
    await expect(pending).rejects.toThrow();

    // The failure message must be forwarded to the logger with the OCR prefix
    expect(mockLogger.error).toHaveBeenCalledWith('Error performing OCR:', failure.message);
  });
});
|
|
|
|
describe('uploadMistralOCR', () => {
|
|
// Before every test, make the mocked `fs.createReadStream` return a fake stream.
beforeEach(() => {
  // 'end' listeners are invoked as soon as they are registered; the stub returns itself.
  function registerListener(this: MockReadStream, event: string, handler: () => void) {
    if (event === 'end') {
      handler();
    }
    return this;
  }

  // `pipe` does nothing except return the stub it was called on.
  function passThroughPipe(this: MockReadStream) {
    return this;
  }

  const streamStub: MockReadStream = {
    on: jest.fn().mockImplementation(registerListener),
    pipe: jest.fn().mockImplementation(passThroughPipe),
    pause: jest.fn(),
    resume: jest.fn(),
    emit: jest.fn(),
    once: jest.fn(),
    destroy: jest.fn(),
    path: '/tmp/upload/file.pdf',
    fd: 1,
    flags: 'r',
    mode: 0o666,
    autoClose: true,
    bytesRead: 0,
    closed: false,
    pending: false,
  };

  const createReadStream = jest.mocked(fs).createReadStream as jest.Mock;
  createReadStream.mockReturnValue(streamStub);
});
|
|
|
|
it('should process OCR for a file with standard configuration', async () => {
  // Values the auth loader resolves for the `${...}` placeholders in the OCR config below
  mockLoadAuthValues.mockResolvedValue({
    OCR_API_KEY: 'test-api-key',
    OCR_BASEURL: 'https://api.mistral.ai/v1',
  });

  // One embedded-image fixture; only the id and base64 payload vary between pages.
  const embeddedImage = (id: string, image_base64: string) => ({
    id,
    top_left_x: 0,
    top_left_y: 0,
    bottom_right_x: 100,
    bottom_right_y: 100,
    image_base64,
    image_annotation: '',
  });

  // Queued in call order: file upload (POST), signed URL (GET), OCR (POST)
  const uploadedFile: MistralFileUploadResponse = {
    id: 'file-123',
    object: 'file',
    bytes: 1024,
    created_at: Date.now(),
    filename: 'document.pdf',
    purpose: 'ocr',
  };
  mockAxios.post!.mockResolvedValueOnce({ data: uploadedFile });

  const signedUrl: MistralSignedUrlResponse = {
    url: 'https://signed-url.com',
    expires_at: Date.now() + 86400000,
  };
  mockAxios.get!.mockResolvedValueOnce({ data: signedUrl });

  mockAxios.post!.mockResolvedValueOnce({
    data: {
      model: 'mistral-medium',
      pages: [
        {
          index: 0,
          markdown: 'Page 1 content',
          images: [embeddedImage('img1', 'base64image1')],
          dimensions: { dpi: 300, height: 1100, width: 850 },
        },
        {
          index: 1,
          markdown: 'Page 2 content',
          images: [embeddedImage('img2', 'base64image2')],
          dimensions: { dpi: 300, height: 1100, width: 850 },
        },
      ],
      document_annotation: '',
      usage_info: {
        pages_processed: 2,
        doc_size_bytes: 1024,
      },
    },
  });

  // Placeholder syntax is used for key and base URL so that the auth loader gets consulted
  const ocrConfig = {
    apiKey: '${OCR_API_KEY}',
    baseURL: '${OCR_BASEURL}',
    mistralModel: 'mistral-medium',
  };
  const req = {
    user: { id: 'user123' },
    app: { locals: { ocr: ocrConfig } },
  } as unknown as ExpressRequest;

  const file = {
    path: '/tmp/upload/file.pdf',
    originalname: 'document.pdf',
    mimetype: 'application/pdf',
  } as Express.Multer.File;

  const result = await uploadMistralOCR({ req, file, loadAuthValues: mockLoadAuthValues });

  const mockedFs = fs as jest.Mocked<typeof fs>;
  expect(mockedFs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');

  expect(mockLoadAuthValues).toHaveBeenCalledWith({
    userId: 'user123',
    authFields: ['OCR_BASEURL', 'OCR_API_KEY'],
    optional: expect.any(Set),
  });

  // Multi-page output carries a page heading and the images of every page, in order
  expect(result).toEqual({
    filename: 'document.pdf',
    bytes: expect.any(Number),
    filepath: 'mistral_ocr',
    text: expect.stringContaining('# PAGE 1'),
    images: ['base64image1', 'base64image2'],
  });
});
|
|
|
|
it('should process OCR for an image file and use image_url type', async () => {
  mockLoadAuthValues.mockResolvedValue({
    OCR_API_KEY: 'test-api-key',
    OCR_BASEURL: 'https://api.mistral.ai/v1',
  });

  // Queued in call order: file upload (POST), signed URL (GET), OCR (POST)
  const uploadedFile: MistralFileUploadResponse = {
    id: 'file-456',
    object: 'file',
    bytes: 2048,
    created_at: Date.now(),
    filename: 'image.png',
    purpose: 'ocr',
  };
  mockAxios.post!.mockResolvedValueOnce({ data: uploadedFile });

  const signedUrl: MistralSignedUrlResponse = {
    url: 'https://signed-url.com/image.png',
    expires_at: Date.now() + 86400000,
  };
  mockAxios.get!.mockResolvedValueOnce({ data: signedUrl });

  const ocrPage = {
    index: 0,
    markdown: 'Image OCR result',
    images: [
      {
        id: 'img1',
        top_left_x: 0,
        top_left_y: 0,
        bottom_right_x: 100,
        bottom_right_y: 100,
        image_base64: 'imgbase64',
        image_annotation: '',
      },
    ],
    dimensions: { dpi: 300, height: 1100, width: 850 },
  };
  mockAxios.post!.mockResolvedValueOnce({
    data: {
      model: 'mistral-medium',
      pages: [ocrPage],
      document_annotation: '',
      usage_info: {
        pages_processed: 1,
        doc_size_bytes: 2048,
      },
    },
  });

  const ocrConfig = {
    apiKey: '${OCR_API_KEY}',
    baseURL: '${OCR_BASEURL}',
    mistralModel: 'mistral-medium',
  };
  const req = {
    user: { id: 'user456' },
    app: { locals: { ocr: ocrConfig } },
  } as unknown as ExpressRequest;

  // The image mimetype is what distinguishes this input from the PDF tests
  const file = {
    path: '/tmp/upload/image.png',
    originalname: 'image.png',
    mimetype: 'image/png',
  } as Express.Multer.File;

  const result = await uploadMistralOCR({ req, file, loadAuthValues: mockLoadAuthValues });

  const mockedFs = fs as jest.Mocked<typeof fs>;
  expect(mockedFs.createReadStream).toHaveBeenCalledWith('/tmp/upload/image.png');

  expect(mockLoadAuthValues).toHaveBeenCalledWith({
    userId: 'user456',
    authFields: ['OCR_BASEURL', 'OCR_API_KEY'],
    optional: expect.any(Set),
  });

  // The OCR request must describe the document as an image and point at the signed URL
  const imageDocument = expect.objectContaining({
    type: 'image_url',
    image_url: 'https://signed-url.com/image.png',
  });
  expect(mockAxios.post).toHaveBeenCalledWith(
    'https://api.mistral.ai/v1/ocr',
    expect.objectContaining({ document: imageDocument }),
    expect.any(Object),
  );

  expect(result).toEqual({
    filename: 'image.png',
    bytes: expect.any(Number),
    filepath: 'mistral_ocr',
    text: expect.stringContaining('Image OCR result'),
    images: ['imgbase64'],
  });
});
|
|
|
|
/**
 * Verifies that `${...}` references in the OCR config are resolved: key and base URL via the
 * auth loader, and the model via `process.env.CUSTOM_MODEL`.
 *
 * The environment variable is restored in a `finally` block so this test does not leak
 * `CUSTOM_MODEL` into other tests running in the same process.
 */
it('should process variable references in configuration', async () => {
  // Values the auth loader resolves for the custom placeholder names
  mockLoadAuthValues.mockResolvedValue({
    CUSTOM_API_KEY: 'custom-api-key',
    CUSTOM_BASEURL: 'https://custom-api.mistral.ai/v1',
  });

  // Queued in call order: file upload (POST), signed URL (GET), OCR (POST)
  mockAxios.post!.mockResolvedValueOnce({
    data: {
      id: 'file-123',
      object: 'file',
      bytes: 1024,
      created_at: Date.now(),
      filename: 'document.pdf',
      purpose: 'ocr',
    } as MistralFileUploadResponse,
  });
  mockAxios.get!.mockResolvedValueOnce({
    data: {
      url: 'https://signed-url.com',
      expires_at: Date.now() + 86400000,
    } as MistralSignedUrlResponse,
  });
  mockAxios.post!.mockResolvedValueOnce({
    data: {
      model: 'mistral-large',
      pages: [
        {
          index: 0,
          markdown: 'Content from custom API',
          images: [],
          dimensions: { dpi: 300, height: 1100, width: 850 },
        },
      ],
      document_annotation: '',
      usage_info: {
        pages_processed: 1,
        doc_size_bytes: 1024,
      },
    },
  });

  const req = {
    user: { id: 'user123' },
    app: {
      locals: {
        ocr: {
          apiKey: '${CUSTOM_API_KEY}',
          baseURL: '${CUSTOM_BASEURL}',
          mistralModel: '${CUSTOM_MODEL}',
        },
      },
    },
  } as unknown as ExpressRequest;

  const file = {
    path: '/tmp/upload/file.pdf',
    originalname: 'document.pdf',
    mimetype: 'application/pdf',
  } as Express.Multer.File;

  // Set the environment variable for the model, remembering the prior value for cleanup
  const previousCustomModel = process.env.CUSTOM_MODEL;
  process.env.CUSTOM_MODEL = 'mistral-large';

  try {
    const result = await uploadMistralOCR({
      req,
      file,
      loadAuthValues: mockLoadAuthValues,
    });

    expect((fs as jest.Mocked<typeof fs>).createReadStream).toHaveBeenCalledWith(
      '/tmp/upload/file.pdf',
    );

    // Verify that custom environment variables were extracted and used
    expect(mockLoadAuthValues).toHaveBeenCalledWith({
      userId: 'user123',
      authFields: ['CUSTOM_BASEURL', 'CUSTOM_API_KEY'],
      optional: expect.any(Set),
    });

    // Check that mistral-large was used in the OCR API call
    expect(mockAxios.post).toHaveBeenCalledWith(
      expect.anything(),
      expect.objectContaining({
        model: 'mistral-large',
      }),
      expect.anything(),
    );

    expect(result.text).toEqual('Content from custom API\n\n');
  } finally {
    // Assigning `undefined` to process.env would store the string 'undefined', so delete instead
    if (previousCustomModel === undefined) {
      delete process.env.CUSTOM_MODEL;
    } else {
      process.env.CUSTOM_MODEL = previousCustomModel;
    }
  }
});
|
|
|
|
it('should fall back to default values when variables are not properly formatted', async () => {
  // The loader resolves a key under the default name and leaves the optional base URL undefined
  mockLoadAuthValues.mockResolvedValue({
    OCR_API_KEY: 'default-api-key',
    OCR_BASEURL: undefined,
  });

  // Queued in call order: file upload (POST), signed URL (GET), OCR (POST)
  const uploadedFile: MistralFileUploadResponse = {
    id: 'file-123',
    object: 'file',
    bytes: 1024,
    created_at: Date.now(),
    filename: 'document.pdf',
    purpose: 'ocr',
  };
  mockAxios.post!.mockResolvedValueOnce({ data: uploadedFile });

  const signedUrl: MistralSignedUrlResponse = {
    url: 'https://signed-url.com',
    expires_at: Date.now() + 86400000,
  };
  mockAxios.get!.mockResolvedValueOnce({ data: signedUrl });

  mockAxios.post!.mockResolvedValueOnce({
    data: {
      model: 'mistral-ocr-latest',
      pages: [
        {
          index: 0,
          markdown: 'Default API result',
          images: [],
          dimensions: { dpi: 300, height: 1100, width: 850 },
        },
      ],
      document_annotation: '',
      usage_info: {
        pages_processed: 1,
        doc_size_bytes: 1024,
      },
    },
  });

  // apiKey: placeholder syntax with a name the loader does not resolve.
  // baseURL: placeholder syntax with the default name.
  // mistralModel: plain string, no placeholder.
  const ocrConfig = {
    apiKey: '${INVALID_FORMAT}',
    baseURL: '${OCR_BASEURL}',
    mistralModel: 'mistral-ocr-latest',
  };
  const req = {
    user: { id: 'user123' },
    app: { locals: { ocr: ocrConfig } },
  } as unknown as ExpressRequest;

  const file = {
    path: '/tmp/upload/file.pdf',
    originalname: 'document.pdf',
    mimetype: 'application/pdf',
  } as Express.Multer.File;

  await uploadMistralOCR({ req, file, loadAuthValues: mockLoadAuthValues });

  const mockedFs = fs as jest.Mocked<typeof fs>;
  expect(mockedFs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');

  // The loader is asked for exactly the names that appeared inside the placeholders
  expect(mockLoadAuthValues).toHaveBeenCalledWith({
    userId: 'user123',
    authFields: ['OCR_BASEURL', 'INVALID_FORMAT'],
    optional: expect.any(Set),
  });

  // The plain-string model is passed through to the OCR request
  const bodyWithModel = expect.objectContaining({ model: 'mistral-ocr-latest' });
  expect(mockAxios.post).toHaveBeenCalledWith(expect.anything(), bodyWithModel, expect.anything());
});
|
|
|
|
it('should handle API errors during OCR process', async () => {
  mockLoadAuthValues.mockResolvedValue({
    OCR_API_KEY: 'test-api-key',
  });

  // The first POST (the file upload) rejects
  const uploadFailure = new Error('Upload failed');
  mockAxios.post!.mockRejectedValueOnce(uploadFailure);

  const ocrConfig = {
    apiKey: 'OCR_API_KEY',
    baseURL: 'OCR_BASEURL',
  };
  const req = {
    user: { id: 'user123' },
    app: { locals: { ocr: ocrConfig } },
  } as unknown as ExpressRequest;

  const file = {
    path: '/tmp/upload/file.pdf',
    originalname: 'document.pdf',
    mimetype: 'application/pdf',
  } as Express.Multer.File;

  const pending = uploadMistralOCR({ req, file, loadAuthValues: mockLoadAuthValues });
  await expect(pending).rejects.toThrow('Error uploading document to Mistral OCR API');

  // The file stream was still opened before the upload failed
  const mockedFs = fs as jest.Mocked<typeof fs>;
  expect(mockedFs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');
});
|
|
|
|
it('should handle single page documents without page numbering', async () => {
  mockLoadAuthValues.mockResolvedValue({
    OCR_API_KEY: 'test-api-key',
    OCR_BASEURL: 'https://api.mistral.ai/v1',
  });

  // Drop any call history already recorded on the two axios methods used here
  mockAxios.post!.mockClear();
  mockAxios.get!.mockClear();

  // Step 1 - file upload (first POST)
  mockAxios.post!.mockImplementationOnce(async () => ({
    data: {
      id: 'file-123',
      object: 'file',
      bytes: 1024,
      created_at: Date.now(),
      filename: 'single-page.pdf',
      purpose: 'ocr',
    } as MistralFileUploadResponse,
  }));

  // Step 2 - signed URL lookup (GET)
  mockAxios.get!.mockImplementationOnce(async () => ({
    data: {
      url: 'https://signed-url.com',
      expires_at: Date.now() + 86400000,
    } as MistralSignedUrlResponse,
  }));

  // Step 3 - OCR request (second POST), answering with exactly one page
  mockAxios.post!.mockImplementationOnce(async () => ({
    data: {
      model: 'mistral-ocr-latest',
      pages: [
        {
          index: 0,
          markdown: 'Single page content',
          images: [],
          dimensions: { dpi: 300, height: 1100, width: 850 },
        },
      ],
      document_annotation: '',
      usage_info: {
        pages_processed: 1,
        doc_size_bytes: 1024,
      },
    },
  }));

  const ocrConfig = {
    apiKey: 'OCR_API_KEY',
    baseURL: 'OCR_BASEURL',
    mistralModel: 'mistral-ocr-latest',
  };
  const req = {
    user: { id: 'user123' },
    app: { locals: { ocr: ocrConfig } },
  } as unknown as ExpressRequest;

  const file = {
    path: '/tmp/upload/file.pdf',
    originalname: 'single-page.pdf',
    mimetype: 'application/pdf',
  } as Express.Multer.File;

  const result = await uploadMistralOCR({ req, file, loadAuthValues: mockLoadAuthValues });

  const mockedFs = fs as jest.Mocked<typeof fs>;
  expect(mockedFs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');

  // A one-page result is emitted as the bare markdown, with no page heading
  expect(result.text).not.toContain('# PAGE');
  expect(result.text).toEqual('Single page content\n\n');
});
|
|
|
|
it('should use literal values in configuration when provided directly', async () => {
  // The loader is given a resolved value, but the final assertion expects it never to be called
  mockLoadAuthValues.mockResolvedValue({});

  // Drop any call history already recorded on the two axios methods used here
  mockAxios.post!.mockClear();
  mockAxios.get!.mockClear();

  // Step 1 - file upload (first POST)
  mockAxios.post!.mockImplementationOnce(async () => ({
    data: {
      id: 'file-123',
      object: 'file',
      bytes: 1024,
      created_at: Date.now(),
      filename: 'direct-values.pdf',
      purpose: 'ocr',
    } as MistralFileUploadResponse,
  }));

  // Step 2 - signed URL lookup (GET)
  mockAxios.get!.mockImplementationOnce(async () => ({
    data: {
      url: 'https://signed-url.com',
      expires_at: Date.now() + 86400000,
    } as MistralSignedUrlResponse,
  }));

  // Step 3 - OCR request (second POST)
  mockAxios.post!.mockImplementationOnce(async () => ({
    data: {
      model: 'mistral-direct-model',
      pages: [
        {
          index: 0,
          markdown: 'Processed with literal config values',
          images: [],
          dimensions: { dpi: 300, height: 1100, width: 850 },
        },
      ],
      document_annotation: '',
      usage_info: {
        pages_processed: 1,
        doc_size_bytes: 1024,
      },
    },
  }));

  // Plain literals with no `${...}` placeholders; expected to be used exactly as written
  const ocrConfig = {
    apiKey: 'actual-api-key-value',
    baseURL: 'https://direct-api-url.mistral.ai/v1',
    mistralModel: 'mistral-direct-model',
  };
  const req = {
    user: { id: 'user123' },
    app: { locals: { ocr: ocrConfig } },
  } as unknown as ExpressRequest;

  const file = {
    path: '/tmp/upload/file.pdf',
    originalname: 'direct-values.pdf',
    mimetype: 'application/pdf',
  } as Express.Multer.File;

  const result = await uploadMistralOCR({ req, file, loadAuthValues: mockLoadAuthValues });

  const mockedFs = fs as jest.Mocked<typeof fs>;
  expect(mockedFs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');

  // Upload goes to the literal base URL and is authorised with the literal key
  const literalAuthHeaders = expect.objectContaining({
    Authorization: 'Bearer actual-api-key-value',
  });
  expect(mockAxios.post).toHaveBeenCalledWith(
    'https://direct-api-url.mistral.ai/v1/files',
    expect.any(Object),
    expect.objectContaining({ headers: literalAuthHeaders }),
  );

  // OCR request goes to the literal base URL and names the literal model
  expect(mockAxios.post).toHaveBeenCalledWith(
    'https://direct-api-url.mistral.ai/v1/ocr',
    expect.objectContaining({ model: 'mistral-direct-model' }),
    expect.any(Object),
  );

  expect(result.text).toEqual('Processed with literal config values\n\n');

  // With literal values throughout, the auth loader must not be consulted at all
  expect(mockLoadAuthValues).not.toHaveBeenCalled();
});
|
|
|
|
it('should handle empty configuration values and use defaults', async () => {
  // What the auth loader resolves when asked for the default variable names
  mockLoadAuthValues.mockResolvedValue({
    OCR_API_KEY: 'default-from-env-key',
    OCR_BASEURL: 'https://default-from-env.mistral.ai/v1',
  });

  // Drop any call history already recorded on the two axios methods used here
  mockAxios.post!.mockClear();
  mockAxios.get!.mockClear();

  // Step 1 - file upload (first POST)
  mockAxios.post!.mockImplementationOnce(async () => ({
    data: {
      id: 'file-123',
      object: 'file',
      bytes: 1024,
      created_at: Date.now(),
      filename: 'empty-config.pdf',
      purpose: 'ocr',
    } as MistralFileUploadResponse,
  }));

  // Step 2 - signed URL lookup (GET)
  mockAxios.get!.mockImplementationOnce(async () => ({
    data: {
      url: 'https://signed-url.com',
      expires_at: Date.now() + 86400000,
    } as MistralSignedUrlResponse,
  }));

  // Step 3 - OCR request (second POST)
  mockAxios.post!.mockImplementationOnce(async () => ({
    data: {
      model: 'mistral-ocr-latest',
      pages: [
        {
          index: 0,
          markdown: 'Content from default configuration',
          images: [],
          dimensions: { dpi: 300, height: 1100, width: 850 },
        },
      ],
      document_annotation: '',
      usage_info: {
        pages_processed: 1,
        doc_size_bytes: 1024,
      },
    },
  }));

  // Every config field is an empty string
  const ocrConfig = {
    apiKey: '',
    baseURL: '',
    mistralModel: '',
  };
  const req = {
    user: { id: 'user123' },
    app: { locals: { ocr: ocrConfig } },
  } as unknown as ExpressRequest;

  const file = {
    path: '/tmp/upload/file.pdf',
    originalname: 'empty-config.pdf',
    mimetype: 'application/pdf',
  } as Express.Multer.File;

  const result = await uploadMistralOCR({ req, file, loadAuthValues: mockLoadAuthValues });

  const mockedFs = fs as jest.Mocked<typeof fs>;
  expect(mockedFs.createReadStream).toHaveBeenCalledWith('/tmp/upload/file.pdf');

  // The loader is asked for the default variable names
  expect(mockLoadAuthValues).toHaveBeenCalledWith({
    userId: 'user123',
    authFields: ['OCR_BASEURL', 'OCR_API_KEY'],
    optional: expect.any(Set),
  });

  // Upload uses the base URL and key that the loader resolved
  const resolvedAuthHeaders = expect.objectContaining({
    Authorization: 'Bearer default-from-env-key',
  });
  expect(mockAxios.post).toHaveBeenCalledWith(
    'https://default-from-env.mistral.ai/v1/files',
    expect.any(Object),
    expect.objectContaining({ headers: resolvedAuthHeaders }),
  );

  // The OCR request names `mistral-ocr-latest` even though the configured model was empty
  expect(mockAxios.post).toHaveBeenCalledWith(
    'https://default-from-env.mistral.ai/v1/ocr',
    expect.objectContaining({ model: 'mistral-ocr-latest' }),
    expect.any(Object),
  );

  expect(result.text).toEqual('Content from default configuration\n\n');
});
|
|
|
|
describe('Mixed env var and hardcoded configuration', () => {
|
|
// Before every test in this group, make the mocked `fs.createReadStream` return a fake stream.
beforeEach(() => {
  // Chainable `on`: invokes 'end' handlers on registration and returns the receiver.
  const onMock = jest.fn().mockImplementation(function (
    this: MockReadStream,
    event: string,
    handler: () => void,
  ) {
    if (event === 'end') {
      handler();
    }
    return this;
  });

  // Chainable `pipe`: returns the receiver without doing anything else.
  const pipeMock = jest.fn().mockImplementation(function (this: MockReadStream) {
    return this;
  });

  const fakeStream: MockReadStream = {
    on: onMock,
    pipe: pipeMock,
    pause: jest.fn(),
    resume: jest.fn(),
    emit: jest.fn(),
    once: jest.fn(),
    destroy: jest.fn(),
    path: '/tmp/upload/file.pdf',
    fd: 1,
    flags: 'r',
    mode: 0o666,
    autoClose: true,
    bytesRead: 0,
    closed: false,
    pending: false,
  };

  (jest.mocked(fs).createReadStream as jest.Mock).mockReturnValue(fakeStream);
});
|
|
|
|
it('should preserve hardcoded baseURL when only apiKey is an env var', async () => {
  // The loader resolves only the API key; no base URL entry is returned
  mockLoadAuthValues.mockResolvedValue({
    AZURE_MISTRAL_OCR_API_KEY: 'test-api-key-from-env',
  });

  // Queued in call order: file upload (POST), signed URL (GET), OCR (POST)
  const uploadedFile: MistralFileUploadResponse = {
    id: 'file-123',
    object: 'file',
    bytes: 1024,
    created_at: Date.now(),
    filename: 'document.pdf',
    purpose: 'ocr',
  };
  mockAxios.post!.mockResolvedValueOnce({ data: uploadedFile });

  const signedUrl: MistralSignedUrlResponse = {
    url: 'https://signed-url.com',
    expires_at: Date.now() + 86400000,
  };
  mockAxios.get!.mockResolvedValueOnce({ data: signedUrl });

  mockAxios.post!.mockResolvedValueOnce({
    data: {
      model: 'mistral-ocr-2503',
      pages: [
        {
          index: 0,
          markdown: 'Test content',
          images: [],
          dimensions: { dpi: 300, height: 1100, width: 850 },
        },
      ],
      document_annotation: '',
      usage_info: {
        pages_processed: 1,
        doc_size_bytes: 1024,
      },
    },
  });

  // Mixed config: the key is a placeholder, the base URL and model are literals
  const ocrConfig = {
    apiKey: '${AZURE_MISTRAL_OCR_API_KEY}',
    baseURL: 'https://endpoint.models.ai.azure.com/v1',
    mistralModel: 'mistral-ocr-2503',
  };
  const req = {
    user: { id: 'user123' },
    app: { locals: { ocr: ocrConfig } },
  } as unknown as ExpressRequest;

  const file = {
    path: '/tmp/upload/file.pdf',
    originalname: 'document.pdf',
    mimetype: 'application/pdf',
  } as Express.Multer.File;

  await uploadMistralOCR({ req, file, loadAuthValues: mockLoadAuthValues });

  // Only the placeholder field is requested from the loader
  expect(mockLoadAuthValues).toHaveBeenCalledWith({
    userId: 'user123',
    authFields: ['AZURE_MISTRAL_OCR_API_KEY'],
    optional: expect.any(Set),
  });

  // The first POST (the upload) must target the hardcoded base URL
  const [uploadUrl] = mockAxios.post!.mock.calls[0];
  expect(uploadUrl).toBe('https://endpoint.models.ai.azure.com/v1/files');
});
|
|
|
|
it('should preserve hardcoded apiKey when only baseURL is an env var', async () => {
  // Regression guard for a mixed OCR config: baseURL is an env-var placeholder
  // while apiKey is a literal value that must be sent unchanged.
  const file = {
    path: '/tmp/upload/file.pdf',
    originalname: 'document.pdf',
    mimetype: 'application/pdf',
  } as Express.Multer.File;

  const req = {
    user: { id: 'user456' },
    app: {
      locals: {
        ocr: {
          apiKey: 'hardcoded-api-key-12345',
          baseURL: '${CUSTOM_OCR_BASEURL}',
          mistralModel: 'mistral-ocr-latest',
        },
      },
    },
  } as unknown as ExpressRequest;

  // The loader resolves the base URL only; it returns no API key entry.
  mockLoadAuthValues.mockResolvedValue({
    CUSTOM_OCR_BASEURL: 'https://custom-ocr-endpoint.com/v1',
  });

  const uploadedFile = {
    id: 'file-456',
    object: 'file',
    bytes: 1024,
    created_at: Date.now(),
    filename: 'document.pdf',
    purpose: 'ocr',
  } as MistralFileUploadResponse;

  const signedUrl = {
    url: 'https://signed-url.com',
    expires_at: Date.now() + 86400000,
  } as MistralSignedUrlResponse;

  // Queued replies: the first post reply stands in for the file upload,
  // the get reply for the signed URL, and the second post reply for OCR.
  mockAxios.post!.mockResolvedValueOnce({ data: uploadedFile });
  mockAxios.get!.mockResolvedValueOnce({ data: signedUrl });
  mockAxios.post!.mockResolvedValueOnce({
    data: {
      model: 'mistral-ocr-latest',
      pages: [
        {
          index: 0,
          markdown: 'Test content',
          images: [],
          dimensions: { dpi: 300, height: 1100, width: 850 },
        },
      ],
      document_annotation: '',
      usage_info: {
        pages_processed: 1,
        doc_size_bytes: 1024,
      },
    },
  });

  await uploadMistralOCR({ req, file, loadAuthValues: mockLoadAuthValues });

  // Only the placeholder-backed field should be requested from the loader.
  expect(mockLoadAuthValues).toHaveBeenCalledWith({
    userId: 'user456',
    authFields: ['CUSTOM_OCR_BASEURL'],
    optional: expect.any(Set),
  });

  // The first post must carry the literal apiKey as a Bearer token.
  const uploadConfig = mockAxios.post!.mock.calls[0][2];
  expect(uploadConfig?.headers?.Authorization).toBe('Bearer hardcoded-api-key-12345');
});
|
|
});
|
|
});
|
|
|
|
describe('uploadAzureMistralOCR', () => {
  // Every test in this suite reads the uploaded file through the mocked fs.readFileSync.
  beforeEach(() => {
    const fileContents = Buffer.from('mock-file-content');
    (jest.mocked(fs).readFileSync as jest.Mock).mockReturnValue(fileContents);
  });

  it('should process OCR using Azure Mistral with base64 encoding', async () => {
    const file = {
      path: '/tmp/upload/azure-file.pdf',
      originalname: 'azure-document.pdf',
      mimetype: 'application/pdf',
    } as Express.Multer.File;

    const req = {
      user: { id: 'user123' },
      app: {
        locals: {
          ocr: {
            apiKey: '${OCR_API_KEY}',
            baseURL: '${OCR_BASEURL}',
            mistralModel: 'mistral-ocr-latest',
          },
        },
      },
    } as unknown as ExpressRequest;

    // Both placeholders resolve through the auth loader.
    mockLoadAuthValues.mockResolvedValue({
      OCR_API_KEY: 'azure-api-key',
      OCR_BASEURL: 'https://azure.mistral.ai/v1',
    });

    // Single queued post reply: the OCR result, including one embedded image.
    mockAxios.post!.mockResolvedValueOnce({
      data: {
        model: 'mistral-ocr-latest',
        pages: [
          {
            index: 0,
            markdown: 'Azure OCR content',
            images: [
              {
                id: 'azure1',
                top_left_x: 0,
                top_left_y: 0,
                bottom_right_x: 100,
                bottom_right_y: 100,
                image_base64: 'azure-base64',
                image_annotation: '',
              },
            ],
            dimensions: { dpi: 300, height: 1100, width: 850 },
          },
        ],
        document_annotation: '',
        usage_info: {
          pages_processed: 1,
          doc_size_bytes: 1024,
        },
      },
    });

    const result = await uploadAzureMistralOCR({
      req,
      file,
      loadAuthValues: mockLoadAuthValues,
    });

    // The file is read from disk using the path on the upload.
    expect(jest.mocked(fs).readFileSync).toHaveBeenCalledWith('/tmp/upload/azure-file.pdf');

    // The OCR request must carry the document as a base64 data URL.
    const expectedDocument = expect.objectContaining({
      type: 'document_url',
      document_url: expect.stringMatching(/^data:application\/pdf;base64,/),
    });
    expect(mockAxios.post).toHaveBeenCalledWith(
      'https://azure.mistral.ai/v1/ocr',
      expect.objectContaining({ document: expectedDocument }),
      expect.any(Object),
    );

    expect(result).toEqual({
      filename: 'azure-document.pdf',
      bytes: expect.any(Number),
      filepath: 'azure_mistral_ocr',
      text: 'Azure OCR content\n\n',
      images: ['azure-base64'],
    });
  });

  describe('Mixed env var and hardcoded configuration', () => {
    it('should preserve hardcoded baseURL when only apiKey is an env var', async () => {
      // Regression guard: apiKey is an env-var placeholder while baseURL is a
      // literal URL that must be used as-is for the OCR request.
      const file = {
        path: '/tmp/upload/file.pdf',
        originalname: 'document.pdf',
        mimetype: 'application/pdf',
      } as Express.Multer.File;

      const req = {
        user: { id: 'user123' },
        app: {
          locals: {
            ocr: {
              apiKey: '${AZURE_MISTRAL_OCR_API_KEY}',
              baseURL: 'https://endpoint.models.ai.azure.com/v1',
              mistralModel: 'mistral-ocr-2503',
            },
          },
        },
      } as unknown as ExpressRequest;

      // The loader resolves the API key only; it returns no base URL entry.
      mockLoadAuthValues.mockResolvedValue({
        AZURE_MISTRAL_OCR_API_KEY: 'test-api-key-from-env',
      });

      // Queued OCR reply.
      mockAxios.post!.mockResolvedValueOnce({
        data: {
          model: 'mistral-ocr-2503',
          pages: [
            {
              index: 0,
              markdown: 'Test content',
              images: [],
              dimensions: { dpi: 300, height: 1100, width: 850 },
            },
          ],
          document_annotation: '',
          usage_info: {
            pages_processed: 1,
            doc_size_bytes: 1024,
          },
        },
      });

      await uploadAzureMistralOCR({ req, file, loadAuthValues: mockLoadAuthValues });

      // Only the placeholder-backed field should be requested from the loader.
      expect(mockLoadAuthValues).toHaveBeenCalledWith({
        userId: 'user123',
        authFields: ['AZURE_MISTRAL_OCR_API_KEY'],
        optional: expect.any(Set),
      });

      // The first post must target the literal baseURL from the config.
      const ocrUrl = mockAxios.post!.mock.calls[0][0];
      expect(ocrUrl).toBe('https://endpoint.models.ai.azure.com/v1/ocr');
    });

    it('should preserve hardcoded apiKey when only baseURL is an env var', async () => {
      // Regression guard: baseURL is an env-var placeholder while apiKey is a
      // literal value that must be sent unchanged.
      const file = {
        path: '/tmp/upload/file.pdf',
        originalname: 'document.pdf',
        mimetype: 'application/pdf',
      } as Express.Multer.File;

      const req = {
        user: { id: 'user456' },
        app: {
          locals: {
            ocr: {
              apiKey: 'hardcoded-api-key-12345',
              baseURL: '${CUSTOM_OCR_BASEURL}',
              mistralModel: 'mistral-ocr-latest',
            },
          },
        },
      } as unknown as ExpressRequest;

      // The loader resolves the base URL only; it returns no API key entry.
      mockLoadAuthValues.mockResolvedValue({
        CUSTOM_OCR_BASEURL: 'https://custom-ocr-endpoint.com/v1',
      });

      // Queued OCR reply.
      mockAxios.post!.mockResolvedValueOnce({
        data: {
          model: 'mistral-ocr-latest',
          pages: [
            {
              index: 0,
              markdown: 'Test content',
              images: [],
              dimensions: { dpi: 300, height: 1100, width: 850 },
            },
          ],
          document_annotation: '',
          usage_info: {
            pages_processed: 1,
            doc_size_bytes: 1024,
          },
        },
      });

      await uploadAzureMistralOCR({ req, file, loadAuthValues: mockLoadAuthValues });

      // Only the placeholder-backed field should be requested from the loader.
      expect(mockLoadAuthValues).toHaveBeenCalledWith({
        userId: 'user456',
        authFields: ['CUSTOM_OCR_BASEURL'],
        optional: expect.any(Set),
      });

      // The first post must carry the literal apiKey as a Bearer token.
      const ocrConfig = mockAxios.post!.mock.calls[0][2];
      expect(ocrConfig?.headers?.Authorization).toBe('Bearer hardcoded-api-key-12345');
    });
  });
});
|
|
});
|