🧹 feat: Automatic File Cleanup for Mistral OCR Uploads (#8827)

* chore: Handle optional token_endpoint in OAuth metadata discovery

* chore: Simplify permission typing logic in checkAccess function

* feat: Implement `deleteMistralFile` function and integrate file cleanup in `uploadMistralOCR`
This commit is contained in:
Danny Avila 2025-08-03 17:11:14 -04:00 committed by GitHub
parent 7ef2c626e2
commit 33834cd484
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 438 additions and 12 deletions

View file

@ -50,8 +50,9 @@ import type { MistralFileUploadResponse, MistralSignedUrlResponse, OCRResult } f
import { logger as mockLogger } from '@librechat/data-schemas';
import {
uploadDocumentToMistral,
uploadMistralOCR,
uploadAzureMistralOCR,
deleteMistralFile,
uploadMistralOCR,
getSignedUrl,
performOCR,
} from './crud';
@ -216,6 +217,56 @@ describe('MistralOCR Service', () => {
});
});
describe('deleteMistralFile', () => {
it('should delete a file from Mistral API', async () => {
mockAxios.delete!.mockResolvedValueOnce({ data: {} });
await deleteMistralFile({
fileId: 'file-123',
apiKey: 'test-api-key',
baseURL: 'https://api.mistral.ai/v1',
});
expect(mockAxios.delete).toHaveBeenCalledWith('https://api.mistral.ai/v1/files/file-123', {
headers: {
Authorization: 'Bearer test-api-key',
},
});
});
it('should use default baseURL when not provided', async () => {
mockAxios.delete!.mockResolvedValueOnce({ data: {} });
await deleteMistralFile({
fileId: 'file-456',
apiKey: 'test-api-key',
});
expect(mockAxios.delete).toHaveBeenCalledWith('https://api.mistral.ai/v1/files/file-456', {
headers: {
Authorization: 'Bearer test-api-key',
},
});
});
it('should not throw when deletion fails', async () => {
mockAxios.delete!.mockRejectedValueOnce(new Error('Delete failed'));
// Should not throw
await expect(
deleteMistralFile({
fileId: 'file-789',
apiKey: 'test-api-key',
}),
).resolves.not.toThrow();
expect(mockLogger.error).toHaveBeenCalledWith(
'Error deleting Mistral file file-789:',
expect.any(Error),
);
});
});
describe('performOCR', () => {
it('should perform OCR using Mistral API (document_url)', async () => {
const mockResponse: { data: OCRResult } = {
@ -1345,6 +1396,340 @@ describe('MistralOCR Service', () => {
expect(authHeader).toBe('Bearer hardcoded-api-key-12345');
});
});
describe('File cleanup', () => {
beforeEach(() => {
const mockReadStream: MockReadStream = {
on: jest.fn().mockImplementation(function (
this: MockReadStream,
event: string,
handler: () => void,
) {
if (event === 'end') {
handler();
}
return this;
}),
pipe: jest.fn().mockImplementation(function (this: MockReadStream) {
return this;
}),
pause: jest.fn(),
resume: jest.fn(),
emit: jest.fn(),
once: jest.fn(),
destroy: jest.fn(),
path: '/tmp/upload/file.pdf',
fd: 1,
flags: 'r',
mode: 0o666,
autoClose: true,
bytesRead: 0,
closed: false,
pending: false,
};
(jest.mocked(fs).createReadStream as jest.Mock).mockReturnValue(mockReadStream);
// Clear all mocks before each test
mockAxios.delete!.mockClear();
});
it('should delete the uploaded file after successful OCR processing', async () => {
mockLoadAuthValues.mockResolvedValue({
OCR_API_KEY: 'test-api-key',
OCR_BASEURL: 'https://api.mistral.ai/v1',
});
// Mock file upload response
mockAxios.post!.mockResolvedValueOnce({
data: {
id: 'file-cleanup-123',
object: 'file',
bytes: 1024,
created_at: Date.now(),
filename: 'document.pdf',
purpose: 'ocr',
} as MistralFileUploadResponse,
});
// Mock signed URL response
mockAxios.get!.mockResolvedValueOnce({
data: {
url: 'https://signed-url.com',
expires_at: Date.now() + 86400000,
} as MistralSignedUrlResponse,
});
// Mock OCR response
mockAxios.post!.mockResolvedValueOnce({
data: {
model: 'mistral-ocr-latest',
pages: [
{
index: 0,
markdown: 'OCR content',
images: [],
dimensions: { dpi: 300, height: 1100, width: 850 },
},
],
document_annotation: '',
usage_info: {
pages_processed: 1,
doc_size_bytes: 1024,
},
},
});
// Mock delete file response
mockAxios.delete!.mockResolvedValueOnce({ data: {} });
const req = {
user: { id: 'user123' },
app: {
locals: {
ocr: {
apiKey: '${OCR_API_KEY}',
baseURL: '${OCR_BASEURL}',
mistralModel: 'mistral-ocr-latest',
},
},
},
} as unknown as ExpressRequest;
const file = {
path: '/tmp/upload/file.pdf',
originalname: 'document.pdf',
mimetype: 'application/pdf',
} as Express.Multer.File;
await uploadMistralOCR({
req,
file,
loadAuthValues: mockLoadAuthValues,
});
// Verify delete was called with correct parameters
expect(mockAxios.delete).toHaveBeenCalledWith(
'https://api.mistral.ai/v1/files/file-cleanup-123',
{
headers: {
Authorization: 'Bearer test-api-key',
},
},
);
expect(mockAxios.delete).toHaveBeenCalledTimes(1);
});
it('should delete the uploaded file even when OCR processing fails', async () => {
mockLoadAuthValues.mockResolvedValue({
OCR_API_KEY: 'test-api-key',
OCR_BASEURL: 'https://api.mistral.ai/v1',
});
// Mock file upload response
mockAxios.post!.mockResolvedValueOnce({
data: {
id: 'file-cleanup-456',
object: 'file',
bytes: 1024,
created_at: Date.now(),
filename: 'document.pdf',
purpose: 'ocr',
} as MistralFileUploadResponse,
});
// Mock signed URL response
mockAxios.get!.mockResolvedValueOnce({
data: {
url: 'https://signed-url.com',
expires_at: Date.now() + 86400000,
} as MistralSignedUrlResponse,
});
// Mock OCR to fail
mockAxios.post!.mockRejectedValueOnce(new Error('OCR processing failed'));
// Mock delete file response
mockAxios.delete!.mockResolvedValueOnce({ data: {} });
const req = {
user: { id: 'user123' },
app: {
locals: {
ocr: {
apiKey: '${OCR_API_KEY}',
baseURL: '${OCR_BASEURL}',
mistralModel: 'mistral-ocr-latest',
},
},
},
} as unknown as ExpressRequest;
const file = {
path: '/tmp/upload/file.pdf',
originalname: 'document.pdf',
mimetype: 'application/pdf',
} as Express.Multer.File;
await expect(
uploadMistralOCR({
req,
file,
loadAuthValues: mockLoadAuthValues,
}),
).rejects.toThrow('Error uploading document to Mistral OCR API');
// Verify delete was still called despite the error
expect(mockAxios.delete).toHaveBeenCalledWith(
'https://api.mistral.ai/v1/files/file-cleanup-456',
{
headers: {
Authorization: 'Bearer test-api-key',
},
},
);
expect(mockAxios.delete).toHaveBeenCalledTimes(1);
});
it('should handle deletion errors gracefully without throwing', async () => {
mockLoadAuthValues.mockResolvedValue({
OCR_API_KEY: 'test-api-key',
OCR_BASEURL: 'https://api.mistral.ai/v1',
});
// Mock file upload response
mockAxios.post!.mockResolvedValueOnce({
data: {
id: 'file-cleanup-789',
object: 'file',
bytes: 1024,
created_at: Date.now(),
filename: 'document.pdf',
purpose: 'ocr',
} as MistralFileUploadResponse,
});
// Mock signed URL response
mockAxios.get!.mockResolvedValueOnce({
data: {
url: 'https://signed-url.com',
expires_at: Date.now() + 86400000,
} as MistralSignedUrlResponse,
});
// Mock OCR response
mockAxios.post!.mockResolvedValueOnce({
data: {
model: 'mistral-ocr-latest',
pages: [
{
index: 0,
markdown: 'OCR content',
images: [],
dimensions: { dpi: 300, height: 1100, width: 850 },
},
],
document_annotation: '',
usage_info: {
pages_processed: 1,
doc_size_bytes: 1024,
},
},
});
// Mock delete to fail
mockAxios.delete!.mockRejectedValueOnce(new Error('Delete failed'));
const req = {
user: { id: 'user123' },
app: {
locals: {
ocr: {
apiKey: '${OCR_API_KEY}',
baseURL: '${OCR_BASEURL}',
mistralModel: 'mistral-ocr-latest',
},
},
},
} as unknown as ExpressRequest;
const file = {
path: '/tmp/upload/file.pdf',
originalname: 'document.pdf',
mimetype: 'application/pdf',
} as Express.Multer.File;
// Should not throw even if delete fails
const result = await uploadMistralOCR({
req,
file,
loadAuthValues: mockLoadAuthValues,
});
expect(result).toEqual({
filename: 'document.pdf',
bytes: expect.any(Number),
filepath: 'mistral_ocr',
text: 'OCR content\n\n',
images: [],
});
// Verify delete was attempted
expect(mockAxios.delete).toHaveBeenCalledWith(
'https://api.mistral.ai/v1/files/file-cleanup-789',
{
headers: {
Authorization: 'Bearer test-api-key',
},
},
);
// Verify error was logged
expect(mockLogger.error).toHaveBeenCalledWith(
'Error deleting Mistral file file-cleanup-789:',
expect.any(Error),
);
});
it('should not attempt cleanup if file upload fails', async () => {
mockLoadAuthValues.mockResolvedValue({
OCR_API_KEY: 'test-api-key',
OCR_BASEURL: 'https://api.mistral.ai/v1',
});
// Mock file upload to fail
mockAxios.post!.mockRejectedValueOnce(new Error('Upload failed'));
const req = {
user: { id: 'user123' },
app: {
locals: {
ocr: {
apiKey: '${OCR_API_KEY}',
baseURL: '${OCR_BASEURL}',
mistralModel: 'mistral-ocr-latest',
},
},
},
} as unknown as ExpressRequest;
const file = {
path: '/tmp/upload/file.pdf',
originalname: 'document.pdf',
mimetype: 'application/pdf',
} as Express.Multer.File;
await expect(
uploadMistralOCR({
req,
file,
loadAuthValues: mockLoadAuthValues,
}),
).rejects.toThrow('Error uploading document to Mistral OCR API');
// Verify delete was NOT called since upload failed
expect(mockAxios.delete).not.toHaveBeenCalled();
});
});
});
describe('uploadAzureMistralOCR', () => {

View file

@ -172,6 +172,35 @@ export async function performOCR({
});
}
/**
* Deletes a file from Mistral API
* @param params Delete parameters
* @param params.fileId The file ID to delete
* @param params.apiKey Mistral API key
* @param params.baseURL Mistral API base URL
* @returns Promise that resolves when the file is deleted
*/
export async function deleteMistralFile({
fileId,
apiKey,
baseURL = DEFAULT_MISTRAL_BASE_URL,
}: {
fileId: string;
apiKey: string;
baseURL?: string;
}): Promise<void> {
try {
const result = await axios.delete(`${baseURL}/files/${fileId}`, {
headers: {
Authorization: `Bearer ${apiKey}`,
},
});
logger.debug(`Mistral file ${fileId} deleted successfully:`, result.data);
} catch (error) {
logger.error(`Error deleting Mistral file ${fileId}:`, error);
}
}
/**
* Determines if a value needs to be loaded from environment
*/
@ -335,8 +364,14 @@ function createOCRError(error: unknown, baseMessage: string): Error {
* along with the `filename` and `bytes` properties.
*/
export const uploadMistralOCR = async (context: OCRContext): Promise<MistralOCRUploadResult> => {
let mistralFileId: string | undefined;
let apiKey: string | undefined;
let baseURL: string | undefined;
try {
const { apiKey, baseURL } = await loadAuthConfig(context);
const authConfig = await loadAuthConfig(context);
apiKey = authConfig.apiKey;
baseURL = authConfig.baseURL;
const model = getModelConfig(context.req.app.locals?.ocr);
const mistralFile = await uploadDocumentToMistral({
@ -346,6 +381,8 @@ export const uploadMistralOCR = async (context: OCRContext): Promise<MistralOCRU
baseURL,
});
mistralFileId = mistralFile.id;
const signedUrlResponse = await getSignedUrl({
apiKey,
baseURL,
@ -354,11 +391,11 @@ export const uploadMistralOCR = async (context: OCRContext): Promise<MistralOCRU
const documentType = getDocumentType(context.file);
const ocrResult = await performOCR({
apiKey,
baseURL,
model,
url: signedUrlResponse.url,
documentType,
baseURL,
apiKey,
model,
});
if (!ocrResult || !ocrResult.pages || ocrResult.pages.length === 0) {
@ -368,6 +405,10 @@ export const uploadMistralOCR = async (context: OCRContext): Promise<MistralOCRU
}
const { text, images } = processOCRResult(ocrResult);
if (mistralFileId && apiKey && baseURL) {
await deleteMistralFile({ fileId: mistralFileId, apiKey, baseURL });
}
return {
filename: context.file.originalname,
bytes: text.length * 4,
@ -376,6 +417,9 @@ export const uploadMistralOCR = async (context: OCRContext): Promise<MistralOCRU
images,
};
} catch (error) {
if (mistralFileId && apiKey && baseURL) {
await deleteMistralFile({ fileId: mistralFileId, apiKey, baseURL });
}
throw createOCRError(error, 'Error uploading document to Mistral OCR API:');
}
};

View file

@ -602,7 +602,7 @@ export class MCPOAuthHandler {
/** Auto-discover OAuth configuration for refresh */
const oauthMetadata = await discoverAuthorizationServerMetadata(metadata.serverUrl);
if (!oauthMetadata.token_endpoint) {
if (!oauthMetadata?.token_endpoint) {
throw new Error('No token endpoint found in OAuth metadata');
}

View file

@ -63,13 +63,10 @@ export const checkAccess = async ({
}
const role = await getRoleByName(user.role);
if (role && role.permissions && role.permissions[permissionType]) {
const permissionValue = role?.permissions?.[permissionType as keyof typeof role.permissions];
if (role && role.permissions && permissionValue) {
const hasAnyPermission = permissions.every((permission) => {
if (
role.permissions?.[permissionType as keyof typeof role.permissions]?.[
permission as keyof (typeof role.permissions)[typeof permissionType]
]
) {
if (permissionValue[permission as keyof typeof permissionValue]) {
return true;
}