mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-16 16:30:15 +01:00
🧹 feat: Automatic File Cleanup for Mistral OCR Uploads (#8827)
* chore: Handle optional token_endpoint in OAuth metadata discovery
* chore: Simplify permission typing logic in checkAccess function
* feat: Implement `deleteMistralFile` function and integrate file cleanup in `uploadMistralOCR`
This commit is contained in:
parent
7ef2c626e2
commit
33834cd484
4 changed files with 438 additions and 12 deletions
|
|
@ -50,8 +50,9 @@ import type { MistralFileUploadResponse, MistralSignedUrlResponse, OCRResult } f
|
|||
import { logger as mockLogger } from '@librechat/data-schemas';
|
||||
import {
|
||||
uploadDocumentToMistral,
|
||||
uploadMistralOCR,
|
||||
uploadAzureMistralOCR,
|
||||
deleteMistralFile,
|
||||
uploadMistralOCR,
|
||||
getSignedUrl,
|
||||
performOCR,
|
||||
} from './crud';
|
||||
|
|
@ -216,6 +217,56 @@ describe('MistralOCR Service', () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe('deleteMistralFile', () => {
  // Fixtures shared by every case: the API key handed to the function and the
  // axios request config it is expected to produce from that key.
  const apiKey = 'test-api-key';
  const expectedConfig = {
    headers: {
      Authorization: 'Bearer test-api-key',
    },
  };

  it('should delete a file from Mistral API', async () => {
    mockAxios.delete!.mockResolvedValueOnce({ data: {} });

    const params = {
      fileId: 'file-123',
      apiKey,
      baseURL: 'https://api.mistral.ai/v1',
    };
    await deleteMistralFile(params);

    expect(mockAxios.delete).toHaveBeenCalledWith(
      'https://api.mistral.ai/v1/files/file-123',
      expectedConfig,
    );
  });

  it('should use default baseURL when not provided', async () => {
    mockAxios.delete!.mockResolvedValueOnce({ data: {} });

    // No baseURL is passed, so the URL asserted below must come from the default.
    await deleteMistralFile({ fileId: 'file-456', apiKey });

    expect(mockAxios.delete).toHaveBeenCalledWith(
      'https://api.mistral.ai/v1/files/file-456',
      expectedConfig,
    );
  });

  it('should not throw when deletion fails', async () => {
    mockAxios.delete!.mockRejectedValueOnce(new Error('Delete failed'));

    // The rejection from axios must be absorbed: the returned promise resolves.
    const pendingDelete = deleteMistralFile({ fileId: 'file-789', apiKey });
    await expect(pendingDelete).resolves.not.toThrow();

    // ...and the failure is reported through the logger instead.
    expect(mockLogger.error).toHaveBeenCalledWith(
      'Error deleting Mistral file file-789:',
      expect.any(Error),
    );
  });
});
|
||||
|
||||
describe('performOCR', () => {
|
||||
it('should perform OCR using Mistral API (document_url)', async () => {
|
||||
const mockResponse: { data: OCRResult } = {
|
||||
|
|
@ -1345,6 +1396,340 @@ describe('MistralOCR Service', () => {
|
|||
expect(authHeader).toBe('Bearer hardcoded-api-key-12345');
|
||||
});
|
||||
});
|
||||
|
||||
describe('File cleanup', () => {
  const API_KEY = 'test-api-key';
  const BASE_URL = 'https://api.mistral.ai/v1';

  /**
   * Builds the request passed to `uploadMistralOCR`. The OCR config holds
   * placeholder strings; the mocked `loadAuthValues` (see `beforeEach`)
   * supplies values under the matching `OCR_API_KEY` / `OCR_BASEURL` names.
   */
  const createRequest = (): ExpressRequest =>
    ({
      user: { id: 'user123' },
      app: {
        locals: {
          ocr: {
            apiKey: '${OCR_API_KEY}',
            baseURL: '${OCR_BASEURL}',
            mistralModel: 'mistral-ocr-latest',
          },
        },
      },
    }) as unknown as ExpressRequest;

  /** Builds the uploaded-file descriptor used by every test in this suite. */
  const createFile = (): Express.Multer.File =>
    ({
      path: '/tmp/upload/file.pdf',
      originalname: 'document.pdf',
      mimetype: 'application/pdf',
    }) as Express.Multer.File;

  /** Queues a successful file-upload response (next `axios.post` call) with the given file id. */
  const queueUploadSuccess = (fileId: string): void => {
    mockAxios.post!.mockResolvedValueOnce({
      data: {
        id: fileId,
        object: 'file',
        bytes: 1024,
        created_at: Date.now(),
        filename: 'document.pdf',
        purpose: 'ocr',
      } as MistralFileUploadResponse,
    });
  };

  /** Queues a signed-URL response for the next `axios.get` call. */
  const queueSignedUrl = (): void => {
    mockAxios.get!.mockResolvedValueOnce({
      data: {
        url: 'https://signed-url.com',
        expires_at: Date.now() + 86400000,
      } as MistralSignedUrlResponse,
    });
  };

  /**
   * Queues a single-page OCR result for the next `axios.post` call.
   * Must be queued after `queueUploadSuccess`, because both share the `post` mock queue.
   */
  const queueOcrSuccess = (): void => {
    mockAxios.post!.mockResolvedValueOnce({
      data: {
        model: 'mistral-ocr-latest',
        pages: [
          {
            index: 0,
            markdown: 'OCR content',
            images: [],
            dimensions: { dpi: 300, height: 1100, width: 850 },
          },
        ],
        document_annotation: '',
        usage_info: {
          pages_processed: 1,
          doc_size_bytes: 1024,
        },
      },
    });
  };

  /** Invokes `uploadMistralOCR` with the suite's standard request, file and auth loader. */
  const runUpload = () =>
    uploadMistralOCR({
      req: createRequest(),
      file: createFile(),
      loadAuthValues: mockLoadAuthValues,
    });

  /** Asserts that a DELETE was issued for `fileId` with the suite's bearer token. */
  const expectFileDeleted = (fileId: string): void => {
    expect(mockAxios.delete).toHaveBeenCalledWith(`${BASE_URL}/files/${fileId}`, {
      headers: {
        Authorization: `Bearer ${API_KEY}`,
      },
    });
  };

  beforeEach(() => {
    // Stub read stream that fires its 'end' handler as soon as it is registered.
    const mockReadStream: MockReadStream = {
      on: jest.fn().mockImplementation(function (
        this: MockReadStream,
        event: string,
        handler: () => void,
      ) {
        if (event === 'end') {
          handler();
        }
        return this;
      }),
      pipe: jest.fn().mockImplementation(function (this: MockReadStream) {
        return this;
      }),
      pause: jest.fn(),
      resume: jest.fn(),
      emit: jest.fn(),
      once: jest.fn(),
      destroy: jest.fn(),
      path: '/tmp/upload/file.pdf',
      fd: 1,
      flags: 'r',
      mode: 0o666,
      autoClose: true,
      bytesRead: 0,
      closed: false,
      pending: false,
    };

    (jest.mocked(fs).createReadStream as jest.Mock).mockReturnValue(mockReadStream);

    // Only the delete mock's call history is reset here, so the
    // `toHaveBeenCalledTimes` / `not.toHaveBeenCalled` assertions are per-test.
    mockAxios.delete!.mockClear();

    // Every test resolves the same credentials.
    mockLoadAuthValues.mockResolvedValue({
      OCR_API_KEY: API_KEY,
      OCR_BASEURL: BASE_URL,
    });
  });

  it('should delete the uploaded file after successful OCR processing', async () => {
    queueUploadSuccess('file-cleanup-123');
    queueSignedUrl();
    queueOcrSuccess();
    mockAxios.delete!.mockResolvedValueOnce({ data: {} });

    await runUpload();

    // Verify delete was called with correct parameters
    expectFileDeleted('file-cleanup-123');
    expect(mockAxios.delete).toHaveBeenCalledTimes(1);
  });

  it('should delete the uploaded file even when OCR processing fails', async () => {
    queueUploadSuccess('file-cleanup-456');
    queueSignedUrl();
    // Second `post` call (the OCR request) fails.
    mockAxios.post!.mockRejectedValueOnce(new Error('OCR processing failed'));
    mockAxios.delete!.mockResolvedValueOnce({ data: {} });

    await expect(runUpload()).rejects.toThrow('Error uploading document to Mistral OCR API');

    // Verify delete was still called despite the error
    expectFileDeleted('file-cleanup-456');
    expect(mockAxios.delete).toHaveBeenCalledTimes(1);
  });

  it('should handle deletion errors gracefully without throwing', async () => {
    queueUploadSuccess('file-cleanup-789');
    queueSignedUrl();
    queueOcrSuccess();
    // Cleanup itself fails.
    mockAxios.delete!.mockRejectedValueOnce(new Error('Delete failed'));

    // Should not throw even if delete fails
    const result = await runUpload();

    expect(result).toEqual({
      filename: 'document.pdf',
      bytes: expect.any(Number),
      filepath: 'mistral_ocr',
      text: 'OCR content\n\n',
      images: [],
    });

    // Verify delete was attempted
    expectFileDeleted('file-cleanup-789');

    // Verify error was logged
    expect(mockLogger.error).toHaveBeenCalledWith(
      'Error deleting Mistral file file-cleanup-789:',
      expect.any(Error),
    );
  });

  it('should not attempt cleanup if file upload fails', async () => {
    // First `post` call (the upload) fails, so no file id is ever obtained.
    mockAxios.post!.mockRejectedValueOnce(new Error('Upload failed'));

    await expect(runUpload()).rejects.toThrow('Error uploading document to Mistral OCR API');

    // Verify delete was NOT called since upload failed
    expect(mockAxios.delete).not.toHaveBeenCalled();
  });
});
|
||||
});
|
||||
|
||||
describe('uploadAzureMistralOCR', () => {
|
||||
|
|
|
|||
|
|
@ -172,6 +172,35 @@ export async function performOCR({
|
|||
});
|
||||
}
|
||||
|
||||
/**
 * Best-effort removal of a previously uploaded file from the Mistral API.
 *
 * Issues `DELETE {baseURL}/files/{fileId}` with a bearer token. A failed
 * request is logged and swallowed, so the returned promise does not reject
 * because of it.
 *
 * @param params Delete parameters
 * @param params.fileId The file ID to delete
 * @param params.apiKey Mistral API key, sent as a `Bearer` authorization header
 * @param params.baseURL Mistral API base URL; falls back to `DEFAULT_MISTRAL_BASE_URL`
 * @returns Promise that resolves once the delete attempt has finished
 */
export async function deleteMistralFile({
  fileId,
  apiKey,
  baseURL = DEFAULT_MISTRAL_BASE_URL,
}: {
  fileId: string;
  apiKey: string;
  baseURL?: string;
}): Promise<void> {
  try {
    const endpoint = `${baseURL}/files/${fileId}`;
    const requestConfig = {
      headers: {
        Authorization: `Bearer ${apiKey}`,
      },
    };

    const { data } = await axios.delete(endpoint, requestConfig);
    logger.debug(`Mistral file ${fileId} deleted successfully:`, data);
  } catch (error) {
    // Deliberately not rethrown: the failure is only reported through the logger.
    logger.error(`Error deleting Mistral file ${fileId}:`, error);
  }
}
|
||||
|
||||
/**
|
||||
* Determines if a value needs to be loaded from environment
|
||||
*/
|
||||
|
|
@ -335,8 +364,14 @@ function createOCRError(error: unknown, baseMessage: string): Error {
|
|||
* along with the `filename` and `bytes` properties.
|
||||
*/
|
||||
export const uploadMistralOCR = async (context: OCRContext): Promise<MistralOCRUploadResult> => {
|
||||
let mistralFileId: string | undefined;
|
||||
let apiKey: string | undefined;
|
||||
let baseURL: string | undefined;
|
||||
|
||||
try {
|
||||
const { apiKey, baseURL } = await loadAuthConfig(context);
|
||||
const authConfig = await loadAuthConfig(context);
|
||||
apiKey = authConfig.apiKey;
|
||||
baseURL = authConfig.baseURL;
|
||||
const model = getModelConfig(context.req.app.locals?.ocr);
|
||||
|
||||
const mistralFile = await uploadDocumentToMistral({
|
||||
|
|
@ -346,6 +381,8 @@ export const uploadMistralOCR = async (context: OCRContext): Promise<MistralOCRU
|
|||
baseURL,
|
||||
});
|
||||
|
||||
mistralFileId = mistralFile.id;
|
||||
|
||||
const signedUrlResponse = await getSignedUrl({
|
||||
apiKey,
|
||||
baseURL,
|
||||
|
|
@ -354,11 +391,11 @@ export const uploadMistralOCR = async (context: OCRContext): Promise<MistralOCRU
|
|||
|
||||
const documentType = getDocumentType(context.file);
|
||||
const ocrResult = await performOCR({
|
||||
apiKey,
|
||||
baseURL,
|
||||
model,
|
||||
url: signedUrlResponse.url,
|
||||
documentType,
|
||||
baseURL,
|
||||
apiKey,
|
||||
model,
|
||||
});
|
||||
|
||||
if (!ocrResult || !ocrResult.pages || ocrResult.pages.length === 0) {
|
||||
|
|
@ -368,6 +405,10 @@ export const uploadMistralOCR = async (context: OCRContext): Promise<MistralOCRU
|
|||
}
|
||||
const { text, images } = processOCRResult(ocrResult);
|
||||
|
||||
if (mistralFileId && apiKey && baseURL) {
|
||||
await deleteMistralFile({ fileId: mistralFileId, apiKey, baseURL });
|
||||
}
|
||||
|
||||
return {
|
||||
filename: context.file.originalname,
|
||||
bytes: text.length * 4,
|
||||
|
|
@ -376,6 +417,9 @@ export const uploadMistralOCR = async (context: OCRContext): Promise<MistralOCRU
|
|||
images,
|
||||
};
|
||||
} catch (error) {
|
||||
if (mistralFileId && apiKey && baseURL) {
|
||||
await deleteMistralFile({ fileId: mistralFileId, apiKey, baseURL });
|
||||
}
|
||||
throw createOCRError(error, 'Error uploading document to Mistral OCR API:');
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -602,7 +602,7 @@ export class MCPOAuthHandler {
|
|||
/** Auto-discover OAuth configuration for refresh */
|
||||
const oauthMetadata = await discoverAuthorizationServerMetadata(metadata.serverUrl);
|
||||
|
||||
if (!oauthMetadata.token_endpoint) {
|
||||
if (!oauthMetadata?.token_endpoint) {
|
||||
throw new Error('No token endpoint found in OAuth metadata');
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -63,13 +63,10 @@ export const checkAccess = async ({
|
|||
}
|
||||
|
||||
const role = await getRoleByName(user.role);
|
||||
if (role && role.permissions && role.permissions[permissionType]) {
|
||||
const permissionValue = role?.permissions?.[permissionType as keyof typeof role.permissions];
|
||||
if (role && role.permissions && permissionValue) {
|
||||
const hasAnyPermission = permissions.every((permission) => {
|
||||
if (
|
||||
role.permissions?.[permissionType as keyof typeof role.permissions]?.[
|
||||
permission as keyof (typeof role.permissions)[typeof permissionType]
|
||||
]
|
||||
) {
|
||||
if (permissionValue[permission as keyof typeof permissionValue]) {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue