From 2513e0a4231b3d054ffe9fe0bffc184fadbf9616 Mon Sep 17 00:00:00 2001 From: ethanlaj Date: Sat, 14 Feb 2026 13:49:36 -0500 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=A7=20feat:=20`deleteRagFile`=20utilit?= =?UTF-8?q?y=20for=20Consistent=20RAG=20API=20document=20deletion=20(#1149?= =?UTF-8?q?3)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🔧 feat: Implement deleteRagFile utility for RAG API document deletion across storage strategies * chore: import order * chore: import order & remove unnecessary comments --------- Co-authored-by: Danny Avila --- api/server/services/Files/Azure/crud.js | 4 +- api/server/services/Files/Firebase/crud.js | 24 +--- api/server/services/Files/Local/crud.js | 24 +--- api/server/services/Files/S3/crud.js | 4 +- packages/api/src/files/index.ts | 1 + packages/api/src/files/rag.spec.ts | 150 +++++++++++++++++++++ packages/api/src/files/rag.ts | 60 +++++++++ 7 files changed, 221 insertions(+), 46 deletions(-) create mode 100644 packages/api/src/files/rag.spec.ts create mode 100644 packages/api/src/files/rag.ts diff --git a/api/server/services/Files/Azure/crud.js b/api/server/services/Files/Azure/crud.js index 25bd749276..8f681bd06c 100644 --- a/api/server/services/Files/Azure/crud.js +++ b/api/server/services/Files/Azure/crud.js @@ -4,7 +4,7 @@ const mime = require('mime'); const axios = require('axios'); const fetch = require('node-fetch'); const { logger } = require('@librechat/data-schemas'); -const { getAzureContainerClient } = require('@librechat/api'); +const { getAzureContainerClient, deleteRagFile } = require('@librechat/api'); const defaultBasePath = 'images'; const { AZURE_STORAGE_PUBLIC_ACCESS = 'true', AZURE_CONTAINER_NAME = 'files' } = process.env; @@ -102,6 +102,8 @@ async function getAzureURL({ fileName, basePath = defaultBasePath, userId, conta * @param {MongoFile} params.file - The file object. */ async function deleteFileFromAzure(req, file) { + await deleteRagFile({ userId: req.user.id, file }); + try { const containerClient = await getAzureContainerClient(AZURE_CONTAINER_NAME); const blobPath = file.filepath.split(`${AZURE_CONTAINER_NAME}/`)[1]; diff --git a/api/server/services/Files/Firebase/crud.js b/api/server/services/Files/Firebase/crud.js index 170df45677..d5e5a409bf 100644 --- a/api/server/services/Files/Firebase/crud.js +++ b/api/server/services/Files/Firebase/crud.js @@ -3,7 +3,7 @@ const path = require('path'); const axios = require('axios'); const fetch = require('node-fetch'); const { logger } = require('@librechat/data-schemas'); -const { getFirebaseStorage } = require('@librechat/api'); +const { getFirebaseStorage, deleteRagFile } = require('@librechat/api'); const { ref, uploadBytes, getDownloadURL, deleteObject } = require('firebase/storage'); const { getBufferMetadata } = require('~/server/utils'); @@ -167,27 +167,7 @@ function extractFirebaseFilePath(urlString) { * Throws an error if there is an issue with deletion. */ const deleteFirebaseFile = async (req, file) => { - if (file.embedded && process.env.RAG_API_URL) { - const jwtToken = req.headers.authorization.split(' ')[1]; - try { - await axios.delete(`${process.env.RAG_API_URL}/documents`, { - headers: { - Authorization: `Bearer ${jwtToken}`, - 'Content-Type': 'application/json', - accept: 'application/json', - }, - data: [file.file_id], - }); - } catch (error) { - if (error.response?.status === 404) { - logger.warn( - `[deleteFirebaseFile] Document ${file.file_id} not found in RAG API, may have been deleted already`, - ); - } else { - logger.error('[deleteFirebaseFile] Error deleting document from RAG API:', error); - } - } - } + await deleteRagFile({ userId: req.user.id, file }); const fileName = extractFirebaseFilePath(file.filepath); if (!fileName.includes(req.user.id)) { diff --git a/api/server/services/Files/Local/crud.js b/api/server/services/Files/Local/crud.js index b43ab75326..1f38a01f83 100644 --- a/api/server/services/Files/Local/crud.js +++ b/api/server/services/Files/Local/crud.js @@ -1,9 +1,9 @@ const fs = require('fs'); const path = require('path'); const axios = require('axios'); +const { deleteRagFile } = require('@librechat/api'); const { logger } = require('@librechat/data-schemas'); const { EModelEndpoint } = require('librechat-data-provider'); -const { generateShortLivedToken } = require('@librechat/api'); const { resizeImageBuffer } = require('~/server/services/Files/images/resize'); const { getBufferMetadata } = require('~/server/utils'); const paths = require('~/config/paths'); @@ -213,27 +213,7 @@ const deleteLocalFile = async (req, file) => { /** Filepath stripped of query parameters (e.g., ?manual=true) */ const cleanFilepath = file.filepath.split('?')[0]; - if (file.embedded && process.env.RAG_API_URL) { - const jwtToken = generateShortLivedToken(req.user.id); - try { - await axios.delete(`${process.env.RAG_API_URL}/documents`, { - headers: { - Authorization: `Bearer ${jwtToken}`, - 'Content-Type': 'application/json', - accept: 'application/json', - }, - data: [file.file_id], - }); - } catch (error) { - if (error.response?.status === 404) { - logger.warn( - `[deleteLocalFile] Document ${file.file_id} not found in RAG API, may have been deleted already`, - ); - } else { - logger.error('[deleteLocalFile] Error deleting document from RAG API:', error); - } - } - } + await deleteRagFile({ userId: req.user.id, file }); if (cleanFilepath.startsWith(`/uploads/${req.user.id}`)) { const userUploadDir = path.join(uploads, req.user.id); diff --git a/api/server/services/Files/S3/crud.js b/api/server/services/Files/S3/crud.js index 8dac767aa2..0721e33b29 100644 --- a/api/server/services/Files/S3/crud.js +++ b/api/server/services/Files/S3/crud.js @@ -1,9 +1,9 @@ const fs = require('fs'); const fetch = require('node-fetch'); -const { initializeS3 } = require('@librechat/api'); const { logger } = require('@librechat/data-schemas'); const { FileSources } = require('librechat-data-provider'); const { getSignedUrl } = require('@aws-sdk/s3-request-presigner'); +const { initializeS3, deleteRagFile } = require('@librechat/api'); const { PutObjectCommand, GetObjectCommand, @@ -142,6 +142,8 @@ async function saveURLToS3({ userId, URL, fileName, basePath = defaultBasePath } * @returns {Promise} */ async function deleteFileFromS3(req, file) { + await deleteRagFile({ userId: req.user.id, file }); + const key = extractKeyFromS3Url(file.filepath); const params = { Bucket: bucketName, Key: key }; if (!key.includes(req.user.id)) { diff --git a/packages/api/src/files/index.ts b/packages/api/src/files/index.ts index 8397878355..3aedc5ba9d 100644 --- a/packages/api/src/files/index.ts +++ b/packages/api/src/files/index.ts @@ -5,5 +5,6 @@ export * from './filter'; export * from './mistral/crud'; export * from './ocr'; export * from './parse'; +export * from './rag'; export * from './validation'; export * from './text'; diff --git a/packages/api/src/files/rag.spec.ts b/packages/api/src/files/rag.spec.ts new file mode 100644 index 0000000000..9d8ea2d4b3 --- /dev/null +++ b/packages/api/src/files/rag.spec.ts @@ -0,0 +1,150 @@ +jest.mock('@librechat/data-schemas', () => ({ + logger: { + debug: jest.fn(), + warn: jest.fn(), + error: jest.fn(), + }, +})); + +jest.mock('~/crypto/jwt', () => ({ + generateShortLivedToken: jest.fn().mockReturnValue('mock-jwt-token'), +})); + +jest.mock('axios', () => ({ + delete: jest.fn(), + interceptors: { + request: { use: jest.fn(), eject: jest.fn() }, + response: { use: jest.fn(), eject: jest.fn() }, + }, +})); + +import axios from 'axios'; +import { deleteRagFile } from './rag'; +import { logger } from '@librechat/data-schemas'; +import { generateShortLivedToken } from '~/crypto/jwt'; + +const mockedAxios = axios as jest.Mocked; +const mockedLogger = logger as jest.Mocked; +const mockedGenerateShortLivedToken = generateShortLivedToken as jest.MockedFunction< + typeof generateShortLivedToken +>; + +describe('deleteRagFile', () => { + const originalEnv = process.env; + + beforeEach(() => { + jest.clearAllMocks(); + process.env = { ...originalEnv }; + process.env.RAG_API_URL = 'http://localhost:8000'; + }); + + afterEach(() => { + process.env = originalEnv; + }); + + describe('when file is embedded and RAG_API_URL is configured', () => { + it('should delete the document from RAG API successfully', async () => { + const file = { file_id: 'file-123', embedded: true }; + mockedAxios.delete.mockResolvedValueOnce({ status: 200 }); + + const result = await deleteRagFile({ userId: 'user123', file }); + + expect(result).toBe(true); + expect(mockedGenerateShortLivedToken).toHaveBeenCalledWith('user123'); + expect(mockedAxios.delete).toHaveBeenCalledWith('http://localhost:8000/documents', { + headers: { + Authorization: 'Bearer mock-jwt-token', + 'Content-Type': 'application/json', + accept: 'application/json', + }, + data: ['file-123'], + }); + expect(mockedLogger.debug).toHaveBeenCalledWith( + '[deleteRagFile] Successfully deleted document file-123 from RAG API', + ); + }); + + it('should return true and log warning when document is not found (404)', async () => { + const file = { file_id: 'file-not-found', embedded: true }; + const error = new Error('Not Found') as Error & { response?: { status?: number } }; + error.response = { status: 404 }; + mockedAxios.delete.mockRejectedValueOnce(error); + + const result = await deleteRagFile({ userId: 'user123', file }); + + expect(result).toBe(true); + expect(mockedLogger.warn).toHaveBeenCalledWith( + '[deleteRagFile] Document file-not-found not found in RAG API, may have been deleted already', + ); + }); + + it('should return false and log error on other errors', async () => { + const file = { file_id: 'file-error', embedded: true }; + const error = new Error('Server Error') as Error & { response?: { status?: number } }; + error.response = { status: 500 }; + mockedAxios.delete.mockRejectedValueOnce(error); + + const result = await deleteRagFile({ userId: 'user123', file }); + + expect(result).toBe(false); + expect(mockedLogger.error).toHaveBeenCalledWith( + '[deleteRagFile] Error deleting document from RAG API:', + 'Server Error', + ); + }); + }); + + describe('when file is not embedded', () => { + it('should skip RAG deletion and return true', async () => { + const file = { file_id: 'file-123', embedded: false }; + + const result = await deleteRagFile({ userId: 'user123', file }); + + expect(result).toBe(true); + expect(mockedAxios.delete).not.toHaveBeenCalled(); + expect(mockedGenerateShortLivedToken).not.toHaveBeenCalled(); + }); + + it('should skip RAG deletion when embedded is undefined', async () => { + const file = { file_id: 'file-123' }; + + const result = await deleteRagFile({ userId: 'user123', file }); + + expect(result).toBe(true); + expect(mockedAxios.delete).not.toHaveBeenCalled(); + }); + }); + + describe('when RAG_API_URL is not configured', () => { + it('should skip RAG deletion and return true', async () => { + delete process.env.RAG_API_URL; + const file = { file_id: 'file-123', embedded: true }; + + const result = await deleteRagFile({ userId: 'user123', file }); + + expect(result).toBe(true); + expect(mockedAxios.delete).not.toHaveBeenCalled(); + }); + }); + + describe('userId handling', () => { + it('should return false when no userId is provided', async () => { + const file = { file_id: 'file-123', embedded: true }; + + const result = await deleteRagFile({ userId: '', file }); + + expect(result).toBe(false); + expect(mockedLogger.error).toHaveBeenCalledWith('[deleteRagFile] No user ID provided'); + expect(mockedAxios.delete).not.toHaveBeenCalled(); + }); + + it('should return false when userId is undefined', async () => { + const file = { file_id: 'file-123', embedded: true }; + + const result = await deleteRagFile({ userId: undefined as unknown as string, file }); + + expect(result).toBe(false); + expect(mockedLogger.error).toHaveBeenCalledWith('[deleteRagFile] No user ID provided'); + }); + }); +}); diff --git a/packages/api/src/files/rag.ts b/packages/api/src/files/rag.ts new file mode 100644 index 0000000000..7155f62c12 --- /dev/null +++ b/packages/api/src/files/rag.ts @@ -0,0 +1,60 @@ +import axios from 'axios'; +import { logger } from '@librechat/data-schemas'; +import { generateShortLivedToken } from '~/crypto/jwt'; + +interface DeleteRagFileParams { + /** The user ID. Required for authentication. If not provided, the function returns false and logs an error. */ + userId: string; + /** The file object. Must have `embedded` and `file_id` properties. */ + file: { + file_id: string; + embedded?: boolean; + }; +} + +/** + * Deletes embedded document(s) from the RAG API. + * This is a shared utility function used by all file storage strategies + * (S3, Azure, Firebase, Local) to delete RAG embeddings when a file is deleted. + * + * @param params - The parameters object. + * @param params.userId - The user ID for authentication. + * @param params.file - The file object. Must have `embedded` and `file_id` properties. + * @returns Returns true if deletion was successful or skipped, false if there was an error. + */ +export async function deleteRagFile({ userId, file }: DeleteRagFileParams): Promise { + if (!file.embedded || !process.env.RAG_API_URL) { + return true; + } + + if (!userId) { + logger.error('[deleteRagFile] No user ID provided'); + return false; + } + + const jwtToken = generateShortLivedToken(userId); + + try { + await axios.delete(`${process.env.RAG_API_URL}/documents`, { + headers: { + Authorization: `Bearer ${jwtToken}`, + 'Content-Type': 'application/json', + accept: 'application/json', + }, + data: [file.file_id], + }); + logger.debug(`[deleteRagFile] Successfully deleted document ${file.file_id} from RAG API`); + return true; + } catch (error) { + const axiosError = error as { response?: { status?: number }; message?: string }; + if (axiosError.response?.status === 404) { + logger.warn( + `[deleteRagFile] Document ${file.file_id} not found in RAG API, may have been deleted already`, + ); + return true; + } else { + logger.error('[deleteRagFile] Error deleting document from RAG API:', axiosError.message); + return false; + } + } +}