🔧 feat: deleteRagFile utility for Consistent RAG API document deletion (#11493)

* 🔧 feat: Implement deleteRagFile utility for RAG API document deletion across storage strategies

* chore: import order

* chore: import order & remove unnecessary comments

---------

Co-authored-by: Danny Avila <danacordially@gmail.com>
This commit is contained in:
ethanlaj 2026-02-14 13:49:36 -05:00 committed by Danny Avila
parent a89945c24b
commit 2513e0a423
No known key found for this signature in database
GPG key ID: BF31EEB2C5CA0956
7 changed files with 221 additions and 46 deletions

View file

@ -4,7 +4,7 @@ const mime = require('mime');
const axios = require('axios');
const fetch = require('node-fetch');
const { logger } = require('@librechat/data-schemas');
const { getAzureContainerClient } = require('@librechat/api');
const { getAzureContainerClient, deleteRagFile } = require('@librechat/api');
const defaultBasePath = 'images';
const { AZURE_STORAGE_PUBLIC_ACCESS = 'true', AZURE_CONTAINER_NAME = 'files' } = process.env;
@ -102,6 +102,8 @@ async function getAzureURL({ fileName, basePath = defaultBasePath, userId, conta
* @param {MongoFile} params.file - The file object.
*/
async function deleteFileFromAzure(req, file) {
await deleteRagFile({ userId: req.user.id, file });
try {
const containerClient = await getAzureContainerClient(AZURE_CONTAINER_NAME);
const blobPath = file.filepath.split(`${AZURE_CONTAINER_NAME}/`)[1];

View file

@ -3,7 +3,7 @@ const path = require('path');
const axios = require('axios');
const fetch = require('node-fetch');
const { logger } = require('@librechat/data-schemas');
const { getFirebaseStorage } = require('@librechat/api');
const { getFirebaseStorage, deleteRagFile } = require('@librechat/api');
const { ref, uploadBytes, getDownloadURL, deleteObject } = require('firebase/storage');
const { getBufferMetadata } = require('~/server/utils');
@ -167,27 +167,7 @@ function extractFirebaseFilePath(urlString) {
* Throws an error if there is an issue with deletion.
*/
const deleteFirebaseFile = async (req, file) => {
if (file.embedded && process.env.RAG_API_URL) {
const jwtToken = req.headers.authorization.split(' ')[1];
try {
await axios.delete(`${process.env.RAG_API_URL}/documents`, {
headers: {
Authorization: `Bearer ${jwtToken}`,
'Content-Type': 'application/json',
accept: 'application/json',
},
data: [file.file_id],
});
} catch (error) {
if (error.response?.status === 404) {
logger.warn(
`[deleteFirebaseFile] Document ${file.file_id} not found in RAG API, may have been deleted already`,
);
} else {
logger.error('[deleteFirebaseFile] Error deleting document from RAG API:', error);
}
}
}
await deleteRagFile({ userId: req.user.id, file });
const fileName = extractFirebaseFilePath(file.filepath);
if (!fileName.includes(req.user.id)) {

View file

@ -1,9 +1,9 @@
const fs = require('fs');
const path = require('path');
const axios = require('axios');
const { deleteRagFile } = require('@librechat/api');
const { logger } = require('@librechat/data-schemas');
const { EModelEndpoint } = require('librechat-data-provider');
const { generateShortLivedToken } = require('@librechat/api');
const { resizeImageBuffer } = require('~/server/services/Files/images/resize');
const { getBufferMetadata } = require('~/server/utils');
const paths = require('~/config/paths');
@ -213,27 +213,7 @@ const deleteLocalFile = async (req, file) => {
/** Filepath stripped of query parameters (e.g., ?manual=true) */
const cleanFilepath = file.filepath.split('?')[0];
if (file.embedded && process.env.RAG_API_URL) {
const jwtToken = generateShortLivedToken(req.user.id);
try {
await axios.delete(`${process.env.RAG_API_URL}/documents`, {
headers: {
Authorization: `Bearer ${jwtToken}`,
'Content-Type': 'application/json',
accept: 'application/json',
},
data: [file.file_id],
});
} catch (error) {
if (error.response?.status === 404) {
logger.warn(
`[deleteLocalFile] Document ${file.file_id} not found in RAG API, may have been deleted already`,
);
} else {
logger.error('[deleteLocalFile] Error deleting document from RAG API:', error);
}
}
}
await deleteRagFile({ userId: req.user.id, file });
if (cleanFilepath.startsWith(`/uploads/${req.user.id}`)) {
const userUploadDir = path.join(uploads, req.user.id);

View file

@ -1,9 +1,9 @@
const fs = require('fs');
const fetch = require('node-fetch');
const { initializeS3 } = require('@librechat/api');
const { logger } = require('@librechat/data-schemas');
const { FileSources } = require('librechat-data-provider');
const { getSignedUrl } = require('@aws-sdk/s3-request-presigner');
const { initializeS3, deleteRagFile } = require('@librechat/api');
const {
PutObjectCommand,
GetObjectCommand,
@ -142,6 +142,8 @@ async function saveURLToS3({ userId, URL, fileName, basePath = defaultBasePath }
* @returns {Promise<void>}
*/
async function deleteFileFromS3(req, file) {
await deleteRagFile({ userId: req.user.id, file });
const key = extractKeyFromS3Url(file.filepath);
const params = { Bucket: bucketName, Key: key };
if (!key.includes(req.user.id)) {

View file

@ -5,5 +5,6 @@ export * from './filter';
export * from './mistral/crud';
export * from './ocr';
export * from './parse';
export * from './rag';
export * from './validation';
export * from './text';

View file

@ -0,0 +1,150 @@
// Replace the shared logger with jest stubs so the tests can assert on the
// exact debug/warn/error messages emitted by deleteRagFile.
jest.mock('@librechat/data-schemas', () => ({
logger: {
debug: jest.fn(),
warn: jest.fn(),
error: jest.fn(),
},
}));
// Return a fixed token so the expected Authorization header is deterministic.
jest.mock('~/crypto/jwt', () => ({
generateShortLivedToken: jest.fn().mockReturnValue('mock-jwt-token'),
}));
// Stub axios so no HTTP request is made; only `delete` is exercised by these tests.
// NOTE(review): the `interceptors` stubs are presumably needed because some module
// loaded during import registers interceptors — confirm before removing.
jest.mock('axios', () => ({
delete: jest.fn(),
interceptors: {
request: { use: jest.fn(), eject: jest.fn() },
response: { use: jest.fn(), eject: jest.fn() },
},
}));
import axios from 'axios';
import { deleteRagFile } from './rag';
import { logger } from '@librechat/data-schemas';
import { generateShortLivedToken } from '~/crypto/jwt';
// Typed handles onto the mocked modules so mock helpers and matchers type-check.
const mockedAxios = axios as jest.Mocked<typeof axios>;
const mockedLogger = logger as jest.Mocked<typeof logger>;
const mockedGenerateShortLivedToken = generateShortLivedToken as jest.MockedFunction<
  typeof generateShortLivedToken
>;

describe('deleteRagFile', () => {
  const envSnapshot = process.env;
  const userId = 'user123';

  /** Builds a file fixture that is flagged as embedded in the RAG API. */
  const embeddedFile = (file_id: string) => ({ file_id, embedded: true });

  /** Builds an Error carrying an axios-like `response.status`. */
  const httpError = (message: string, status: number) =>
    Object.assign(new Error(message), { response: { status } });

  beforeEach(() => {
    jest.clearAllMocks();
    // Work on a copy of the environment so each test can mutate it freely.
    process.env = { ...envSnapshot, RAG_API_URL: 'http://localhost:8000' };
  });

  afterEach(() => {
    process.env = envSnapshot;
  });

  describe('when file is embedded and RAG_API_URL is configured', () => {
    it('should delete the document from RAG API successfully', async () => {
      mockedAxios.delete.mockResolvedValueOnce({ status: 200 });

      const outcome = await deleteRagFile({ userId, file: embeddedFile('file-123') });

      expect(outcome).toBe(true);
      expect(mockedGenerateShortLivedToken).toHaveBeenCalledWith('user123');
      expect(mockedAxios.delete).toHaveBeenCalledWith('http://localhost:8000/documents', {
        headers: {
          Authorization: 'Bearer mock-jwt-token',
          'Content-Type': 'application/json',
          accept: 'application/json',
        },
        data: ['file-123'],
      });
      expect(mockedLogger.debug).toHaveBeenCalledWith(
        '[deleteRagFile] Successfully deleted document file-123 from RAG API',
      );
    });

    it('should return true and log warning when document is not found (404)', async () => {
      mockedAxios.delete.mockRejectedValueOnce(httpError('Not Found', 404));

      const outcome = await deleteRagFile({ userId, file: embeddedFile('file-not-found') });

      expect(outcome).toBe(true);
      expect(mockedLogger.warn).toHaveBeenCalledWith(
        '[deleteRagFile] Document file-not-found not found in RAG API, may have been deleted already',
      );
    });

    it('should return false and log error on other errors', async () => {
      mockedAxios.delete.mockRejectedValueOnce(httpError('Server Error', 500));

      const outcome = await deleteRagFile({ userId, file: embeddedFile('file-error') });

      expect(outcome).toBe(false);
      expect(mockedLogger.error).toHaveBeenCalledWith(
        '[deleteRagFile] Error deleting document from RAG API:',
        'Server Error',
      );
    });
  });

  describe('when file is not embedded', () => {
    it('should skip RAG deletion and return true', async () => {
      const outcome = await deleteRagFile({
        userId,
        file: { file_id: 'file-123', embedded: false },
      });

      expect(outcome).toBe(true);
      expect(mockedAxios.delete).not.toHaveBeenCalled();
      expect(mockedGenerateShortLivedToken).not.toHaveBeenCalled();
    });

    it('should skip RAG deletion when embedded is undefined', async () => {
      const outcome = await deleteRagFile({ userId, file: { file_id: 'file-123' } });

      expect(outcome).toBe(true);
      expect(mockedAxios.delete).not.toHaveBeenCalled();
    });
  });

  describe('when RAG_API_URL is not configured', () => {
    it('should skip RAG deletion and return true', async () => {
      delete process.env.RAG_API_URL;

      const outcome = await deleteRagFile({ userId, file: embeddedFile('file-123') });

      expect(outcome).toBe(true);
      expect(mockedAxios.delete).not.toHaveBeenCalled();
    });
  });

  describe('userId handling', () => {
    it('should return false when no userId is provided', async () => {
      const outcome = await deleteRagFile({ userId: '', file: embeddedFile('file-123') });

      expect(outcome).toBe(false);
      expect(mockedLogger.error).toHaveBeenCalledWith('[deleteRagFile] No user ID provided');
      expect(mockedAxios.delete).not.toHaveBeenCalled();
    });

    it('should return false when userId is undefined', async () => {
      const outcome = await deleteRagFile({
        userId: undefined as unknown as string,
        file: embeddedFile('file-123'),
      });

      expect(outcome).toBe(false);
      expect(mockedLogger.error).toHaveBeenCalledWith('[deleteRagFile] No user ID provided');
    });
  });
});

View file

@ -0,0 +1,60 @@
import axios from 'axios';
import { logger } from '@librechat/data-schemas';
import { generateShortLivedToken } from '~/crypto/jwt';
/** The subset of a stored file record that RAG deletion reads. */
interface RagDeletableFile {
  /** Identifier sent to the RAG API as the document to delete. */
  file_id: string;
  /** Whether the file has embeddings in the RAG API; falsy values skip deletion. */
  embedded?: boolean;
}

/** Arguments accepted by `deleteRagFile`. */
interface DeleteRagFileParams {
  /**
   * ID of the user the short-lived auth token is generated for.
   * When it is empty and a deletion would otherwise be attempted, the function
   * logs an error and returns false.
   */
  userId: string;
  /** The file whose embedded document should be removed. */
  file: RagDeletableFile;
}
/**
 * Deletes the embedded document for a file from the RAG API.
 * Shared by the file storage strategies (S3, Azure, Firebase, Local) so that
 * RAG embeddings are removed the same way whenever a file is deleted.
 *
 * The function never rejects: every failure, including a failure to create the
 * auth token, is logged and reported through the boolean result, so callers can
 * continue with their own storage cleanup.
 *
 * @param params - The parameters object.
 * @param params.userId - The user ID used to generate the short-lived auth token.
 * @param params.file - The file object; only `embedded` and `file_id` are read.
 * @returns `true` when the document was deleted, was already absent (404), or
 *   deletion was skipped (file not embedded or `RAG_API_URL` unset); `false`
 *   when the user ID is missing or the request failed for any other reason.
 */
export async function deleteRagFile({ userId, file }: DeleteRagFileParams): Promise<boolean> {
  const ragApiUrl = process.env.RAG_API_URL;

  // Nothing to clean up when the file has no embeddings or RAG is not configured.
  if (!file.embedded || !ragApiUrl) {
    return true;
  }

  if (!userId) {
    logger.error('[deleteRagFile] No user ID provided');
    return false;
  }

  try {
    // Token creation lives inside the try so that a failure here is logged and
    // reported as `false` instead of rejecting and aborting the caller's deletion.
    const jwtToken = generateShortLivedToken(userId);

    await axios.delete(`${ragApiUrl}/documents`, {
      headers: {
        Authorization: `Bearer ${jwtToken}`,
        'Content-Type': 'application/json',
        accept: 'application/json',
      },
      data: [file.file_id],
    });

    logger.debug(`[deleteRagFile] Successfully deleted document ${file.file_id} from RAG API`);
    return true;
  } catch (error) {
    // Thrown values are untyped; read only the fields we need and tolerate nullish throws.
    const axiosError = error as
      | { response?: { status?: number }; message?: string }
      | null
      | undefined;

    // A missing document means the desired end state is already reached.
    if (axiosError?.response?.status === 404) {
      logger.warn(
        `[deleteRagFile] Document ${file.file_id} not found in RAG API, may have been deleted already`,
      );
      return true;
    }

    logger.error('[deleteRagFile] Error deleting document from RAG API:', axiosError?.message);
    return false;
  }
}