mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-02-15 15:08:10 +01:00
🔧 feat: deleteRagFile utility for Consistent RAG API document deletion (#11493)
* 🔧 feat: Implement deleteRagFile utility for RAG API document deletion across storage strategies
* chore: import order
* chore: import order & remove unnecessary comments
---------
Co-authored-by: Danny Avila <danacordially@gmail.com>
This commit is contained in:
parent
a89945c24b
commit
2513e0a423
7 changed files with 221 additions and 46 deletions
|
|
@ -4,7 +4,7 @@ const mime = require('mime');
|
|||
const axios = require('axios');
|
||||
const fetch = require('node-fetch');
|
||||
const { logger } = require('@librechat/data-schemas');
|
||||
const { getAzureContainerClient } = require('@librechat/api');
|
||||
const { getAzureContainerClient, deleteRagFile } = require('@librechat/api');
|
||||
|
||||
const defaultBasePath = 'images';
|
||||
const { AZURE_STORAGE_PUBLIC_ACCESS = 'true', AZURE_CONTAINER_NAME = 'files' } = process.env;
|
||||
|
|
@ -102,6 +102,8 @@ async function getAzureURL({ fileName, basePath = defaultBasePath, userId, conta
|
|||
* @param {MongoFile} params.file - The file object.
|
||||
*/
|
||||
async function deleteFileFromAzure(req, file) {
|
||||
await deleteRagFile({ userId: req.user.id, file });
|
||||
|
||||
try {
|
||||
const containerClient = await getAzureContainerClient(AZURE_CONTAINER_NAME);
|
||||
const blobPath = file.filepath.split(`${AZURE_CONTAINER_NAME}/`)[1];
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ const path = require('path');
|
|||
const axios = require('axios');
|
||||
const fetch = require('node-fetch');
|
||||
const { logger } = require('@librechat/data-schemas');
|
||||
const { getFirebaseStorage } = require('@librechat/api');
|
||||
const { getFirebaseStorage, deleteRagFile } = require('@librechat/api');
|
||||
const { ref, uploadBytes, getDownloadURL, deleteObject } = require('firebase/storage');
|
||||
const { getBufferMetadata } = require('~/server/utils');
|
||||
|
||||
|
|
@ -167,27 +167,7 @@ function extractFirebaseFilePath(urlString) {
|
|||
* Throws an error if there is an issue with deletion.
|
||||
*/
|
||||
const deleteFirebaseFile = async (req, file) => {
|
||||
if (file.embedded && process.env.RAG_API_URL) {
|
||||
const jwtToken = req.headers.authorization.split(' ')[1];
|
||||
try {
|
||||
await axios.delete(`${process.env.RAG_API_URL}/documents`, {
|
||||
headers: {
|
||||
Authorization: `Bearer ${jwtToken}`,
|
||||
'Content-Type': 'application/json',
|
||||
accept: 'application/json',
|
||||
},
|
||||
data: [file.file_id],
|
||||
});
|
||||
} catch (error) {
|
||||
if (error.response?.status === 404) {
|
||||
logger.warn(
|
||||
`[deleteFirebaseFile] Document ${file.file_id} not found in RAG API, may have been deleted already`,
|
||||
);
|
||||
} else {
|
||||
logger.error('[deleteFirebaseFile] Error deleting document from RAG API:', error);
|
||||
}
|
||||
}
|
||||
}
|
||||
await deleteRagFile({ userId: req.user.id, file });
|
||||
|
||||
const fileName = extractFirebaseFilePath(file.filepath);
|
||||
if (!fileName.includes(req.user.id)) {
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const axios = require('axios');
|
||||
const { deleteRagFile } = require('@librechat/api');
|
||||
const { logger } = require('@librechat/data-schemas');
|
||||
const { EModelEndpoint } = require('librechat-data-provider');
|
||||
const { generateShortLivedToken } = require('@librechat/api');
|
||||
const { resizeImageBuffer } = require('~/server/services/Files/images/resize');
|
||||
const { getBufferMetadata } = require('~/server/utils');
|
||||
const paths = require('~/config/paths');
|
||||
|
|
@ -213,27 +213,7 @@ const deleteLocalFile = async (req, file) => {
|
|||
/** Filepath stripped of query parameters (e.g., ?manual=true) */
|
||||
const cleanFilepath = file.filepath.split('?')[0];
|
||||
|
||||
if (file.embedded && process.env.RAG_API_URL) {
|
||||
const jwtToken = generateShortLivedToken(req.user.id);
|
||||
try {
|
||||
await axios.delete(`${process.env.RAG_API_URL}/documents`, {
|
||||
headers: {
|
||||
Authorization: `Bearer ${jwtToken}`,
|
||||
'Content-Type': 'application/json',
|
||||
accept: 'application/json',
|
||||
},
|
||||
data: [file.file_id],
|
||||
});
|
||||
} catch (error) {
|
||||
if (error.response?.status === 404) {
|
||||
logger.warn(
|
||||
`[deleteLocalFile] Document ${file.file_id} not found in RAG API, may have been deleted already`,
|
||||
);
|
||||
} else {
|
||||
logger.error('[deleteLocalFile] Error deleting document from RAG API:', error);
|
||||
}
|
||||
}
|
||||
}
|
||||
await deleteRagFile({ userId: req.user.id, file });
|
||||
|
||||
if (cleanFilepath.startsWith(`/uploads/${req.user.id}`)) {
|
||||
const userUploadDir = path.join(uploads, req.user.id);
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
const fs = require('fs');
|
||||
const fetch = require('node-fetch');
|
||||
const { initializeS3 } = require('@librechat/api');
|
||||
const { logger } = require('@librechat/data-schemas');
|
||||
const { FileSources } = require('librechat-data-provider');
|
||||
const { getSignedUrl } = require('@aws-sdk/s3-request-presigner');
|
||||
const { initializeS3, deleteRagFile } = require('@librechat/api');
|
||||
const {
|
||||
PutObjectCommand,
|
||||
GetObjectCommand,
|
||||
|
|
@ -142,6 +142,8 @@ async function saveURLToS3({ userId, URL, fileName, basePath = defaultBasePath }
|
|||
* @returns {Promise<void>}
|
||||
*/
|
||||
async function deleteFileFromS3(req, file) {
|
||||
await deleteRagFile({ userId: req.user.id, file });
|
||||
|
||||
const key = extractKeyFromS3Url(file.filepath);
|
||||
const params = { Bucket: bucketName, Key: key };
|
||||
if (!key.includes(req.user.id)) {
|
||||
|
|
|
|||
|
|
@ -5,5 +5,6 @@ export * from './filter';
|
|||
export * from './mistral/crud';
|
||||
export * from './ocr';
|
||||
export * from './parse';
|
||||
export * from './rag';
|
||||
export * from './validation';
|
||||
export * from './text';
|
||||
|
|
|
|||
150
packages/api/src/files/rag.spec.ts
Normal file
150
packages/api/src/files/rag.spec.ts
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
/**
 * Unit tests for `deleteRagFile`.
 *
 * The logger, the JWT helper and axios are all replaced with jest mocks so the
 * suite never performs network I/O and every log call can be asserted on.
 */
jest.mock('@librechat/data-schemas', () => {
  const logger = { debug: jest.fn(), warn: jest.fn(), error: jest.fn() };
  return { logger };
});

jest.mock('~/crypto/jwt', () => {
  const generateShortLivedToken = jest.fn().mockReturnValue('mock-jwt-token');
  return { generateShortLivedToken };
});

jest.mock('axios', () => {
  /** Builds one interceptor manager stub (`use` / `eject`). */
  const interceptorStub = () => ({ use: jest.fn(), eject: jest.fn() });
  return {
    delete: jest.fn(),
    interceptors: {
      request: interceptorStub(),
      response: interceptorStub(),
    },
  };
});

import axios from 'axios';
import { deleteRagFile } from './rag';
import { logger } from '@librechat/data-schemas';
import { generateShortLivedToken } from '~/crypto/jwt';

const axiosMock = axios as jest.Mocked<typeof axios>;
const loggerMock = logger as jest.Mocked<typeof logger>;
const tokenMock = generateShortLivedToken as jest.MockedFunction<typeof generateShortLivedToken>;

describe('deleteRagFile', () => {
  /** Snapshot of the real environment, restored after every test. */
  const envSnapshot = process.env;
  const userId = 'user123';

  /** Creates an Error carrying an axios-like `response.status`. */
  const httpError = (message: string, status: number) =>
    Object.assign(new Error(message), { response: { status } });

  beforeEach(() => {
    jest.clearAllMocks();
    // Work on a copy so individual tests may freely mutate or delete variables.
    process.env = { ...envSnapshot, RAG_API_URL: 'http://localhost:8000' };
  });

  afterEach(() => {
    process.env = envSnapshot;
  });

  describe('when file is embedded and RAG_API_URL is configured', () => {
    it('should delete the document from RAG API successfully', async () => {
      axiosMock.delete.mockResolvedValueOnce({ status: 200 });

      const outcome = await deleteRagFile({
        userId,
        file: { file_id: 'file-123', embedded: true },
      });

      const expectedConfig = {
        headers: {
          Authorization: 'Bearer mock-jwt-token',
          'Content-Type': 'application/json',
          accept: 'application/json',
        },
        data: ['file-123'],
      };

      expect(outcome).toBe(true);
      expect(tokenMock).toHaveBeenCalledWith('user123');
      expect(axiosMock.delete).toHaveBeenCalledWith(
        'http://localhost:8000/documents',
        expectedConfig,
      );
      expect(loggerMock.debug).toHaveBeenCalledWith(
        '[deleteRagFile] Successfully deleted document file-123 from RAG API',
      );
    });

    it('should return true and log warning when document is not found (404)', async () => {
      axiosMock.delete.mockRejectedValueOnce(httpError('Not Found', 404));

      const outcome = await deleteRagFile({
        userId,
        file: { file_id: 'file-not-found', embedded: true },
      });

      expect(outcome).toBe(true);
      expect(loggerMock.warn).toHaveBeenCalledWith(
        '[deleteRagFile] Document file-not-found not found in RAG API, may have been deleted already',
      );
    });

    it('should return false and log error on other errors', async () => {
      axiosMock.delete.mockRejectedValueOnce(httpError('Server Error', 500));

      const outcome = await deleteRagFile({
        userId,
        file: { file_id: 'file-error', embedded: true },
      });

      expect(outcome).toBe(false);
      expect(loggerMock.error).toHaveBeenCalledWith(
        '[deleteRagFile] Error deleting document from RAG API:',
        'Server Error',
      );
    });
  });

  describe('when file is not embedded', () => {
    it('should skip RAG deletion and return true', async () => {
      const outcome = await deleteRagFile({
        userId,
        file: { file_id: 'file-123', embedded: false },
      });

      expect(outcome).toBe(true);
      expect(axiosMock.delete).not.toHaveBeenCalled();
      expect(tokenMock).not.toHaveBeenCalled();
    });

    it('should skip RAG deletion when embedded is undefined', async () => {
      const outcome = await deleteRagFile({ userId, file: { file_id: 'file-123' } });

      expect(outcome).toBe(true);
      expect(axiosMock.delete).not.toHaveBeenCalled();
    });
  });

  describe('when RAG_API_URL is not configured', () => {
    it('should skip RAG deletion and return true', async () => {
      delete process.env.RAG_API_URL;

      const outcome = await deleteRagFile({
        userId,
        file: { file_id: 'file-123', embedded: true },
      });

      expect(outcome).toBe(true);
      expect(axiosMock.delete).not.toHaveBeenCalled();
    });
  });

  describe('userId handling', () => {
    it('should return false when no userId is provided', async () => {
      const outcome = await deleteRagFile({
        userId: '',
        file: { file_id: 'file-123', embedded: true },
      });

      expect(outcome).toBe(false);
      expect(loggerMock.error).toHaveBeenCalledWith('[deleteRagFile] No user ID provided');
      expect(axiosMock.delete).not.toHaveBeenCalled();
    });

    it('should return false when userId is undefined', async () => {
      const outcome = await deleteRagFile({
        userId: undefined as unknown as string,
        file: { file_id: 'file-123', embedded: true },
      });

      expect(outcome).toBe(false);
      expect(loggerMock.error).toHaveBeenCalledWith('[deleteRagFile] No user ID provided');
    });
  });
});
|
||||
60
packages/api/src/files/rag.ts
Normal file
60
packages/api/src/files/rag.ts
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
import axios from 'axios';
|
||||
import { logger } from '@librechat/data-schemas';
|
||||
import { generateShortLivedToken } from '~/crypto/jwt';
|
||||
|
||||
interface DeleteRagFileParams {
  /**
   * ID of the user the deletion is performed for; used to mint the short-lived
   * JWT sent to the RAG API. When empty, the function logs an error and returns false.
   */
  userId: string;
  /** The file being deleted. Only `file_id` and the optional `embedded` flag are read. */
  file: {
    file_id: string;
    embedded?: boolean;
  };
}

/**
 * Extracts the HTTP status and message from a caught value without assuming its shape,
 * so that a thrown `null`, `undefined` or primitive cannot crash the error handler.
 */
function describeRagError(error: unknown): { status?: number; message?: string } {
  if (typeof error === 'string') {
    return { message: error };
  }
  if (typeof error !== 'object' || error === null) {
    return {};
  }

  const { response, message } = error as { response?: unknown; message?: unknown };
  const status =
    typeof response === 'object' && response !== null
      ? (response as { status?: unknown }).status
      : undefined;

  return {
    status: typeof status === 'number' ? status : undefined,
    message: typeof message === 'string' ? message : undefined,
  };
}

/**
 * Deletes the embedded document for a file from the RAG API.
 *
 * Shared by the file storage strategies (S3, Azure, Firebase, Local) so that RAG
 * embeddings are removed the same way whenever a file is deleted. The call is
 * best-effort: failures are logged and reported through the return value, never thrown.
 *
 * @param params - The parameters object.
 * @param params.userId - The user ID used to generate the authentication token.
 * @param params.file - The file object; deletion is skipped unless `embedded` is truthy.
 * @returns `true` when the document was deleted, was already gone (404), or deletion was
 *   skipped (file not embedded / `RAG_API_URL` unset); `false` when the user ID is missing
 *   or the request failed.
 */
export async function deleteRagFile({ userId, file }: DeleteRagFileParams): Promise<boolean> {
  const ragApiUrl = process.env.RAG_API_URL;
  if (!file.embedded || !ragApiUrl) {
    return true;
  }

  if (!userId) {
    logger.error('[deleteRagFile] No user ID provided');
    return false;
  }

  // A trailing slash would produce `//documents`; the resulting 404 would then be
  // mistaken for "already deleted" and the embeddings would silently remain.
  const baseUrl = ragApiUrl.replace(/\/+$/, '');

  try {
    // Token generation lives inside the try block so that a signing failure is
    // reported as `false` instead of aborting the caller's storage deletion.
    const jwtToken = generateShortLivedToken(userId);

    await axios.delete(`${baseUrl}/documents`, {
      headers: {
        Authorization: `Bearer ${jwtToken}`,
        'Content-Type': 'application/json',
        accept: 'application/json',
      },
      data: [file.file_id],
    });
    logger.debug(`[deleteRagFile] Successfully deleted document ${file.file_id} from RAG API`);
    return true;
  } catch (error: unknown) {
    const { status, message } = describeRagError(error);

    if (status === 404) {
      // Nothing left to delete: treat as success so file removal can proceed.
      logger.warn(
        `[deleteRagFile] Document ${file.file_id} not found in RAG API, may have been deleted already`,
      );
      return true;
    }

    logger.error('[deleteRagFile] Error deleting document from RAG API:', message);
    return false;
  }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue