mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-09-22 06:00:56 +02:00

* Basic implementation of ChatGPT conversation import * remove debug code * Handle citations * Fix updatedAt in import * update default model * Use job scheduler to handle import requests * import job status endpoint * Add wrapper around Agenda * Rate limits for import endpoint * rename import api path * Batch save import to mongo * Improve naming * Add documenting comments * Test for importers * Change button for importing conversations * Frontend changes * Import job status endpoint * Import endpoint response * Add translations to new phrases * Fix conversations refreshing * cleanup unused functions * set timeout for import job status polling * Add documentation * get extra spaces back * Improve error message * Fix translation files after merge * fix translation files 2 * Add zh translation for import functionality * Sync MeiliSearch index after import * chore: add dummy uri for jest tests, as MONGO_URI should only be real for E2E tests * docs: fix links * docs: fix conversationsImport section * fix: user role issue for librechat imports * refactor: import conversations from json - organize imports - add additional jsdocs - use multer with diskStorage to avoid loading file into memory outside of job - use filepath instead of loading data string for imports - replace console logs and some logger.info() with logger.debug - only use multer for import route * fix: undefined metadata edge case and replace ChatGtp -> ChatGpt * Refactor importChatGptConvo function to handle undefined metadata edge case and replace ChatGtp with ChatGpt * fix: chatgpt importer * feat: maintain tree relationship for librechat messages * chore: use enum * refactor: saveMessage to use single object arg, replace console logs, add userId to log message * chore: additional comment * chore: multer edge case * feat: first pass, maintain tree relationship * chore: organize * chore: remove log * ci: add hierarchy test for chatgpt * ci: test maintaining of hierarchy for librechat * wip: allow 
non-text content type messages * refactor: import content part object json string * refactor: more content types to format * chore: consolidate messageText formatting * docs: update on changes, bump data-provider/config versions, update readme * refactor(indexSync): singleton pattern for MeiliSearchClient * refactor: debug log after batch is done * chore: add back indexSync error handling --------- Co-authored-by: jakubmieszczak <jakub.mieszczak@zendesk.com> Co-authored-by: Danny Avila <danny@librechat.ai>
246 lines
9.2 KiB
JavaScript
246 lines
9.2 KiB
JavaScript
const fs = require('fs');
|
|
const path = require('path');
|
|
const { EModelEndpoint, Constants } = require('librechat-data-provider');
|
|
const { ImportBatchBuilder } = require('./importBatchBuilder');
|
|
const { getImporter } = require('./importers');
|
|
|
|
// Mocking the ImportBatchBuilder class and its methods
|
|
// Swap the real ImportBatchBuilder for a stub: constructing it yields an object
// whose methods are jest mocks resolving to undefined, so the tests below can
// inspect which builder methods the importers invoke.
jest.mock('./importBatchBuilder', () => {
  // Produces a fresh async mock that resolves to undefined.
  const resolvesUndefined = () => jest.fn().mockResolvedValue(undefined);

  const ImportBatchBuilder = jest.fn().mockImplementation(() => ({
    startConversation: resolvesUndefined(),
    addUserMessage: resolvesUndefined(),
    addGptMessage: resolvesUndefined(),
    saveMessage: resolvesUndefined(),
    finishConversation: resolvesUndefined(),
    saveBatch: resolvesUndefined(),
  }));

  return { ImportBatchBuilder };
});
|
|
|
|
describe('importChatGptConvo', () => {
  // Reads and parses a JSON fixture from the sibling __data__ directory.
  const loadFixture = (fileName) =>
    JSON.parse(fs.readFileSync(path.join(__dirname, '__data__', fileName), 'utf8'));

  it('should import conversation correctly', async () => {
    // Given
    const jsonData = loadFixture('chatgpt-export.json');
    const requestUserId = 'user-123';
    const mockedBuilderFactory = jest.fn().mockReturnValue(new ImportBatchBuilder(requestUserId));
    const expectedNumberOfMessages = 19;
    const expectedNumberOfConversations = 2;

    // When
    const importer = getImporter(jsonData);
    await importer(jsonData, requestUserId, mockedBuilderFactory);

    // Then
    expect(mockedBuilderFactory).toHaveBeenCalledWith(requestUserId);
    const [{ value: builder }] = mockedBuilderFactory.mock.results;

    expect(builder.startConversation).toHaveBeenCalledWith(EModelEndpoint.openAI);
    expect(builder.saveMessage).toHaveBeenCalledTimes(expectedNumberOfMessages);
    expect(builder.finishConversation).toHaveBeenCalledTimes(expectedNumberOfConversations);
    expect(builder.saveBatch).toHaveBeenCalled();
  });

  it('should maintain correct message hierarchy (tree parent/children relationship)', async () => {
    // Given: a fixture whose first conversation carries a known message tree
    const jsonData = loadFixture('chatgpt-tree.json');
    const requestUserId = 'user-123';
    const mockedBuilderFactory = jest.fn().mockReturnValue(new ImportBatchBuilder(requestUserId));

    // When
    const importer = getImporter(jsonData);
    await importer(jsonData, requestUserId, mockedBuilderFactory);

    // Then
    expect(mockedBuilderFactory).toHaveBeenCalledWith(requestUserId);
    const [{ value: builder }] = mockedBuilderFactory.mock.results;

    // Keep only mapping nodes that have a non-system message with content.
    const { mapping } = jsonData[0];
    const messageEntries = Object.keys(mapping).filter((id) => {
      const { message } = mapping[id];
      return message && message.author.role !== 'system' && message.content;
    });

    // One saveMessage call is expected per retained node.
    expect(builder.saveMessage).toHaveBeenCalledTimes(messageEntries.length);
    const saveCalls = builder.saveMessage.mock.calls;

    // Pair each original node id with the messageId of the save call at the same position.
    const idToUUIDMap = new Map();
    saveCalls.forEach(([savedMessage], position) => {
      idToUUIDMap.set(messageEntries[position], savedMessage.messageId);
    });

    expect(idToUUIDMap.size).toBe(messageEntries.length);

    // Each saved message must reference the generated id of its original parent,
    // or NO_PARENT when the parent is absent or was not saved.
    for (const id of messageEntries) {
      const { parent } = mapping[id];

      let expectedParentId = Constants.NO_PARENT;
      if (parent) {
        expectedParentId = idToUUIDMap.get(parent) ?? Constants.NO_PARENT;
      }

      const generatedId = idToUUIDMap.get(id);
      let actualParentId = Constants.NO_PARENT;
      if (generatedId) {
        const [savedMessage] = saveCalls.find(
          ([candidate]) => candidate.messageId === generatedId,
        );
        actualParentId = savedMessage.parentMessageId;
      }

      expect(actualParentId).toBe(expectedParentId);
    }

    expect(builder.saveBatch).toHaveBeenCalled();
  });
});
|
|
|
|
describe('importLibreChatConvo', () => {
  // Reads and parses a JSON fixture from the sibling __data__ directory.
  const loadFixture = (fileName) =>
    JSON.parse(fs.readFileSync(path.join(__dirname, '__data__', fileName), 'utf8'));

  it('should import conversation correctly', async () => {
    // Given
    const jsonData = loadFixture('librechat-export.json');
    const requestUserId = 'user-123';
    const mockedBuilderFactory = jest.fn().mockReturnValue(new ImportBatchBuilder(requestUserId));
    const expectedNumberOfMessages = 6;
    const expectedNumberOfConversations = 1;

    // When
    const importer = getImporter(jsonData);
    await importer(jsonData, requestUserId, mockedBuilderFactory);

    // Then
    const [{ value: builder }] = mockedBuilderFactory.mock.results;
    expect(builder.startConversation).toHaveBeenCalledWith(EModelEndpoint.openAI);
    expect(builder.saveMessage).toHaveBeenCalledTimes(expectedNumberOfMessages);
    expect(builder.finishConversation).toHaveBeenCalledTimes(expectedNumberOfConversations);
    expect(builder.saveBatch).toHaveBeenCalled();
  });

  it('should maintain correct message hierarchy (tree parent/children relationship)', async () => {
    // Given
    const jsonData = loadFixture('librechat-tree.json');
    const requestUserId = 'user-123';
    const mockedBuilderFactory = jest.fn().mockReturnValue(new ImportBatchBuilder(requestUserId));

    // When
    const importer = getImporter(jsonData);
    await importer(jsonData, requestUserId, mockedBuilderFactory);

    // Then
    const [{ value: builder }] = mockedBuilderFactory.mock.results;
    const saveCalls = builder.saveMessage.mock.calls;

    // Index every saved message: original message id -> newly assigned messageId.
    const idToUUIDMap = new Map();
    for (const [savedMessage] of saveCalls) {
      idToUUIDMap.set(savedMessage.originalMessageId, savedMessage.messageId);
    }

    // Walks the fixture tree depth-first and asserts that each node was saved
    // with the generated id of its parent (null when the parent has no mapping).
    const verifyBranch = (nodes, parentId) => {
      nodes.forEach((node) => {
        const generatedId = idToUUIDMap.get(node.messageId);
        const expectedParentId = idToUUIDMap.get(parentId) ?? null;

        const matchingCall = saveCalls.find(
          ([candidate]) => candidate.messageId === generatedId,
        );
        const actualParentId = matchingCall[0].parentMessageId;
        expect(actualParentId).toBe(expectedParentId);

        const hasChildren = node.children && node.children.length > 0;
        if (hasChildren) {
          verifyBranch(node.children, node.messageId);
        }
      });
    };

    // Root-level messages are checked against a null parent id.
    verifyBranch(jsonData.messagesTree, null);

    expect(builder.saveBatch).toHaveBeenCalled();
  });
});
|
|
|
|
describe('importChatBotUiConvo', () => {
  it('should import custom conversation correctly', async () => {
    // Given
    const fixturePath = path.join(__dirname, '__data__', 'chatbotui-export.json');
    const jsonData = JSON.parse(fs.readFileSync(fixturePath, 'utf8'));
    const requestUserId = 'custom-user-456';
    const mockedBuilderFactory = jest.fn().mockReturnValue(new ImportBatchBuilder(requestUserId));

    // When
    const importer = getImporter(jsonData);
    await importer(jsonData, requestUserId, mockedBuilderFactory);

    // Then
    const [{ value: builder }] = mockedBuilderFactory.mock.results;
    const { startConversation, addUserMessage, addGptMessage, finishConversation, saveBatch } =
      builder;

    expect(startConversation).toHaveBeenCalledWith('openAI');

    // User messages: three in total; the first and third are pinned by text.
    expect(addUserMessage).toHaveBeenCalledTimes(3);
    expect(addUserMessage).toHaveBeenNthCalledWith(1, 'Hello what are you able to do?');
    expect(addUserMessage).toHaveBeenNthCalledWith(
      3,
      'Give me the code that inverts binary tree in COBOL',
    );

    // GPT messages: three in total; each call carries the text and the model name.
    expect(addGptMessage).toHaveBeenCalledTimes(3);
    expect(addGptMessage).toHaveBeenNthCalledWith(
      1,
      expect.stringMatching(/^Hello! As an AI developed by OpenAI/),
      'gpt-4-1106-preview',
    );
    expect(addGptMessage).toHaveBeenNthCalledWith(
      3,
      expect.stringContaining('```cobol'),
      'gpt-3.5-turbo',
    );

    // Two conversations are finished, each with a title string and a Date.
    expect(finishConversation).toHaveBeenCalledTimes(2);
    expect(finishConversation).toHaveBeenNthCalledWith(
      1,
      'Hello what are you able to do?',
      expect.any(Date),
    );
    expect(finishConversation).toHaveBeenNthCalledWith(
      2,
      'Give me the code that inverts ...',
      expect.any(Date),
    );

    expect(saveBatch).toHaveBeenCalled();
  });
});
|
|
|
|
describe('getImporter', () => {
  it('should throw an error if the import type is not supported', () => {
    // Given: a payload in a shape this test expects getImporter to reject
    const unsupportedPayload = { unsupported: 'data' };

    // When: resolution is deferred so expect() can capture the thrown error
    const resolveImporter = () => getImporter(unsupportedPayload);

    // Then
    expect(resolveImporter).toThrow('Unsupported import type');
  });
});
|