LibreChat/api/server/utils/import/importers.spec.js

247 lines
9.2 KiB
JavaScript
Raw Normal View History

📥 feat: Import Conversations from LibreChat, ChatGPT, Chatbot UI (#2355) * Basic implementation of ChatGPT conversation import * remove debug code * Handle citations * Fix updatedAt in import * update default model * Use job scheduler to handle import requests * import job status endpoint * Add wrapper around Agenda * Rate limits for import endpoint * rename import api path * Batch save import to mongo * Improve naming * Add documenting comments * Test for importers * Change button for importing conversations * Frontend changes * Import job status endpoint * Import endpoint response * Add translations to new phrases * Fix conversations refreshing * cleanup unused functions * set timeout for import job status polling * Add documentation * get extra spaces back * Improve error message * Fix translation files after merge * fix translation files 2 * Add zh translation for import functionality * Sync mailisearch index after import * chore: add dummy uri for jest tests, as MONGO_URI should only be real for E2E tests * docs: fix links * docs: fix conversationsImport section * fix: user role issue for librechat imports * refactor: import conversations from json - organize imports - add additional jsdocs - use multer with diskStorage to avoid loading file into memory outside of job - use filepath instead of loading data string for imports - replace console logs and some logger.info() with logger.debug - only use multer for import route * fix: undefined metadata edge case and replace ChatGtp -> ChatGpt * Refactor importChatGptConvo function to handle undefined metadata edge case and replace ChatGtp with ChatGpt * fix: chatgpt importer * feat: maintain tree relationship for librechat messages * chore: use enum * refactor: saveMessage to use single object arg, replace console logs, add userId to log message * chore: additional comment * chore: multer edge case * feat: first pass, maintain tree relationship * chore: organize * chore: remove log * ci: add heirarchy test for 
chatgpt * ci: test maintaining of heirarchy for librechat * wip: allow non-text content type messages * refactor: import content part object json string * refactor: more content types to format * chore: consolidate messageText formatting * docs: update on changes, bump data-provider/config versions, update readme * refactor(indexSync): singleton pattern for MeiliSearchClient * refactor: debug log after batch is done * chore: add back indexSync error handling --------- Co-authored-by: jakubmieszczak <jakub.mieszczak@zendesk.com> Co-authored-by: Danny Avila <danny@librechat.ai>
2024-05-02 08:48:26 +02:00
const fs = require('fs');
const path = require('path');
const { EModelEndpoint, Constants } = require('librechat-data-provider');
const { ImportBatchBuilder } = require('./importBatchBuilder');
const { getImporter } = require('./importers');
/**
 * Swap the real ImportBatchBuilder module for a stand-in.
 *
 * Each `new ImportBatchBuilder(...)` produces a plain object whose methods are
 * fresh jest mock functions resolving to `undefined`, which lets the tests
 * below inspect how the importers drive the builder.
 */
jest.mock('./importBatchBuilder', () => {
  // Helpers live inside the factory on purpose: jest.mock() calls are hoisted
  // by babel-jest, so the factory should not close over module-scope bindings.
  const asyncStub = () => jest.fn().mockResolvedValue(undefined);

  const createBuilderStub = () => ({
    startConversation: asyncStub(),
    addUserMessage: asyncStub(),
    addGptMessage: asyncStub(),
    saveMessage: asyncStub(),
    finishConversation: asyncStub(),
    saveBatch: asyncStub(),
  });

  return {
    ImportBatchBuilder: jest.fn().mockImplementation(createBuilderStub),
  };
});
describe('importChatGptConvo', () => {
  /**
   * Read and parse a JSON fixture from the `__data__` directory next to this spec.
   * @param {string} fileName - Fixture file name.
   * @returns {*} The parsed JSON content.
   */
  const loadFixture = (fileName) => {
    const fixturePath = path.join(__dirname, '__data__', fileName);
    return JSON.parse(fs.readFileSync(fixturePath, 'utf8'));
  };

  it('should import conversation correctly', async () => {
    // Call counts this test expects for the chatgpt-export.json fixture.
    const messageCount = 19;
    const conversationCount = 2;

    // Given
    const exportData = loadFixture('chatgpt-export.json');
    const userId = 'user-123';
    const builderFactory = jest.fn().mockReturnValue(new ImportBatchBuilder(userId));

    // When
    const runImport = getImporter(exportData);
    await runImport(exportData, userId, builderFactory);

    // Then
    expect(builderFactory).toHaveBeenCalledWith(userId);
    const builder = builderFactory.mock.results[0].value;
    expect(builder.startConversation).toHaveBeenCalledWith(EModelEndpoint.openAI);
    expect(builder.saveMessage).toHaveBeenCalledTimes(messageCount);
    expect(builder.finishConversation).toHaveBeenCalledTimes(conversationCount);
    expect(builder.saveBatch).toHaveBeenCalled();
  });

  it('should maintain correct message hierarchy (tree parent/children relationship)', async () => {
    // Given: a fixture with a known parent/child structure
    const treeData = loadFixture('chatgpt-tree.json');
    const userId = 'user-123';
    const builderFactory = jest.fn().mockReturnValue(new ImportBatchBuilder(userId));

    // When
    const runImport = getImporter(treeData);
    await runImport(treeData, userId, builderFactory);

    // Then
    expect(builderFactory).toHaveBeenCalledWith(userId);
    const builder = builderFactory.mock.results[0].value;
    const { mapping } = treeData[0];

    // Nodes counted as importable here: they carry a message, the author is
    // not the system role, and the message has content.
    const importableIds = Object.keys(mapping).filter((nodeId) => {
      const { message } = mapping[nodeId];
      return message && message.author.role !== 'system' && message.content;
    });

    // One saveMessage call is expected per importable node.
    expect(builder.saveMessage).toHaveBeenCalledTimes(importableIds.length);

    // Pair every original node id with the generated UUID of the saveMessage
    // call at the same position (relies on calls following the mapping order).
    const savedCalls = builder.saveMessage.mock.calls;
    const uuidByOriginalId = new Map();
    for (const [position, [saved]] of savedCalls.entries()) {
      uuidByOriginalId.set(importableIds[position], saved.messageId);
    }

    // Every importable node must have received its own entry.
    expect(uuidByOriginalId.size).toBe(importableIds.length);

    // Each saved message must point at the UUID of its original parent, or at
    // NO_PARENT when the parent is absent or was not imported.
    for (const nodeId of importableIds) {
      const { parent } = mapping[nodeId];

      let expectedParentId = Constants.NO_PARENT;
      if (parent) {
        expectedParentId = uuidByOriginalId.get(parent) ?? Constants.NO_PARENT;
      }

      const nodeUUID = uuidByOriginalId.get(nodeId);
      let actualParentId = Constants.NO_PARENT;
      if (nodeUUID) {
        const [savedNode] = savedCalls.find(([saved]) => saved.messageId === nodeUUID);
        actualParentId = savedNode.parentMessageId;
      }

      expect(actualParentId).toBe(expectedParentId);
    }

    expect(builder.saveBatch).toHaveBeenCalled();
  });
});
describe('importLibreChatConvo', () => {
  /**
   * Read and parse a JSON fixture from the `__data__` directory next to this spec.
   * @param {string} fileName - Fixture file name.
   * @returns {*} The parsed JSON content.
   */
  const loadFixture = (fileName) => {
    const fixturePath = path.join(__dirname, '__data__', fileName);
    return JSON.parse(fs.readFileSync(fixturePath, 'utf8'));
  };

  it('should import conversation correctly', async () => {
    // Call counts this test expects for the librechat-export.json fixture.
    const messageCount = 6;
    const conversationCount = 1;

    // Given
    const exportData = loadFixture('librechat-export.json');
    const userId = 'user-123';
    const builderFactory = jest.fn().mockReturnValue(new ImportBatchBuilder(userId));

    // When
    const runImport = getImporter(exportData);
    await runImport(exportData, userId, builderFactory);

    // Then
    const builder = builderFactory.mock.results[0].value;
    expect(builder.startConversation).toHaveBeenCalledWith(EModelEndpoint.openAI);
    expect(builder.saveMessage).toHaveBeenCalledTimes(messageCount);
    expect(builder.finishConversation).toHaveBeenCalledTimes(conversationCount);
    expect(builder.saveBatch).toHaveBeenCalled();
  });

  it('should maintain correct message hierarchy (tree parent/children relationship)', async () => {
    // Given
    const treeData = loadFixture('librechat-tree.json');
    const userId = 'user-123';
    const builderFactory = jest.fn().mockReturnValue(new ImportBatchBuilder(userId));

    // When
    const runImport = getImporter(treeData);
    await runImport(treeData, userId, builderFactory);

    // Then
    const builder = builderFactory.mock.results[0].value;
    const savedCalls = builder.saveMessage.mock.calls;

    // Index the saved messages: original message id -> newly assigned UUID.
    const uuidByOriginalId = new Map();
    for (const [saved] of savedCalls) {
      uuidByOriginalId.set(saved.originalMessageId, saved.messageId);
    }

    /**
     * Walk a list of sibling nodes and verify that each was saved with the
     * UUID of its parent, descending into nested children.
     * @param {Array} nodes - Sibling nodes from the fixture tree.
     * @param {string|null} parentOriginalId - Original id of their parent, or null for roots.
     */
    const assertSubtree = (nodes, parentOriginalId) => {
      // Shared by all siblings; falls back to null when the parent has no UUID.
      const expectedParentId = uuidByOriginalId.get(parentOriginalId) ?? null;

      for (const node of nodes) {
        const nodeUUID = uuidByOriginalId.get(node.messageId);
        const matchingCall = savedCalls.find(([saved]) => saved.messageId === nodeUUID);
        expect(matchingCall[0].parentMessageId).toBe(expectedParentId);

        if (node.children?.length > 0) {
          assertSubtree(node.children, node.messageId);
        }
      }
    };

    // Root messages are checked with a null parent.
    assertSubtree(treeData.messagesTree, null);

    expect(builder.saveBatch).toHaveBeenCalled();
  });
});
/**
 * Tests for importing a Chatbot UI export.
 *
 * The importer is resolved through getImporter() from the fixture content and
 * is driven against a mocked ImportBatchBuilder; the assertions check which
 * builder methods were called and with which arguments.
 */
describe('importChatBotUiConvo', () => {
  it('should import custom conversation correctly', async () => {
    // Given
    const jsonData = JSON.parse(
      fs.readFileSync(path.join(__dirname, '__data__', 'chatbotui-export.json'), 'utf8'),
    );
    const requestUserId = 'custom-user-456';
    const mockedBuilderFactory = jest.fn().mockReturnValue(new ImportBatchBuilder(requestUserId));

    // When
    const importer = getImporter(jsonData);
    await importer(jsonData, requestUserId, mockedBuilderFactory);

    // Then
    const mockImportBatchBuilder = mockedBuilderFactory.mock.results[0].value;

    // Use the shared enum rather than a raw 'openAI' literal, matching the
    // endpoint assertions in the ChatGPT and LibreChat suites of this file.
    expect(mockImportBatchBuilder.startConversation).toHaveBeenCalledWith(EModelEndpoint.openAI);

    // User messages: three in total; the first and last are checked verbatim.
    expect(mockImportBatchBuilder.addUserMessage).toHaveBeenCalledTimes(3);
    expect(mockImportBatchBuilder.addUserMessage).toHaveBeenNthCalledWith(
      1,
      'Hello what are you able to do?',
    );
    expect(mockImportBatchBuilder.addUserMessage).toHaveBeenNthCalledWith(
      3,
      'Give me the code that inverts binary tree in COBOL',
    );

    // GPT messages: three in total, each passed along with a model name.
    expect(mockImportBatchBuilder.addGptMessage).toHaveBeenCalledTimes(3);
    expect(mockImportBatchBuilder.addGptMessage).toHaveBeenNthCalledWith(
      1,
      expect.stringMatching(/^Hello! As an AI developed by OpenAI/),
      'gpt-4-1106-preview',
    );
    expect(mockImportBatchBuilder.addGptMessage).toHaveBeenNthCalledWith(
      3,
      expect.stringContaining('```cobol'),
      'gpt-3.5-turbo',
    );

    // Two conversations are finished, each with a title and a Date.
    // NOTE(review): the second title looks like a shortened form of the third
    // user message; confirm where that shortening happens.
    expect(mockImportBatchBuilder.finishConversation).toHaveBeenCalledTimes(2);
    expect(mockImportBatchBuilder.finishConversation).toHaveBeenNthCalledWith(
      1,
      'Hello what are you able to do?',
      expect.any(Date),
    );
    expect(mockImportBatchBuilder.finishConversation).toHaveBeenNthCalledWith(
      2,
      'Give me the code that inverts ...',
      expect.any(Date),
    );

    expect(mockImportBatchBuilder.saveBatch).toHaveBeenCalled();
  });
});
describe('getImporter', () => {
  it('should throw an error if the import type is not supported', () => {
    // Given: a payload in a shape this test expects no importer to accept
    const unsupportedPayload = { unsupported: 'data' };

    // When: the lookup is wrapped so that expect() can observe the throw
    const resolveImporter = () => getImporter(unsupportedPayload);

    // Then
    expect(resolveImporter).toThrow('Unsupported import type');
  });
});