diff --git a/api/server/utils/import/importers-timestamp.spec.js b/api/server/utils/import/importers-timestamp.spec.js new file mode 100644 index 0000000000..2ce00de82b --- /dev/null +++ b/api/server/utils/import/importers-timestamp.spec.js @@ -0,0 +1,280 @@ +const { Constants } = require('librechat-data-provider'); +const { ImportBatchBuilder } = require('./importBatchBuilder'); +const { getImporter } = require('./importers'); + +// Mock the database methods +jest.mock('~/models/Conversation', () => ({ + bulkSaveConvos: jest.fn(), +})); +jest.mock('~/models/Message', () => ({ + bulkSaveMessages: jest.fn(), +})); +jest.mock('~/cache/getLogStores'); +const getLogStores = require('~/cache/getLogStores'); +const mockedCacheGet = jest.fn(); +getLogStores.mockImplementation(() => ({ + get: mockedCacheGet, +})); + +describe('Import Timestamp Ordering', () => { + beforeEach(() => { + jest.clearAllMocks(); + mockedCacheGet.mockResolvedValue(null); + }); + + describe('LibreChat Import - Timestamp Issues', () => { + test('should maintain proper timestamp order between parent and child messages', async () => { + // Create a LibreChat export with out-of-order timestamps + const jsonData = { + conversationId: 'test-convo-123', + title: 'Test Conversation', + messages: [ + { + messageId: 'parent-1', + parentMessageId: Constants.NO_PARENT, + text: 'Parent Message', + sender: 'user', + isCreatedByUser: true, + createdAt: '2023-01-01T00:02:00Z', // Parent created AFTER child + }, + { + messageId: 'child-1', + parentMessageId: 'parent-1', + text: 'Child Message', + sender: 'assistant', + isCreatedByUser: false, + createdAt: '2023-01-01T00:01:00Z', // Child created BEFORE parent + }, + { + messageId: 'grandchild-1', + parentMessageId: 'child-1', + text: 'Grandchild Message', + sender: 'user', + isCreatedByUser: true, + createdAt: '2023-01-01T00:00:30Z', // Even earlier + }, + ], + }; + + const requestUserId = 'user-123'; + const importBatchBuilder = new 
ImportBatchBuilder(requestUserId); + jest.spyOn(importBatchBuilder, 'saveMessage'); + + const importer = getImporter(jsonData); + await importer(jsonData, requestUserId, () => importBatchBuilder); + + // Check the actual messages stored in the builder + const savedMessages = importBatchBuilder.messages; + + const parent = savedMessages.find((msg) => msg.text === 'Parent Message'); + const child = savedMessages.find((msg) => msg.text === 'Child Message'); + const grandchild = savedMessages.find((msg) => msg.text === 'Grandchild Message'); + + // Verify all messages were found + expect(parent).toBeDefined(); + expect(child).toBeDefined(); + expect(grandchild).toBeDefined(); + + // FIXED behavior: timestamps ARE corrected + expect(new Date(child.createdAt).getTime()).toBeGreaterThan( + new Date(parent.createdAt).getTime(), + ); + expect(new Date(grandchild.createdAt).getTime()).toBeGreaterThan( + new Date(child.createdAt).getTime(), + ); + }); + + test('should handle complex multi-branch scenario with out-of-order timestamps', async () => { + const jsonData = { + conversationId: 'complex-test-123', + title: 'Complex Test', + messages: [ + // Branch 1: Root -> A -> B with reversed timestamps + { + messageId: 'root-1', + parentMessageId: Constants.NO_PARENT, + text: 'Root 1', + sender: 'user', + isCreatedByUser: true, + createdAt: '2023-01-01T00:03:00Z', + }, + { + messageId: 'a-1', + parentMessageId: 'root-1', + text: 'A1', + sender: 'assistant', + isCreatedByUser: false, + createdAt: '2023-01-01T00:02:00Z', // Before parent + }, + { + messageId: 'b-1', + parentMessageId: 'a-1', + text: 'B1', + sender: 'user', + isCreatedByUser: true, + createdAt: '2023-01-01T00:01:00Z', // Before grandparent + }, + // Branch 2: Root -> C -> D with mixed timestamps + { + messageId: 'root-2', + parentMessageId: Constants.NO_PARENT, + text: 'Root 2', + sender: 'user', + isCreatedByUser: true, + createdAt: '2023-01-01T00:00:30Z', // Earlier than branch 1 + }, + { + messageId: 'c-2', + 
parentMessageId: 'root-2', + text: 'C2', + sender: 'assistant', + isCreatedByUser: false, + createdAt: '2023-01-01T00:04:00Z', // Much later + }, + { + messageId: 'd-2', + parentMessageId: 'c-2', + text: 'D2', + sender: 'user', + isCreatedByUser: true, + createdAt: '2023-01-01T00:02:30Z', // Between root and parent + }, + ], + }; + + const requestUserId = 'user-123'; + const importBatchBuilder = new ImportBatchBuilder(requestUserId); + jest.spyOn(importBatchBuilder, 'saveMessage'); + + const importer = getImporter(jsonData); + await importer(jsonData, requestUserId, () => importBatchBuilder); + + const savedMessages = importBatchBuilder.messages; + + // Look up each imported message by its text so parent/child ordering can be asserted + const root1 = savedMessages.find((msg) => msg.text === 'Root 1'); + const a1 = savedMessages.find((msg) => msg.text === 'A1'); + const b1 = savedMessages.find((msg) => msg.text === 'B1'); + const root2 = savedMessages.find((msg) => msg.text === 'Root 2'); + const c2 = savedMessages.find((msg) => msg.text === 'C2'); + const d2 = savedMessages.find((msg) => msg.text === 'D2'); + + // Branch 1: timestamps should now be in correct order + expect(new Date(a1.createdAt).getTime()).toBeGreaterThan(new Date(root1.createdAt).getTime()); + expect(new Date(b1.createdAt).getTime()).toBeGreaterThan(new Date(a1.createdAt).getTime()); + + // Branch 2: all timestamps should be properly ordered + expect(new Date(c2.createdAt).getTime()).toBeGreaterThan(new Date(root2.createdAt).getTime()); + expect(new Date(d2.createdAt).getTime()).toBeGreaterThan(new Date(c2.createdAt).getTime()); + }); + + test('recursive format should NOW have timestamp protection', async () => { + // Create a recursive LibreChat export with out-of-order timestamps + const jsonData = { + conversationId: 'recursive-test-123', + title: 'Recursive Test', + recursive: true, + messages: [ + { + messageId: 'parent-1', + parentMessageId: Constants.NO_PARENT, + text: 'Parent Message', + sender: 'User', +
isCreatedByUser: true, + createdAt: '2023-01-01T00:02:00Z', // Parent created AFTER child + children: [ + { + messageId: 'child-1', + parentMessageId: 'parent-1', + text: 'Child Message', + sender: 'Assistant', + isCreatedByUser: false, + createdAt: '2023-01-01T00:01:00Z', // Child created BEFORE parent + children: [ + { + messageId: 'grandchild-1', + parentMessageId: 'child-1', + text: 'Grandchild Message', + sender: 'User', + isCreatedByUser: true, + createdAt: '2023-01-01T00:00:30Z', // Even earlier + children: [], + }, + ], + }, + ], + }, + ], + }; + + const requestUserId = 'user-123'; + const importBatchBuilder = new ImportBatchBuilder(requestUserId); + + const importer = getImporter(jsonData); + await importer(jsonData, requestUserId, () => importBatchBuilder); + + const savedMessages = importBatchBuilder.messages; + + // Messages should be saved + expect(savedMessages).toHaveLength(3); + + // Recursive exports are flattened before import, so every saved message + // should carry a createdAt value that the ordering checks below can compare + const parent = savedMessages.find((msg) => msg.text === 'Parent Message'); + const child = savedMessages.find((msg) => msg.text === 'Child Message'); + const grandchild = savedMessages.find((msg) => msg.text === 'Grandchild Message'); + + expect(parent).toBeDefined(); + expect(child).toBeDefined(); + expect(grandchild).toBeDefined(); + + // Recursive imports NOW preserve and correct timestamps + expect(parent.createdAt).toBeDefined(); + expect(child.createdAt).toBeDefined(); + expect(grandchild.createdAt).toBeDefined(); + + // Timestamps should be corrected to maintain proper order + expect(new Date(child.createdAt).getTime()).toBeGreaterThan( + new Date(parent.createdAt).getTime(), + ); + expect(new Date(grandchild.createdAt).getTime()).toBeGreaterThan( + new Date(child.createdAt).getTime(), + ); + }); + }); + + describe('Comparison with Fork Functionality', () => { + test('fork functionality correctly handles timestamp
issues (for comparison)', async () => { + const { cloneMessagesWithTimestamps } = require('./fork'); + + const messagesToClone = [ + { + messageId: 'parent', + parentMessageId: Constants.NO_PARENT, + text: 'Parent Message', + createdAt: '2023-01-01T00:02:00Z', // Parent created AFTER child + }, + { + messageId: 'child', + parentMessageId: 'parent', + text: 'Child Message', + createdAt: '2023-01-01T00:01:00Z', // Child created BEFORE parent + }, + ]; + + const importBatchBuilder = new ImportBatchBuilder('user-123'); + jest.spyOn(importBatchBuilder, 'saveMessage'); + + cloneMessagesWithTimestamps(messagesToClone, importBatchBuilder); + + const savedMessages = importBatchBuilder.messages; + const parent = savedMessages.find((msg) => msg.text === 'Parent Message'); + const child = savedMessages.find((msg) => msg.text === 'Child Message'); + + // Fork functionality DOES correct the timestamps + expect(new Date(child.createdAt).getTime()).toBeGreaterThan( + new Date(parent.createdAt).getTime(), + ); + }); + }); +}); diff --git a/api/server/utils/import/importers.js b/api/server/utils/import/importers.js index b828fed021..ce5ab62454 100644 --- a/api/server/utils/import/importers.js +++ b/api/server/utils/import/importers.js @@ -1,6 +1,7 @@ const { v4: uuidv4 } = require('uuid'); const { EModelEndpoint, Constants, openAISettings, CacheKeys } = require('librechat-data-provider'); const { createImportBatchBuilder } = require('./importBatchBuilder'); +const { cloneMessagesWithTimestamps } = require('./fork'); const getLogStores = require('~/cache/getLogStores'); const logger = require('~/config/winston'); @@ -107,67 +108,47 @@ async function importLibreChatConvo( if (jsonData.recursive) { /** - * Recursively traverse the messages tree and save each message to the database. 
+ * Flatten the recursive message tree into a flat array * @param {TMessage[]} messages * @param {string} parentMessageId + * @param {TMessage[]} flatMessages */ - const traverseMessages = async (messages, parentMessageId = null) => { + const flattenMessages = ( + messages, + parentMessageId = Constants.NO_PARENT, + flatMessages = [], + ) => { for (const message of messages) { if (!message.text && !message.content) { continue; } - let savedMessage; - if (message.sender?.toLowerCase() === 'user' || message.isCreatedByUser) { - savedMessage = await importBatchBuilder.saveMessage({ - text: message.text, - content: message.content, - sender: 'user', - isCreatedByUser: true, - parentMessageId: parentMessageId, - }); - } else { - savedMessage = await importBatchBuilder.saveMessage({ - text: message.text, - content: message.content, - sender: message.sender, - isCreatedByUser: false, - model: options.model, - parentMessageId: parentMessageId, - }); - } + const flatMessage = { + ...message, + parentMessageId: parentMessageId, + children: undefined, // Remove children from flat structure + }; + flatMessages.push(flatMessage); if (!firstMessageDate && message.createdAt) { firstMessageDate = new Date(message.createdAt); } if (message.children && message.children.length > 0) { - await traverseMessages(message.children, savedMessage.messageId); + flattenMessages(message.children, message.messageId, flatMessages); } } + return flatMessages; }; - await traverseMessages(messagesToImport); + const flatMessages = flattenMessages(messagesToImport); + cloneMessagesWithTimestamps(flatMessages, importBatchBuilder); } else if (messagesToImport) { - const idMapping = new Map(); - + cloneMessagesWithTimestamps(messagesToImport, importBatchBuilder); for (const message of messagesToImport) { if (!firstMessageDate && message.createdAt) { firstMessageDate = new Date(message.createdAt); } - const newMessageId = uuidv4(); - idMapping.set(message.messageId, newMessageId); - - const clonedMessage 
= { - ...message, - messageId: newMessageId, - parentMessageId: - message.parentMessageId && message.parentMessageId !== Constants.NO_PARENT - ? idMapping.get(message.parentMessageId) || Constants.NO_PARENT - : Constants.NO_PARENT, - }; - - importBatchBuilder.saveMessage(clonedMessage); } } else { throw new Error('Invalid LibreChat file format'); diff --git a/api/server/utils/import/importers.spec.js b/api/server/utils/import/importers.spec.js index f08644d5c0..23b7e70901 100644 --- a/api/server/utils/import/importers.spec.js +++ b/api/server/utils/import/importers.spec.js @@ -175,36 +175,60 @@ describe('importLibreChatConvo', () => { jest.spyOn(importBatchBuilder, 'saveMessage'); jest.spyOn(importBatchBuilder, 'saveBatch'); - // When const importer = getImporter(jsonData); await importer(jsonData, requestUserId, () => importBatchBuilder); - // Create a map to track original message IDs to new UUIDs - const idToUUIDMap = new Map(); - importBatchBuilder.saveMessage.mock.calls.forEach((call) => { - const message = call[0]; - idToUUIDMap.set(message.originalMessageId, message.messageId); + // Get the imported messages + const messages = importBatchBuilder.messages; + expect(messages.length).toBeGreaterThan(0); + + // Build maps for verification + const textToMessageMap = new Map(); + const messageIdToMessage = new Map(); + messages.forEach((msg) => { + if (msg.text) { + // For recursive imports, text might be very long, so just use the first 100 chars as key + const textKey = msg.text.substring(0, 100); + textToMessageMap.set(textKey, msg); + } + messageIdToMessage.set(msg.messageId, msg); }); - const checkChildren = (children, parentId) => { - children.forEach((child) => { - const childUUID = idToUUIDMap.get(child.messageId); - const expectedParentId = idToUUIDMap.get(parentId) ?? 
null; - const messageCall = importBatchBuilder.saveMessage.mock.calls.find( - (call) => call[0].messageId === childUUID, - ); - - const actualParentId = messageCall[0].parentMessageId; - expect(actualParentId).toBe(expectedParentId); - - if (child.children && child.children.length > 0) { - checkChildren(child.children, child.messageId); + // Count expected messages from the tree + const countMessagesInTree = (nodes) => { + let count = 0; + nodes.forEach((node) => { + if (node.text || node.content) { + count++; + } + if (node.children && node.children.length > 0) { + count += countMessagesInTree(node.children); } }); + return count; }; - // Start hierarchy validation from root messages - checkChildren(jsonData.messages, null); + const expectedMessageCount = countMessagesInTree(jsonData.messages); + expect(messages.length).toBe(expectedMessageCount); + + // Verify all messages have valid parent relationships + messages.forEach((msg) => { + if (msg.parentMessageId !== Constants.NO_PARENT) { + const parent = messageIdToMessage.get(msg.parentMessageId); + expect(parent).toBeDefined(); + + // Verify timestamp ordering + if (msg.createdAt && parent.createdAt) { + expect(new Date(msg.createdAt).getTime()).toBeGreaterThanOrEqual( + new Date(parent.createdAt).getTime(), + ); + } + } + }); + + // Verify at least one root message exists + const rootMessages = messages.filter((msg) => msg.parentMessageId === Constants.NO_PARENT); + expect(rootMessages.length).toBeGreaterThan(0); expect(importBatchBuilder.saveBatch).toHaveBeenCalled(); });