mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-16 16:30:15 +01:00
⏳ refactor: Add Robust Timestamp handling for Conversation Imports (#8262)
This commit is contained in:
parent
a37bf6719c
commit
3554625a06
3 changed files with 344 additions and 59 deletions
280
api/server/utils/import/importers-timestamp.spec.js
Normal file
280
api/server/utils/import/importers-timestamp.spec.js
Normal file
|
|
@ -0,0 +1,280 @@
|
|||
const { Constants } = require('librechat-data-provider');
|
||||
const { ImportBatchBuilder } = require('./importBatchBuilder');
|
||||
const { getImporter } = require('./importers');
|
||||
|
||||
// Mock the database methods
|
||||
jest.mock('~/models/Conversation', () => ({
|
||||
bulkSaveConvos: jest.fn(),
|
||||
}));
|
||||
jest.mock('~/models/Message', () => ({
|
||||
bulkSaveMessages: jest.fn(),
|
||||
}));
|
||||
jest.mock('~/cache/getLogStores');
|
||||
const getLogStores = require('~/cache/getLogStores');
|
||||
const mockedCacheGet = jest.fn();
|
||||
getLogStores.mockImplementation(() => ({
|
||||
get: mockedCacheGet,
|
||||
}));
|
||||
|
||||
describe('Import Timestamp Ordering', () => {
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks();
|
||||
mockedCacheGet.mockResolvedValue(null);
|
||||
});
|
||||
|
||||
describe('LibreChat Import - Timestamp Issues', () => {
|
||||
test('should maintain proper timestamp order between parent and child messages', async () => {
|
||||
// Create a LibreChat export with out-of-order timestamps
|
||||
const jsonData = {
|
||||
conversationId: 'test-convo-123',
|
||||
title: 'Test Conversation',
|
||||
messages: [
|
||||
{
|
||||
messageId: 'parent-1',
|
||||
parentMessageId: Constants.NO_PARENT,
|
||||
text: 'Parent Message',
|
||||
sender: 'user',
|
||||
isCreatedByUser: true,
|
||||
createdAt: '2023-01-01T00:02:00Z', // Parent created AFTER child
|
||||
},
|
||||
{
|
||||
messageId: 'child-1',
|
||||
parentMessageId: 'parent-1',
|
||||
text: 'Child Message',
|
||||
sender: 'assistant',
|
||||
isCreatedByUser: false,
|
||||
createdAt: '2023-01-01T00:01:00Z', // Child created BEFORE parent
|
||||
},
|
||||
{
|
||||
messageId: 'grandchild-1',
|
||||
parentMessageId: 'child-1',
|
||||
text: 'Grandchild Message',
|
||||
sender: 'user',
|
||||
isCreatedByUser: true,
|
||||
createdAt: '2023-01-01T00:00:30Z', // Even earlier
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const requestUserId = 'user-123';
|
||||
const importBatchBuilder = new ImportBatchBuilder(requestUserId);
|
||||
jest.spyOn(importBatchBuilder, 'saveMessage');
|
||||
|
||||
const importer = getImporter(jsonData);
|
||||
await importer(jsonData, requestUserId, () => importBatchBuilder);
|
||||
|
||||
// Check the actual messages stored in the builder
|
||||
const savedMessages = importBatchBuilder.messages;
|
||||
|
||||
const parent = savedMessages.find((msg) => msg.text === 'Parent Message');
|
||||
const child = savedMessages.find((msg) => msg.text === 'Child Message');
|
||||
const grandchild = savedMessages.find((msg) => msg.text === 'Grandchild Message');
|
||||
|
||||
// Verify all messages were found
|
||||
expect(parent).toBeDefined();
|
||||
expect(child).toBeDefined();
|
||||
expect(grandchild).toBeDefined();
|
||||
|
||||
// Expected behavior: imported timestamps are corrected so every child is later than its parent
|
||||
expect(new Date(child.createdAt).getTime()).toBeGreaterThan(
|
||||
new Date(parent.createdAt).getTime(),
|
||||
);
|
||||
expect(new Date(grandchild.createdAt).getTime()).toBeGreaterThan(
|
||||
new Date(child.createdAt).getTime(),
|
||||
);
|
||||
});
|
||||
|
||||
test('should handle complex multi-branch scenario with out-of-order timestamps', async () => {
|
||||
const jsonData = {
|
||||
conversationId: 'complex-test-123',
|
||||
title: 'Complex Test',
|
||||
messages: [
|
||||
// Branch 1: Root -> A -> B with reversed timestamps
|
||||
{
|
||||
messageId: 'root-1',
|
||||
parentMessageId: Constants.NO_PARENT,
|
||||
text: 'Root 1',
|
||||
sender: 'user',
|
||||
isCreatedByUser: true,
|
||||
createdAt: '2023-01-01T00:03:00Z',
|
||||
},
|
||||
{
|
||||
messageId: 'a-1',
|
||||
parentMessageId: 'root-1',
|
||||
text: 'A1',
|
||||
sender: 'assistant',
|
||||
isCreatedByUser: false,
|
||||
createdAt: '2023-01-01T00:02:00Z', // Before parent
|
||||
},
|
||||
{
|
||||
messageId: 'b-1',
|
||||
parentMessageId: 'a-1',
|
||||
text: 'B1',
|
||||
sender: 'user',
|
||||
isCreatedByUser: true,
|
||||
createdAt: '2023-01-01T00:01:00Z', // Before grandparent
|
||||
},
|
||||
// Branch 2: Root -> C -> D with mixed timestamps
|
||||
{
|
||||
messageId: 'root-2',
|
||||
parentMessageId: Constants.NO_PARENT,
|
||||
text: 'Root 2',
|
||||
sender: 'user',
|
||||
isCreatedByUser: true,
|
||||
createdAt: '2023-01-01T00:00:30Z', // Earlier than branch 1
|
||||
},
|
||||
{
|
||||
messageId: 'c-2',
|
||||
parentMessageId: 'root-2',
|
||||
text: 'C2',
|
||||
sender: 'assistant',
|
||||
isCreatedByUser: false,
|
||||
createdAt: '2023-01-01T00:04:00Z', // Much later
|
||||
},
|
||||
{
|
||||
messageId: 'd-2',
|
||||
parentMessageId: 'c-2',
|
||||
text: 'D2',
|
||||
sender: 'user',
|
||||
isCreatedByUser: true,
|
||||
createdAt: '2023-01-01T00:02:30Z', // Between root and parent
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const requestUserId = 'user-123';
|
||||
const importBatchBuilder = new ImportBatchBuilder(requestUserId);
|
||||
jest.spyOn(importBatchBuilder, 'saveMessage');
|
||||
|
||||
const importer = getImporter(jsonData);
|
||||
await importer(jsonData, requestUserId, () => importBatchBuilder);
|
||||
|
||||
const savedMessages = importBatchBuilder.messages;
|
||||
|
||||
// Look up each imported message by its text so parent/child timestamp ordering can be checked
|
||||
const root1 = savedMessages.find((msg) => msg.text === 'Root 1');
|
||||
const a1 = savedMessages.find((msg) => msg.text === 'A1');
|
||||
const b1 = savedMessages.find((msg) => msg.text === 'B1');
|
||||
const root2 = savedMessages.find((msg) => msg.text === 'Root 2');
|
||||
const c2 = savedMessages.find((msg) => msg.text === 'C2');
|
||||
const d2 = savedMessages.find((msg) => msg.text === 'D2');
|
||||
|
||||
// Branch 1: timestamps should now be in correct order
|
||||
expect(new Date(a1.createdAt).getTime()).toBeGreaterThan(new Date(root1.createdAt).getTime());
|
||||
expect(new Date(b1.createdAt).getTime()).toBeGreaterThan(new Date(a1.createdAt).getTime());
|
||||
|
||||
// Branch 2: all timestamps should be properly ordered
|
||||
expect(new Date(c2.createdAt).getTime()).toBeGreaterThan(new Date(root2.createdAt).getTime());
|
||||
expect(new Date(d2.createdAt).getTime()).toBeGreaterThan(new Date(c2.createdAt).getTime());
|
||||
});
|
||||
|
||||
test('recursive format should NOW have timestamp protection', async () => {
|
||||
// Create a recursive LibreChat export with out-of-order timestamps
|
||||
const jsonData = {
|
||||
conversationId: 'recursive-test-123',
|
||||
title: 'Recursive Test',
|
||||
recursive: true,
|
||||
messages: [
|
||||
{
|
||||
messageId: 'parent-1',
|
||||
parentMessageId: Constants.NO_PARENT,
|
||||
text: 'Parent Message',
|
||||
sender: 'User',
|
||||
isCreatedByUser: true,
|
||||
createdAt: '2023-01-01T00:02:00Z', // Parent created AFTER child
|
||||
children: [
|
||||
{
|
||||
messageId: 'child-1',
|
||||
parentMessageId: 'parent-1',
|
||||
text: 'Child Message',
|
||||
sender: 'Assistant',
|
||||
isCreatedByUser: false,
|
||||
createdAt: '2023-01-01T00:01:00Z', // Child created BEFORE parent
|
||||
children: [
|
||||
{
|
||||
messageId: 'grandchild-1',
|
||||
parentMessageId: 'child-1',
|
||||
text: 'Grandchild Message',
|
||||
sender: 'User',
|
||||
isCreatedByUser: true,
|
||||
createdAt: '2023-01-01T00:00:30Z', // Even earlier
|
||||
children: [],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const requestUserId = 'user-123';
|
||||
const importBatchBuilder = new ImportBatchBuilder(requestUserId);
|
||||
|
||||
const importer = getImporter(jsonData);
|
||||
await importer(jsonData, requestUserId, () => importBatchBuilder);
|
||||
|
||||
const savedMessages = importBatchBuilder.messages;
|
||||
|
||||
// Messages should be saved
|
||||
expect(savedMessages).toHaveLength(3);
|
||||
|
||||
// Locate each saved message by its text; for the recursive format the assertions
|
||||
// below expect createdAt to be present on every message and ordered parent-before-child
|
||||
const parent = savedMessages.find((msg) => msg.text === 'Parent Message');
|
||||
const child = savedMessages.find((msg) => msg.text === 'Child Message');
|
||||
const grandchild = savedMessages.find((msg) => msg.text === 'Grandchild Message');
|
||||
|
||||
expect(parent).toBeDefined();
|
||||
expect(child).toBeDefined();
|
||||
expect(grandchild).toBeDefined();
|
||||
|
||||
// Recursive imports NOW preserve and correct timestamps
|
||||
expect(parent.createdAt).toBeDefined();
|
||||
expect(child.createdAt).toBeDefined();
|
||||
expect(grandchild.createdAt).toBeDefined();
|
||||
|
||||
// Timestamps should be corrected to maintain proper order
|
||||
expect(new Date(child.createdAt).getTime()).toBeGreaterThan(
|
||||
new Date(parent.createdAt).getTime(),
|
||||
);
|
||||
expect(new Date(grandchild.createdAt).getTime()).toBeGreaterThan(
|
||||
new Date(child.createdAt).getTime(),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Comparison with Fork Functionality', () => {
|
||||
test('fork functionality correctly handles timestamp issues (for comparison)', async () => {
|
||||
const { cloneMessagesWithTimestamps } = require('./fork');
|
||||
|
||||
const messagesToClone = [
|
||||
{
|
||||
messageId: 'parent',
|
||||
parentMessageId: Constants.NO_PARENT,
|
||||
text: 'Parent Message',
|
||||
createdAt: '2023-01-01T00:02:00Z', // Parent created AFTER child
|
||||
},
|
||||
{
|
||||
messageId: 'child',
|
||||
parentMessageId: 'parent',
|
||||
text: 'Child Message',
|
||||
createdAt: '2023-01-01T00:01:00Z', // Child created BEFORE parent
|
||||
},
|
||||
];
|
||||
|
||||
const importBatchBuilder = new ImportBatchBuilder('user-123');
|
||||
jest.spyOn(importBatchBuilder, 'saveMessage');
|
||||
|
||||
cloneMessagesWithTimestamps(messagesToClone, importBatchBuilder);
|
||||
|
||||
const savedMessages = importBatchBuilder.messages;
|
||||
const parent = savedMessages.find((msg) => msg.text === 'Parent Message');
|
||||
const child = savedMessages.find((msg) => msg.text === 'Child Message');
|
||||
|
||||
// Fork functionality DOES correct the timestamps
|
||||
expect(new Date(child.createdAt).getTime()).toBeGreaterThan(
|
||||
new Date(parent.createdAt).getTime(),
|
||||
);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
const { v4: uuidv4 } = require('uuid');
|
||||
const { EModelEndpoint, Constants, openAISettings, CacheKeys } = require('librechat-data-provider');
|
||||
const { createImportBatchBuilder } = require('./importBatchBuilder');
|
||||
const { cloneMessagesWithTimestamps } = require('./fork');
|
||||
const getLogStores = require('~/cache/getLogStores');
|
||||
const logger = require('~/config/winston');
|
||||
|
||||
|
|
@ -107,67 +108,47 @@ async function importLibreChatConvo(
|
|||
|
||||
if (jsonData.recursive) {
|
||||
/**
|
||||
* Recursively traverse the messages tree and save each message to the database.
|
||||
* Flatten the recursive message tree into a flat array
|
||||
* @param {TMessage[]} messages
|
||||
* @param {string} parentMessageId
|
||||
* @param {TMessage[]} flatMessages
|
||||
*/
|
||||
const traverseMessages = async (messages, parentMessageId = null) => {
|
||||
const flattenMessages = (
|
||||
messages,
|
||||
parentMessageId = Constants.NO_PARENT,
|
||||
flatMessages = [],
|
||||
) => {
|
||||
for (const message of messages) {
|
||||
if (!message.text && !message.content) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let savedMessage;
|
||||
if (message.sender?.toLowerCase() === 'user' || message.isCreatedByUser) {
|
||||
savedMessage = await importBatchBuilder.saveMessage({
|
||||
text: message.text,
|
||||
content: message.content,
|
||||
sender: 'user',
|
||||
isCreatedByUser: true,
|
||||
parentMessageId: parentMessageId,
|
||||
});
|
||||
} else {
|
||||
savedMessage = await importBatchBuilder.saveMessage({
|
||||
text: message.text,
|
||||
content: message.content,
|
||||
sender: message.sender,
|
||||
isCreatedByUser: false,
|
||||
model: options.model,
|
||||
parentMessageId: parentMessageId,
|
||||
});
|
||||
}
|
||||
const flatMessage = {
|
||||
...message,
|
||||
parentMessageId: parentMessageId,
|
||||
children: undefined, // Remove children from flat structure
|
||||
};
|
||||
flatMessages.push(flatMessage);
|
||||
|
||||
if (!firstMessageDate && message.createdAt) {
|
||||
firstMessageDate = new Date(message.createdAt);
|
||||
}
|
||||
|
||||
if (message.children && message.children.length > 0) {
|
||||
await traverseMessages(message.children, savedMessage.messageId);
|
||||
flattenMessages(message.children, message.messageId, flatMessages);
|
||||
}
|
||||
}
|
||||
return flatMessages;
|
||||
};
|
||||
|
||||
await traverseMessages(messagesToImport);
|
||||
const flatMessages = flattenMessages(messagesToImport);
|
||||
cloneMessagesWithTimestamps(flatMessages, importBatchBuilder);
|
||||
} else if (messagesToImport) {
|
||||
const idMapping = new Map();
|
||||
|
||||
cloneMessagesWithTimestamps(messagesToImport, importBatchBuilder);
|
||||
for (const message of messagesToImport) {
|
||||
if (!firstMessageDate && message.createdAt) {
|
||||
firstMessageDate = new Date(message.createdAt);
|
||||
}
|
||||
const newMessageId = uuidv4();
|
||||
idMapping.set(message.messageId, newMessageId);
|
||||
|
||||
const clonedMessage = {
|
||||
...message,
|
||||
messageId: newMessageId,
|
||||
parentMessageId:
|
||||
message.parentMessageId && message.parentMessageId !== Constants.NO_PARENT
|
||||
? idMapping.get(message.parentMessageId) || Constants.NO_PARENT
|
||||
: Constants.NO_PARENT,
|
||||
};
|
||||
|
||||
importBatchBuilder.saveMessage(clonedMessage);
|
||||
}
|
||||
} else {
|
||||
throw new Error('Invalid LibreChat file format');
|
||||
|
|
|
|||
|
|
@ -175,36 +175,60 @@ describe('importLibreChatConvo', () => {
|
|||
jest.spyOn(importBatchBuilder, 'saveMessage');
|
||||
jest.spyOn(importBatchBuilder, 'saveBatch');
|
||||
|
||||
// When
|
||||
const importer = getImporter(jsonData);
|
||||
await importer(jsonData, requestUserId, () => importBatchBuilder);
|
||||
|
||||
// Create a map to track original message IDs to new UUIDs
|
||||
const idToUUIDMap = new Map();
|
||||
importBatchBuilder.saveMessage.mock.calls.forEach((call) => {
|
||||
const message = call[0];
|
||||
idToUUIDMap.set(message.originalMessageId, message.messageId);
|
||||
// Get the imported messages
|
||||
const messages = importBatchBuilder.messages;
|
||||
expect(messages.length).toBeGreaterThan(0);
|
||||
|
||||
// Build maps for verification
|
||||
const textToMessageMap = new Map();
|
||||
const messageIdToMessage = new Map();
|
||||
messages.forEach((msg) => {
|
||||
if (msg.text) {
|
||||
// For recursive imports, text might be very long, so just use the first 100 chars as key
|
||||
const textKey = msg.text.substring(0, 100);
|
||||
textToMessageMap.set(textKey, msg);
|
||||
}
|
||||
messageIdToMessage.set(msg.messageId, msg);
|
||||
});
|
||||
|
||||
const checkChildren = (children, parentId) => {
|
||||
children.forEach((child) => {
|
||||
const childUUID = idToUUIDMap.get(child.messageId);
|
||||
const expectedParentId = idToUUIDMap.get(parentId) ?? null;
|
||||
const messageCall = importBatchBuilder.saveMessage.mock.calls.find(
|
||||
(call) => call[0].messageId === childUUID,
|
||||
);
|
||||
|
||||
const actualParentId = messageCall[0].parentMessageId;
|
||||
expect(actualParentId).toBe(expectedParentId);
|
||||
|
||||
if (child.children && child.children.length > 0) {
|
||||
checkChildren(child.children, child.messageId);
|
||||
// Count expected messages from the tree
|
||||
const countMessagesInTree = (nodes) => {
|
||||
let count = 0;
|
||||
nodes.forEach((node) => {
|
||||
if (node.text || node.content) {
|
||||
count++;
|
||||
}
|
||||
if (node.children && node.children.length > 0) {
|
||||
count += countMessagesInTree(node.children);
|
||||
}
|
||||
});
|
||||
return count;
|
||||
};
|
||||
|
||||
// Start hierarchy validation from root messages
|
||||
checkChildren(jsonData.messages, null);
|
||||
const expectedMessageCount = countMessagesInTree(jsonData.messages);
|
||||
expect(messages.length).toBe(expectedMessageCount);
|
||||
|
||||
// Verify all messages have valid parent relationships
|
||||
messages.forEach((msg) => {
|
||||
if (msg.parentMessageId !== Constants.NO_PARENT) {
|
||||
const parent = messageIdToMessage.get(msg.parentMessageId);
|
||||
expect(parent).toBeDefined();
|
||||
|
||||
// Verify timestamp ordering
|
||||
if (msg.createdAt && parent.createdAt) {
|
||||
expect(new Date(msg.createdAt).getTime()).toBeGreaterThanOrEqual(
|
||||
new Date(parent.createdAt).getTime(),
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Verify at least one root message exists
|
||||
const rootMessages = messages.filter((msg) => msg.parentMessageId === Constants.NO_PARENT);
|
||||
expect(rootMessages.length).toBeGreaterThan(0);
|
||||
|
||||
expect(importBatchBuilder.saveBatch).toHaveBeenCalled();
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue