From 180d0f18fe9ad78ca671c64e9b2fce97c30062fa Mon Sep 17 00:00:00 2001 From: Jakub Fidler <31575114+RisingOrange@users.noreply.github.com> Date: Tue, 30 Dec 2025 03:31:18 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20fix:=20ChatGPT=20import=20creati?= =?UTF-8?q?ng=20fragmented=20conversation=20tree=20(#11123)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use ChatGPT timestamps (create_time) for proper message ordering - Fallback to conv.create_time for null timestamps - Adjust timestamps so children are always after parents - Guard against circular references in findThinkingContent - Skip thoughts and reasoning_recap messages (merged into responses) - Add comprehensive timestamp ordering tests 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Opus 4.5 --- .../utils/import/importers-timestamp.spec.js | 127 +++++++++ api/server/utils/import/importers.js | 135 ++++++++- api/server/utils/import/importers.spec.js | 256 ++++++++++++++++++ 3 files changed, 505 insertions(+), 13 deletions(-) diff --git a/api/server/utils/import/importers-timestamp.spec.js b/api/server/utils/import/importers-timestamp.spec.js index 2ce00de82b..c7665dfe25 100644 --- a/api/server/utils/import/importers-timestamp.spec.js +++ b/api/server/utils/import/importers-timestamp.spec.js @@ -243,6 +243,133 @@ describe('Import Timestamp Ordering', () => { }); }); + describe('ChatGPT Import - Timestamp Issues', () => { + test('should correct timestamp inversions (child before parent)', async () => { + // Simulate ChatGPT export with timestamp inversion (like tool call results) + const jsonData = [ + { + title: 'Timestamp Inversion Test', + create_time: 1000, + mapping: { + 'root-node': { + id: 'root-node', + message: null, + parent: null, + children: ['parent-msg'], + }, + 'parent-msg': { + id: 'parent-msg', + message: { + id: 'parent-msg', + author: { role: 'user' }, + create_time: 1000.1, // Parent: 1000.1 + content: { content_type: 'text', parts: ['Parent message'] }, + metadata: {}, + }, + parent: 'root-node', + children: ['child-msg'], + }, + 'child-msg': { + id: 'child-msg', + message: { + id: 'child-msg', + author: { role: 'assistant' }, + create_time: 1000.095, // Child: 1000.095 (5ms BEFORE parent) + content: { content_type: 'text', parts: ['Child message'] }, + metadata: {}, + }, + parent: 'parent-msg', + children: [], + }, + }, + }, + ]; + + const requestUserId = 'user-123'; + const importBatchBuilder = new ImportBatchBuilder(requestUserId); + jest.spyOn(importBatchBuilder, 'saveMessage'); + + const importer = getImporter(jsonData); + await importer(jsonData, requestUserId, () => importBatchBuilder); + + const savedMessages = importBatchBuilder.messages; + const parent = savedMessages.find((msg) => msg.text === 'Parent message'); + const child = savedMessages.find((msg) => msg.text === 'Child message'); + + expect(parent).toBeDefined(); + expect(child).toBeDefined(); + + // Child timestamp should be adjusted to be after parent + expect(new Date(child.createdAt).getTime()).toBeGreaterThan( + new Date(parent.createdAt).getTime(), + ); + }); + + test('should use conv.create_time for null message timestamps', async () => { + const convCreateTime = 1500000000; // Conversation create time + const jsonData = [ + { + title: 'Null Timestamp Test', + create_time: convCreateTime, + mapping: { + 'root-node': { + id: 'root-node', + message: null, + parent: null, + children: ['msg-with-null-time'], + }, + 'msg-with-null-time': { + id: 'msg-with-null-time', + message: { + id: 'msg-with-null-time', + author: { role: 'user' }, + create_time: null, // Null timestamp + content: { content_type: 'text', parts: ['Message with null time'] }, + metadata: {}, + }, + parent: 'root-node', + children: ['msg-with-valid-time'], + }, + 'msg-with-valid-time': { + id: 'msg-with-valid-time', + message: { + id: 'msg-with-valid-time', + author: { role: 'assistant' }, + create_time: convCreateTime + 10, // Valid timestamp + content: { content_type: 'text', parts: ['Message with valid time'] }, + metadata: {}, + }, + parent: 'msg-with-null-time', + children: [], + }, + }, + }, + ]; + + const requestUserId = 'user-123'; + const importBatchBuilder = new ImportBatchBuilder(requestUserId); + jest.spyOn(importBatchBuilder, 'saveMessage'); + + const importer = getImporter(jsonData); + await importer(jsonData, requestUserId, () => importBatchBuilder); + + const savedMessages = importBatchBuilder.messages; + const nullTimeMsg = savedMessages.find((msg) => msg.text === 'Message with null time'); + const validTimeMsg = savedMessages.find((msg) => msg.text === 'Message with valid time'); + + expect(nullTimeMsg).toBeDefined(); + expect(validTimeMsg).toBeDefined(); + + // Null timestamp should fall back to conv.create_time + expect(nullTimeMsg.createdAt).toEqual(new Date(convCreateTime * 1000)); + + // Child should still be after parent (timestamp adjustment) + expect(new Date(validTimeMsg.createdAt).getTime()).toBeGreaterThan( + new Date(nullTimeMsg.createdAt).getTime(), + ); + }); + }); + describe('Comparison with Fork Functionality', () => { test('fork functionality correctly handles timestamp issues (for comparison)', async () => { const { cloneMessagesWithTimestamps } = require('./fork'); diff --git a/api/server/utils/import/importers.js b/api/server/utils/import/importers.js index cb68946cc3..dc32b018e7 100644 --- a/api/server/utils/import/importers.js +++ b/api/server/utils/import/importers.js @@ -213,11 +213,11 @@ function processConversation(conv, importBatchBuilder, requestUserId) { } /** - * Helper function to find the nearest non-system parent + * Helper function to find the nearest valid parent (skips system, reasoning_recap, and thoughts messages) * @param {string} parentId - The ID of the parent message. - * @returns {string} The ID of the nearest non-system parent message. + * @returns {string} The ID of the nearest valid parent message. */ - const findNonSystemParent = (parentId) => { + const findValidParent = (parentId) => { if (!parentId || !messageMap.has(parentId)) { return Constants.NO_PARENT; } @@ -227,14 +227,62 @@ function processConversation(conv, importBatchBuilder, requestUserId) { return Constants.NO_PARENT; } - /* If parent is a system message, traverse up to find the nearest non-system parent */ - if (parentMapping.message.author?.role === 'system') { - return findNonSystemParent(parentMapping.parent); + /* If parent is a system message, reasoning_recap, or thoughts, traverse up to find the nearest valid parent */ + const contentType = parentMapping.message.content?.content_type; + const shouldSkip = + parentMapping.message.author?.role === 'system' || + contentType === 'reasoning_recap' || + contentType === 'thoughts'; + + if (shouldSkip) { + return findValidParent(parentMapping.parent); } return messageMap.get(parentId); }; + /** + * Helper function to find thinking content from parent chain (thoughts messages) + * @param {string} parentId - The ID of the parent message. + * @param {Set} visited - Set of already-visited IDs to prevent cycles. + * @returns {Array} The thinking content array (empty if not found). + */ + const findThinkingContent = (parentId, visited = new Set()) => { + // Guard against circular references in malformed imports + if (!parentId || visited.has(parentId)) { + return []; + } + visited.add(parentId); + + const parentMapping = conv.mapping[parentId]; + if (!parentMapping?.message) { + return []; + } + + const contentType = parentMapping.message.content?.content_type; + + // If this is a thoughts message, extract the thinking content + if (contentType === 'thoughts') { + const thoughts = parentMapping.message.content.thoughts || []; + const thinkingText = thoughts + .map((t) => t.content || t.summary || '') + .filter(Boolean) + .join('\n\n'); + + if (thinkingText) { + return [{ type: 'think', think: thinkingText }]; + } + return []; + } + + // If this is reasoning_recap, look at its parent for thoughts + if (contentType === 'reasoning_recap') { + return findThinkingContent(parentMapping.parent, visited); + } + + return []; + }; + // Create and save messages using the mapped IDs const messages = []; for (const [id, mapping] of Object.entries(conv.mapping)) { @@ -247,8 +295,20 @@ function processConversation(conv, importBatchBuilder, requestUserId) { continue; } + const contentType = mapping.message.content?.content_type; + + // Skip thoughts messages - they will be merged into the response message + if (contentType === 'thoughts') { + continue; + } + + // Skip reasoning_recap messages (just summaries like "Thought for 44s") + if (contentType === 'reasoning_recap') { + continue; + } + const newMessageId = messageMap.get(id); - const parentMessageId = findNonSystemParent(mapping.parent); + const parentMessageId = findValidParent(mapping.parent); const messageText = formatMessageText(mapping.message); @@ -266,7 +326,12 @@ function processConversation(conv, importBatchBuilder, requestUserId) { } } - messages.push({ + // Use create_time from ChatGPT export to ensure proper message ordering + // For null timestamps, use the conversation's create_time as fallback, or current time as last resort + const messageTime = mapping.message.create_time || conv.create_time; + const createdAt = messageTime ? new Date(messageTime * 1000) : new Date(); + + const message = { messageId: newMessageId, parentMessageId, text: messageText, @@ -275,9 +340,23 @@ function processConversation(conv, importBatchBuilder, requestUserId) { model, user: requestUserId, endpoint: EModelEndpoint.openAI, - }); + createdAt, + }; + + // For assistant messages, check if there's thinking content in the parent chain + if (!isCreatedByUser) { + const thinkingContent = findThinkingContent(mapping.parent); + if (thinkingContent.length > 0) { + // Combine thinking content with the text response + message.content = [...thinkingContent, { type: 'text', text: messageText }]; + } + } + + messages.push(message); } + adjustTimestampsForOrdering(messages); + for (const message of messages) { importBatchBuilder.saveMessage(message); } @@ -325,17 +404,18 @@ function processAssistantMessage(messageData, messageText) { /** * Formats the text content of a message based on its content type and author role. * @param {ChatGPTMessage} messageData - The message data. - * @returns {string} - The updated message text after processing. + * @returns {string} - The formatted message text. */ function formatMessageText(messageData) { - const isText = messageData.content.content_type === 'text'; + const contentType = messageData.content.content_type; + const isText = contentType === 'text'; let messageText = ''; if (isText && messageData.content.parts) { messageText = messageData.content.parts.join(' '); - } else if (messageData.content.content_type === 'code') { + } else if (contentType === 'code') { messageText = `\`\`\`${messageData.content.language}\n${messageData.content.text}\n\`\`\``; - } else if (messageData.content.content_type === 'execution_output') { + } else if (contentType === 'execution_output') { messageText = `Execution Output:\n> ${messageData.content.text}`; } else if (messageData.content.parts) { for (const part of messageData.content.parts) { @@ -357,4 +437,33 @@ function formatMessageText(messageData) { return messageText; } +/** + * Adjusts message timestamps to ensure children always come after parents. + * Messages are sorted by createdAt and buildTree expects parents to appear before children. + * ChatGPT exports can have slight timestamp inversions (e.g., tool call results + * arriving a few ms before their parent). Uses multiple passes to handle cascading adjustments. + * + * @param {Array} messages - Array of message objects with messageId, parentMessageId, and createdAt. + */ +function adjustTimestampsForOrdering(messages) { + const timestampMap = new Map(); + messages.forEach((msg) => timestampMap.set(msg.messageId, msg.createdAt)); + + let hasChanges = true; + while (hasChanges) { + hasChanges = false; + for (const message of messages) { + if (message.parentMessageId && message.parentMessageId !== Constants.NO_PARENT) { + const parentTimestamp = timestampMap.get(message.parentMessageId); + if (parentTimestamp && message.createdAt <= parentTimestamp) { + // Bump child timestamp to 1ms after parent + message.createdAt = new Date(parentTimestamp.getTime() + 1); + timestampMap.set(message.messageId, message.createdAt); + hasChanges = true; + } + } + } + } +} + module.exports = { getImporter, processAssistantMessage }; diff --git a/api/server/utils/import/importers.spec.js b/api/server/utils/import/importers.spec.js index 54b2dfea73..7c284540e6 100644 --- a/api/server/utils/import/importers.spec.js +++ b/api/server/utils/import/importers.spec.js @@ -497,6 +497,262 @@ describe('importChatGptConvo', () => { expect(userMsg.sender).toBe('user'); expect(userMsg.isCreatedByUser).toBe(true); }); + + it('should merge thinking content into assistant message', async () => { + const testData = [ + { + title: 'Thinking Content Test', + create_time: 1000, + update_time: 2000, + mapping: { + 'root-node': { + id: 'root-node', + message: null, + parent: null, + children: ['user-msg-1'], + }, + 'user-msg-1': { + id: 'user-msg-1', + message: { + id: 'user-msg-1', + author: { role: 'user' }, + create_time: 1, + content: { content_type: 'text', parts: ['What is 2+2?'] }, + metadata: {}, + }, + parent: 'root-node', + children: ['thoughts-msg'], + }, + 'thoughts-msg': { + id: 'thoughts-msg', + message: { + id: 'thoughts-msg', + author: { role: 'assistant' }, + create_time: 2, + content: { + content_type: 'thoughts', + thoughts: [ + { content: 'Let me think about this math problem.' }, + { content: 'Adding 2 and 2 together gives 4.' }, + ], + }, + metadata: {}, + }, + parent: 'user-msg-1', + children: ['reasoning-recap-msg'], + }, + 'reasoning-recap-msg': { + id: 'reasoning-recap-msg', + message: { + id: 'reasoning-recap-msg', + author: { role: 'assistant' }, + create_time: 3, + content: { + content_type: 'reasoning_recap', + recap_text: 'Thought for 2 seconds', + }, + metadata: {}, + }, + parent: 'thoughts-msg', + children: ['assistant-msg-1'], + }, + 'assistant-msg-1': { + id: 'assistant-msg-1', + message: { + id: 'assistant-msg-1', + author: { role: 'assistant' }, + create_time: 4, + content: { content_type: 'text', parts: ['The answer is 4.'] }, + metadata: {}, + }, + parent: 'reasoning-recap-msg', + children: [], + }, + }, + }, + ]; + + const requestUserId = 'user-123'; + const importBatchBuilder = new ImportBatchBuilder(requestUserId); + jest.spyOn(importBatchBuilder, 'saveMessage'); + + const importer = getImporter(testData); + await importer(testData, requestUserId, () => importBatchBuilder); + + const savedMessages = importBatchBuilder.saveMessage.mock.calls.map((call) => call[0]); + + // Should only have 2 messages: user message and assistant response + // (thoughts and reasoning_recap should be merged/skipped) + expect(savedMessages).toHaveLength(2); + + const userMsg = savedMessages.find((msg) => msg.text === 'What is 2+2?'); + const assistantMsg = savedMessages.find((msg) => msg.text === 'The answer is 4.'); + + expect(userMsg).toBeDefined(); + expect(assistantMsg).toBeDefined(); + + // Assistant message should have content array with thinking block + expect(assistantMsg.content).toBeDefined(); + expect(assistantMsg.content).toHaveLength(2); + expect(assistantMsg.content[0].type).toBe('think'); + expect(assistantMsg.content[0].think).toContain('Let me think about this math problem.'); + expect(assistantMsg.content[0].think).toContain('Adding 2 and 2 together gives 4.'); + expect(assistantMsg.content[1].type).toBe('text'); + expect(assistantMsg.content[1].text).toBe('The answer is 4.'); + + // Verify parent-child relationship is correct (skips thoughts and reasoning_recap) + expect(assistantMsg.parentMessageId).toBe(userMsg.messageId); + }); + + it('should skip reasoning_recap and thoughts messages as separate entries', async () => { + const testData = [ + { + title: 'Skip Thinking Messages Test', + create_time: 1000, + update_time: 2000, + mapping: { + 'root-node': { + id: 'root-node', + message: null, + parent: null, + children: ['user-msg-1'], + }, + 'user-msg-1': { + id: 'user-msg-1', + message: { + id: 'user-msg-1', + author: { role: 'user' }, + create_time: 1, + content: { content_type: 'text', parts: ['Hello'] }, + metadata: {}, + }, + parent: 'root-node', + children: ['thoughts-msg'], + }, + 'thoughts-msg': { + id: 'thoughts-msg', + message: { + id: 'thoughts-msg', + author: { role: 'assistant' }, + create_time: 2, + content: { + content_type: 'thoughts', + thoughts: [{ content: 'Thinking...' }], + }, + metadata: {}, + }, + parent: 'user-msg-1', + children: ['reasoning-recap-msg'], + }, + 'reasoning-recap-msg': { + id: 'reasoning-recap-msg', + message: { + id: 'reasoning-recap-msg', + author: { role: 'assistant' }, + create_time: 3, + content: { + content_type: 'reasoning_recap', + recap_text: 'Thought for 1 second', + }, + metadata: {}, + }, + parent: 'thoughts-msg', + children: ['assistant-msg-1'], + }, + 'assistant-msg-1': { + id: 'assistant-msg-1', + message: { + id: 'assistant-msg-1', + author: { role: 'assistant' }, + create_time: 4, + content: { content_type: 'text', parts: ['Hi there!'] }, + metadata: {}, + }, + parent: 'reasoning-recap-msg', + children: [], + }, + }, + }, + ]; + + const requestUserId = 'user-123'; + const importBatchBuilder = new ImportBatchBuilder(requestUserId); + jest.spyOn(importBatchBuilder, 'saveMessage'); + + const importer = getImporter(testData); + await importer(testData, requestUserId, () => importBatchBuilder); + + const savedMessages = importBatchBuilder.saveMessage.mock.calls.map((call) => call[0]); + + // Verify no messages have thoughts or reasoning_recap content types + const thoughtsMessages = savedMessages.filter( + (msg) => + msg.text === '' || msg.text?.includes('Thinking...') || msg.text?.includes('Thought for'), + ); + expect(thoughtsMessages).toHaveLength(0); + + // Only user and assistant text messages should be saved + expect(savedMessages).toHaveLength(2); + expect(savedMessages.map((m) => m.text).sort()).toEqual(['Hello', 'Hi there!'].sort()); + }); + + it('should set createdAt from ChatGPT create_time', async () => { + const testData = [ + { + title: 'Timestamp Test', + create_time: 1000, + update_time: 2000, + mapping: { + 'root-node': { + id: 'root-node', + message: null, + parent: null, + children: ['user-msg-1'], + }, + 'user-msg-1': { + id: 'user-msg-1', + message: { + id: 'user-msg-1', + author: { role: 'user' }, + create_time: 1000, + content: { content_type: 'text', parts: ['Test message'] }, + metadata: {}, + }, + parent: 'root-node', + children: ['assistant-msg-1'], + }, + 'assistant-msg-1': { + id: 'assistant-msg-1', + message: { + id: 'assistant-msg-1', + author: { role: 'assistant' }, + create_time: 2000, + content: { content_type: 'text', parts: ['Response'] }, + metadata: {}, + }, + parent: 'user-msg-1', + children: [], + }, + }, + }, + ]; + + const requestUserId = 'user-123'; + const importBatchBuilder = new ImportBatchBuilder(requestUserId); + jest.spyOn(importBatchBuilder, 'saveMessage'); + + const importer = getImporter(testData); + await importer(testData, requestUserId, () => importBatchBuilder); + + const savedMessages = importBatchBuilder.saveMessage.mock.calls.map((call) => call[0]); + + const userMsg = savedMessages.find((msg) => msg.text === 'Test message'); + const assistantMsg = savedMessages.find((msg) => msg.text === 'Response'); + + // Verify createdAt is set from create_time (converted from Unix timestamp) + expect(userMsg.createdAt).toEqual(new Date(1000 * 1000)); + expect(assistantMsg.createdAt).toEqual(new Date(2000 * 1000)); + }); }); describe('importLibreChatConvo', () => {