🐛 fix: ChatGPT import creating fragmented conversation tree (#11123)

- Use ChatGPT timestamps (create_time) for proper message ordering
- Fallback to conv.create_time for null timestamps
- Adjust timestamps so children are always after parents
- Guard against circular references in findThinkingContent
- Skip thoughts and reasoning_recap messages (merged into responses)
- Add comprehensive timestamp ordering tests

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Jakub Fidler 2025-12-30 03:31:18 +01:00 committed by GitHub
parent 512fed56bf
commit 180d0f18fe
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 505 additions and 13 deletions

View file

@ -243,6 +243,133 @@ describe('Import Timestamp Ordering', () => {
});
});
describe('ChatGPT Import - Timestamp Issues', () => {
test('should correct timestamp inversions (child before parent)', async () => {
// Simulate ChatGPT export with timestamp inversion (like tool call results)
const jsonData = [
{
title: 'Timestamp Inversion Test',
create_time: 1000,
mapping: {
'root-node': {
id: 'root-node',
message: null,
parent: null,
children: ['parent-msg'],
},
'parent-msg': {
id: 'parent-msg',
message: {
id: 'parent-msg',
author: { role: 'user' },
create_time: 1000.1, // Parent: 1000.1
content: { content_type: 'text', parts: ['Parent message'] },
metadata: {},
},
parent: 'root-node',
children: ['child-msg'],
},
'child-msg': {
id: 'child-msg',
message: {
id: 'child-msg',
author: { role: 'assistant' },
create_time: 1000.095, // Child: 1000.095 (5ms BEFORE parent)
content: { content_type: 'text', parts: ['Child message'] },
metadata: {},
},
parent: 'parent-msg',
children: [],
},
},
},
];
const requestUserId = 'user-123';
const importBatchBuilder = new ImportBatchBuilder(requestUserId);
jest.spyOn(importBatchBuilder, 'saveMessage');
const importer = getImporter(jsonData);
await importer(jsonData, requestUserId, () => importBatchBuilder);
const savedMessages = importBatchBuilder.messages;
const parent = savedMessages.find((msg) => msg.text === 'Parent message');
const child = savedMessages.find((msg) => msg.text === 'Child message');
expect(parent).toBeDefined();
expect(child).toBeDefined();
// Child timestamp should be adjusted to be after parent
expect(new Date(child.createdAt).getTime()).toBeGreaterThan(
new Date(parent.createdAt).getTime(),
);
});
test('should use conv.create_time for null message timestamps', async () => {
const convCreateTime = 1500000000; // Conversation create time
const jsonData = [
{
title: 'Null Timestamp Test',
create_time: convCreateTime,
mapping: {
'root-node': {
id: 'root-node',
message: null,
parent: null,
children: ['msg-with-null-time'],
},
'msg-with-null-time': {
id: 'msg-with-null-time',
message: {
id: 'msg-with-null-time',
author: { role: 'user' },
create_time: null, // Null timestamp
content: { content_type: 'text', parts: ['Message with null time'] },
metadata: {},
},
parent: 'root-node',
children: ['msg-with-valid-time'],
},
'msg-with-valid-time': {
id: 'msg-with-valid-time',
message: {
id: 'msg-with-valid-time',
author: { role: 'assistant' },
create_time: convCreateTime + 10, // Valid timestamp
content: { content_type: 'text', parts: ['Message with valid time'] },
metadata: {},
},
parent: 'msg-with-null-time',
children: [],
},
},
},
];
const requestUserId = 'user-123';
const importBatchBuilder = new ImportBatchBuilder(requestUserId);
jest.spyOn(importBatchBuilder, 'saveMessage');
const importer = getImporter(jsonData);
await importer(jsonData, requestUserId, () => importBatchBuilder);
const savedMessages = importBatchBuilder.messages;
const nullTimeMsg = savedMessages.find((msg) => msg.text === 'Message with null time');
const validTimeMsg = savedMessages.find((msg) => msg.text === 'Message with valid time');
expect(nullTimeMsg).toBeDefined();
expect(validTimeMsg).toBeDefined();
// Null timestamp should fall back to conv.create_time
expect(nullTimeMsg.createdAt).toEqual(new Date(convCreateTime * 1000));
// Child should still be after parent (timestamp adjustment)
expect(new Date(validTimeMsg.createdAt).getTime()).toBeGreaterThan(
new Date(nullTimeMsg.createdAt).getTime(),
);
});
});
describe('Comparison with Fork Functionality', () => {
test('fork functionality correctly handles timestamp issues (for comparison)', async () => {
const { cloneMessagesWithTimestamps } = require('./fork');

View file

@ -213,11 +213,11 @@ function processConversation(conv, importBatchBuilder, requestUserId) {
}
/**
* Helper function to find the nearest non-system parent
* Helper function to find the nearest valid parent (skips system, reasoning_recap, and thoughts messages)
* @param {string} parentId - The ID of the parent message.
* @returns {string} The ID of the nearest non-system parent message.
* @returns {string} The ID of the nearest valid parent message.
*/
const findNonSystemParent = (parentId) => {
const findValidParent = (parentId) => {
if (!parentId || !messageMap.has(parentId)) {
return Constants.NO_PARENT;
}
@ -227,14 +227,62 @@ function processConversation(conv, importBatchBuilder, requestUserId) {
return Constants.NO_PARENT;
}
/* If parent is a system message, traverse up to find the nearest non-system parent */
if (parentMapping.message.author?.role === 'system') {
return findNonSystemParent(parentMapping.parent);
/* If parent is a system message, reasoning_recap, or thoughts, traverse up to find the nearest valid parent */
const contentType = parentMapping.message.content?.content_type;
const shouldSkip =
parentMapping.message.author?.role === 'system' ||
contentType === 'reasoning_recap' ||
contentType === 'thoughts';
if (shouldSkip) {
return findValidParent(parentMapping.parent);
}
return messageMap.get(parentId);
};
/**
* Helper function to find thinking content from parent chain (thoughts messages)
* @param {string} parentId - The ID of the parent message.
* @param {Set} visited - Set of already-visited IDs to prevent cycles.
* @returns {Array} The thinking content array (empty if not found).
*/
const findThinkingContent = (parentId, visited = new Set()) => {
// Guard against circular references in malformed imports
if (!parentId || visited.has(parentId)) {
return [];
}
visited.add(parentId);
const parentMapping = conv.mapping[parentId];
if (!parentMapping?.message) {
return [];
}
const contentType = parentMapping.message.content?.content_type;
// If this is a thoughts message, extract the thinking content
if (contentType === 'thoughts') {
const thoughts = parentMapping.message.content.thoughts || [];
const thinkingText = thoughts
.map((t) => t.content || t.summary || '')
.filter(Boolean)
.join('\n\n');
if (thinkingText) {
return [{ type: 'think', think: thinkingText }];
}
return [];
}
// If this is reasoning_recap, look at its parent for thoughts
if (contentType === 'reasoning_recap') {
return findThinkingContent(parentMapping.parent, visited);
}
return [];
};
// Create and save messages using the mapped IDs
const messages = [];
for (const [id, mapping] of Object.entries(conv.mapping)) {
@ -247,8 +295,20 @@ function processConversation(conv, importBatchBuilder, requestUserId) {
continue;
}
const contentType = mapping.message.content?.content_type;
// Skip thoughts messages - they will be merged into the response message
if (contentType === 'thoughts') {
continue;
}
// Skip reasoning_recap messages (just summaries like "Thought for 44s")
if (contentType === 'reasoning_recap') {
continue;
}
const newMessageId = messageMap.get(id);
const parentMessageId = findNonSystemParent(mapping.parent);
const parentMessageId = findValidParent(mapping.parent);
const messageText = formatMessageText(mapping.message);
@ -266,7 +326,12 @@ function processConversation(conv, importBatchBuilder, requestUserId) {
}
}
messages.push({
// Use create_time from ChatGPT export to ensure proper message ordering
// For null timestamps, use the conversation's create_time as fallback, or current time as last resort
const messageTime = mapping.message.create_time || conv.create_time;
const createdAt = messageTime ? new Date(messageTime * 1000) : new Date();
const message = {
messageId: newMessageId,
parentMessageId,
text: messageText,
@ -275,9 +340,23 @@ function processConversation(conv, importBatchBuilder, requestUserId) {
model,
user: requestUserId,
endpoint: EModelEndpoint.openAI,
});
createdAt,
};
// For assistant messages, check if there's thinking content in the parent chain
if (!isCreatedByUser) {
const thinkingContent = findThinkingContent(mapping.parent);
if (thinkingContent.length > 0) {
// Combine thinking content with the text response
message.content = [...thinkingContent, { type: 'text', text: messageText }];
}
}
messages.push(message);
}
adjustTimestampsForOrdering(messages);
for (const message of messages) {
importBatchBuilder.saveMessage(message);
}
@ -325,17 +404,18 @@ function processAssistantMessage(messageData, messageText) {
/**
* Formats the text content of a message based on its content type and author role.
* @param {ChatGPTMessage} messageData - The message data.
* @returns {string} - The updated message text after processing.
* @returns {string} - The formatted message text.
*/
function formatMessageText(messageData) {
const isText = messageData.content.content_type === 'text';
const contentType = messageData.content.content_type;
const isText = contentType === 'text';
let messageText = '';
if (isText && messageData.content.parts) {
messageText = messageData.content.parts.join(' ');
} else if (messageData.content.content_type === 'code') {
} else if (contentType === 'code') {
messageText = `\`\`\`${messageData.content.language}\n${messageData.content.text}\n\`\`\``;
} else if (messageData.content.content_type === 'execution_output') {
} else if (contentType === 'execution_output') {
messageText = `Execution Output:\n> ${messageData.content.text}`;
} else if (messageData.content.parts) {
for (const part of messageData.content.parts) {
@ -357,4 +437,33 @@ function formatMessageText(messageData) {
return messageText;
}
/**
* Adjusts message timestamps to ensure children always come after parents.
* Messages are sorted by createdAt and buildTree expects parents to appear before children.
* ChatGPT exports can have slight timestamp inversions (e.g., tool call results
* arriving a few ms before their parent). Uses multiple passes to handle cascading adjustments.
*
* @param {Array} messages - Array of message objects with messageId, parentMessageId, and createdAt.
*/
function adjustTimestampsForOrdering(messages) {
const timestampMap = new Map();
messages.forEach((msg) => timestampMap.set(msg.messageId, msg.createdAt));
let hasChanges = true;
while (hasChanges) {
hasChanges = false;
for (const message of messages) {
if (message.parentMessageId && message.parentMessageId !== Constants.NO_PARENT) {
const parentTimestamp = timestampMap.get(message.parentMessageId);
if (parentTimestamp && message.createdAt <= parentTimestamp) {
// Bump child timestamp to 1ms after parent
message.createdAt = new Date(parentTimestamp.getTime() + 1);
timestampMap.set(message.messageId, message.createdAt);
hasChanges = true;
}
}
}
}
}
module.exports = { getImporter, processAssistantMessage };

View file

@ -497,6 +497,262 @@ describe('importChatGptConvo', () => {
expect(userMsg.sender).toBe('user');
expect(userMsg.isCreatedByUser).toBe(true);
});
it('should merge thinking content into assistant message', async () => {
const testData = [
{
title: 'Thinking Content Test',
create_time: 1000,
update_time: 2000,
mapping: {
'root-node': {
id: 'root-node',
message: null,
parent: null,
children: ['user-msg-1'],
},
'user-msg-1': {
id: 'user-msg-1',
message: {
id: 'user-msg-1',
author: { role: 'user' },
create_time: 1,
content: { content_type: 'text', parts: ['What is 2+2?'] },
metadata: {},
},
parent: 'root-node',
children: ['thoughts-msg'],
},
'thoughts-msg': {
id: 'thoughts-msg',
message: {
id: 'thoughts-msg',
author: { role: 'assistant' },
create_time: 2,
content: {
content_type: 'thoughts',
thoughts: [
{ content: 'Let me think about this math problem.' },
{ content: 'Adding 2 and 2 together gives 4.' },
],
},
metadata: {},
},
parent: 'user-msg-1',
children: ['reasoning-recap-msg'],
},
'reasoning-recap-msg': {
id: 'reasoning-recap-msg',
message: {
id: 'reasoning-recap-msg',
author: { role: 'assistant' },
create_time: 3,
content: {
content_type: 'reasoning_recap',
recap_text: 'Thought for 2 seconds',
},
metadata: {},
},
parent: 'thoughts-msg',
children: ['assistant-msg-1'],
},
'assistant-msg-1': {
id: 'assistant-msg-1',
message: {
id: 'assistant-msg-1',
author: { role: 'assistant' },
create_time: 4,
content: { content_type: 'text', parts: ['The answer is 4.'] },
metadata: {},
},
parent: 'reasoning-recap-msg',
children: [],
},
},
},
];
const requestUserId = 'user-123';
const importBatchBuilder = new ImportBatchBuilder(requestUserId);
jest.spyOn(importBatchBuilder, 'saveMessage');
const importer = getImporter(testData);
await importer(testData, requestUserId, () => importBatchBuilder);
const savedMessages = importBatchBuilder.saveMessage.mock.calls.map((call) => call[0]);
// Should only have 2 messages: user message and assistant response
// (thoughts and reasoning_recap should be merged/skipped)
expect(savedMessages).toHaveLength(2);
const userMsg = savedMessages.find((msg) => msg.text === 'What is 2+2?');
const assistantMsg = savedMessages.find((msg) => msg.text === 'The answer is 4.');
expect(userMsg).toBeDefined();
expect(assistantMsg).toBeDefined();
// Assistant message should have content array with thinking block
expect(assistantMsg.content).toBeDefined();
expect(assistantMsg.content).toHaveLength(2);
expect(assistantMsg.content[0].type).toBe('think');
expect(assistantMsg.content[0].think).toContain('Let me think about this math problem.');
expect(assistantMsg.content[0].think).toContain('Adding 2 and 2 together gives 4.');
expect(assistantMsg.content[1].type).toBe('text');
expect(assistantMsg.content[1].text).toBe('The answer is 4.');
// Verify parent-child relationship is correct (skips thoughts and reasoning_recap)
expect(assistantMsg.parentMessageId).toBe(userMsg.messageId);
});
it('should skip reasoning_recap and thoughts messages as separate entries', async () => {
const testData = [
{
title: 'Skip Thinking Messages Test',
create_time: 1000,
update_time: 2000,
mapping: {
'root-node': {
id: 'root-node',
message: null,
parent: null,
children: ['user-msg-1'],
},
'user-msg-1': {
id: 'user-msg-1',
message: {
id: 'user-msg-1',
author: { role: 'user' },
create_time: 1,
content: { content_type: 'text', parts: ['Hello'] },
metadata: {},
},
parent: 'root-node',
children: ['thoughts-msg'],
},
'thoughts-msg': {
id: 'thoughts-msg',
message: {
id: 'thoughts-msg',
author: { role: 'assistant' },
create_time: 2,
content: {
content_type: 'thoughts',
thoughts: [{ content: 'Thinking...' }],
},
metadata: {},
},
parent: 'user-msg-1',
children: ['reasoning-recap-msg'],
},
'reasoning-recap-msg': {
id: 'reasoning-recap-msg',
message: {
id: 'reasoning-recap-msg',
author: { role: 'assistant' },
create_time: 3,
content: {
content_type: 'reasoning_recap',
recap_text: 'Thought for 1 second',
},
metadata: {},
},
parent: 'thoughts-msg',
children: ['assistant-msg-1'],
},
'assistant-msg-1': {
id: 'assistant-msg-1',
message: {
id: 'assistant-msg-1',
author: { role: 'assistant' },
create_time: 4,
content: { content_type: 'text', parts: ['Hi there!'] },
metadata: {},
},
parent: 'reasoning-recap-msg',
children: [],
},
},
},
];
const requestUserId = 'user-123';
const importBatchBuilder = new ImportBatchBuilder(requestUserId);
jest.spyOn(importBatchBuilder, 'saveMessage');
const importer = getImporter(testData);
await importer(testData, requestUserId, () => importBatchBuilder);
const savedMessages = importBatchBuilder.saveMessage.mock.calls.map((call) => call[0]);
// Verify no messages have thoughts or reasoning_recap content types
const thoughtsMessages = savedMessages.filter(
(msg) =>
msg.text === '' || msg.text?.includes('Thinking...') || msg.text?.includes('Thought for'),
);
expect(thoughtsMessages).toHaveLength(0);
// Only user and assistant text messages should be saved
expect(savedMessages).toHaveLength(2);
expect(savedMessages.map((m) => m.text).sort()).toEqual(['Hello', 'Hi there!'].sort());
});
it('should set createdAt from ChatGPT create_time', async () => {
const testData = [
{
title: 'Timestamp Test',
create_time: 1000,
update_time: 2000,
mapping: {
'root-node': {
id: 'root-node',
message: null,
parent: null,
children: ['user-msg-1'],
},
'user-msg-1': {
id: 'user-msg-1',
message: {
id: 'user-msg-1',
author: { role: 'user' },
create_time: 1000,
content: { content_type: 'text', parts: ['Test message'] },
metadata: {},
},
parent: 'root-node',
children: ['assistant-msg-1'],
},
'assistant-msg-1': {
id: 'assistant-msg-1',
message: {
id: 'assistant-msg-1',
author: { role: 'assistant' },
create_time: 2000,
content: { content_type: 'text', parts: ['Response'] },
metadata: {},
},
parent: 'user-msg-1',
children: [],
},
},
},
];
const requestUserId = 'user-123';
const importBatchBuilder = new ImportBatchBuilder(requestUserId);
jest.spyOn(importBatchBuilder, 'saveMessage');
const importer = getImporter(testData);
await importer(testData, requestUserId, () => importBatchBuilder);
const savedMessages = importBatchBuilder.saveMessage.mock.calls.map((call) => call[0]);
const userMsg = savedMessages.find((msg) => msg.text === 'Test message');
const assistantMsg = savedMessages.find((msg) => msg.text === 'Response');
// Verify createdAt is set from create_time (converted from Unix timestamp)
expect(userMsg.createdAt).toEqual(new Date(1000 * 1000));
expect(assistantMsg.createdAt).toEqual(new Date(2000 * 1000));
});
});
describe('importLibreChatConvo', () => {