LibreChat/api/server/utils/import/importers.js
Jakub Fidler 180d0f18fe
🐛 fix: ChatGPT import creating fragmented conversation tree (#11123)
- Use ChatGPT timestamps (create_time) for proper message ordering
- Fallback to conv.create_time for null timestamps
- Adjust timestamps so children are always after parents
- Guard against circular references in findThinkingContent
- Skip thoughts and reasoning_recap messages (merged into responses)
- Add comprehensive timestamp ordering tests

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-29 21:31:18 -05:00

const { v4: uuidv4 } = require('uuid');
const { logger } = require('@librechat/data-schemas');
const { EModelEndpoint, Constants, openAISettings, CacheKeys } = require('librechat-data-provider');
const { createImportBatchBuilder } = require('./importBatchBuilder');
const { cloneMessagesWithTimestamps } = require('./fork');
const getLogStores = require('~/cache/getLogStores');

/**
 * Returns the appropriate importer function based on the provided JSON data.
 *
 * @param {Object} jsonData - The JSON data to import.
 * @returns {Function} - The importer function.
 * @throws {Error} - If the import type is not supported.
 */
function getImporter(jsonData) {
  // For ChatGPT
  if (Array.isArray(jsonData)) {
    logger.info('Importing ChatGPT conversation');
    return importChatGptConvo;
  }

  // For ChatbotUI
  if (jsonData.version && Array.isArray(jsonData.history)) {
    logger.info('Importing ChatbotUI conversation');
    return importChatBotUiConvo;
  }

  // For LibreChat
  if (jsonData.conversationId && (jsonData.messagesTree || jsonData.messages)) {
    logger.info('Importing LibreChat conversation');
    return importLibreChatConvo;
  }

  throw new Error('Unsupported import type');
}
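
/*
 * Usage sketch (illustrative, not part of the module): the request handler and
 * `fileContents` below are assumptions, shown only to demonstrate how
 * `getImporter` dispatches on the shape of the parsed JSON.
 *
 *   const jsonData = JSON.parse(fileContents); // a ChatGPT export is a top-level array
 *   const importConvo = getImporter(jsonData); // -> importChatGptConvo
 *   await importConvo(jsonData, req.user.id);  // persists the conversations for this user
 */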

/**
 * Imports a chatbot-ui V1 conversation from a JSON file and saves it to the database.
 *
 * @param {Object} jsonData - The JSON data containing the chatbot conversation.
 * @param {string} requestUserId - The ID of the user making the import request.
 * @param {Function} [builderFactory=createImportBatchBuilder] - The factory function to create an import batch builder.
 * @returns {Promise<void>} - A promise that resolves when the import is complete.
 * @throws {Error} - If there is an error creating the conversation from the JSON file.
 */
async function importChatBotUiConvo(
  jsonData,
  requestUserId,
  builderFactory = createImportBatchBuilder,
) {
  // This has been tested with the chatbot-ui V1 export: https://github.com/mckaywrigley/chatbot-ui/tree/b865b0555f53957e96727bc0bbb369c9eaecd83b#legacy-code
  try {
    /** @type {ImportBatchBuilder} */
    const importBatchBuilder = builderFactory(requestUserId);

    for (const historyItem of jsonData.history) {
      importBatchBuilder.startConversation(EModelEndpoint.openAI);
      for (const message of historyItem.messages) {
        if (message.role === 'assistant') {
          importBatchBuilder.addGptMessage(message.content, historyItem.model.id);
        } else if (message.role === 'user') {
          importBatchBuilder.addUserMessage(message.content);
        }
      }
      importBatchBuilder.finishConversation(historyItem.name, new Date());
    }

    await importBatchBuilder.saveBatch();
    logger.info(`user: ${requestUserId} | ChatbotUI conversation imported`);
  } catch (error) {
    logger.error(`user: ${requestUserId} | Error creating conversation from ChatbotUI file`, error);
  }
}
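
/*
 * Shape of a chatbot-ui V1 export, as consumed above (field names come from the
 * loop above; values are illustrative only):
 *
 *   {
 *     "version": 4,
 *     "history": [
 *       {
 *         "name": "Trip planning",
 *         "model": { "id": "gpt-4" },
 *         "messages": [
 *           { "role": "user", "content": "Suggest a 3-day itinerary for Kyoto." },
 *           { "role": "assistant", "content": "Day 1: Fushimi Inari..." }
 *         ]
 *       }
 *     ]
 *   }
 */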

/**
 * Imports a LibreChat conversation from JSON.
 *
 * @param {Object} jsonData - The JSON data representing the conversation.
 * @param {string} requestUserId - The ID of the user making the import request.
 * @param {Function} [builderFactory=createImportBatchBuilder] - The factory function to create an import batch builder.
 * @returns {Promise<void>} - A promise that resolves when the import is complete.
 */
async function importLibreChatConvo(
  jsonData,
  requestUserId,
  builderFactory = createImportBatchBuilder,
) {
  try {
    /** @type {ImportBatchBuilder} */
    const importBatchBuilder = builderFactory(requestUserId);
    const options = jsonData.options || {};

    /* Endpoint configuration */
    let endpoint = jsonData.endpoint ?? options.endpoint ?? EModelEndpoint.openAI;
    const cache = getLogStores(CacheKeys.CONFIG_STORE);
    const endpointsConfig = await cache.get(CacheKeys.ENDPOINT_CONFIG);
    const endpointConfig = endpointsConfig?.[endpoint];
    if (!endpointConfig && endpointsConfig) {
      endpoint = Object.keys(endpointsConfig)[0];
    } else if (!endpointConfig) {
      endpoint = EModelEndpoint.openAI;
    }

    importBatchBuilder.startConversation(endpoint);

    let firstMessageDate = null;
    const messagesToImport = jsonData.messagesTree || jsonData.messages;

    if (jsonData.recursive) {
      /**
       * Flatten the recursive message tree into a flat array
       * @param {TMessage[]} messages
       * @param {string} parentMessageId
       * @param {TMessage[]} flatMessages
       */
      const flattenMessages = (
        messages,
        parentMessageId = Constants.NO_PARENT,
        flatMessages = [],
      ) => {
        for (const message of messages) {
          if (!message.text && !message.content) {
            continue;
          }

          const flatMessage = {
            ...message,
            parentMessageId: parentMessageId,
            children: undefined, // Remove children from the flat structure
          };
          flatMessages.push(flatMessage);

          if (!firstMessageDate && message.createdAt) {
            firstMessageDate = new Date(message.createdAt);
          }

          if (message.children && message.children.length > 0) {
            flattenMessages(message.children, message.messageId, flatMessages);
          }
        }

        return flatMessages;
      };

      const flatMessages = flattenMessages(messagesToImport);
      cloneMessagesWithTimestamps(flatMessages, importBatchBuilder);
    } else if (messagesToImport) {
      cloneMessagesWithTimestamps(messagesToImport, importBatchBuilder);
      for (const message of messagesToImport) {
        if (!firstMessageDate && message.createdAt) {
          firstMessageDate = new Date(message.createdAt);
        }
      }
    } else {
      throw new Error('Invalid LibreChat file format');
    }

    // Discard unparsable dates (an unparsable `createdAt` yields an Invalid Date object)
    if (firstMessageDate && isNaN(firstMessageDate.getTime())) {
      firstMessageDate = null;
    }

    importBatchBuilder.finishConversation(jsonData.title, firstMessageDate ?? new Date(), options);
    await importBatchBuilder.saveBatch();
    logger.debug(`user: ${requestUserId} | Conversation "${jsonData.title}" imported`);
  } catch (error) {
    logger.error(`user: ${requestUserId} | Error creating conversation from LibreChat file`, error);
  }
}
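
/*
 * Minimal sketch of a recursive LibreChat export (`recursive: true`), limited to
 * the fields read above; IDs, text, and dates are illustrative:
 *
 *   {
 *     "conversationId": "…",
 *     "title": "Kyoto itinerary",
 *     "recursive": true,
 *     "options": { "endpoint": "openAI" },
 *     "messagesTree": [
 *       {
 *         "messageId": "a1",
 *         "text": "Suggest a 3-day itinerary for Kyoto.",
 *         "createdAt": "2024-05-01T12:00:00.000Z",
 *         "children": [
 *           { "messageId": "b2", "text": "Day 1: Fushimi Inari…", "createdAt": "2024-05-01T12:00:05.000Z", "children": [] }
 *         ]
 *       }
 *     ]
 *   }
 *
 * flattenMessages walks this tree depth-first, so "b2" ends up with
 * parentMessageId "a1" before the batch is cloned with timestamps.
 */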

/**
 * Imports ChatGPT conversations from provided JSON data.
 * Initializes the import process by creating a batch builder and processing each conversation in the data.
 *
 * @param {ChatGPTConvo[]} jsonData - Array of conversation objects to be imported.
 * @param {string} requestUserId - The ID of the user who initiated the import process.
 * @param {Function} builderFactory - Factory function to create a new import batch builder instance, defaults to createImportBatchBuilder.
 * @returns {Promise<void>} Promise that resolves when all conversations have been imported.
 */
async function importChatGptConvo(
  jsonData,
  requestUserId,
  builderFactory = createImportBatchBuilder,
) {
  try {
    const importBatchBuilder = builderFactory(requestUserId);
    for (const conv of jsonData) {
      processConversation(conv, importBatchBuilder, requestUserId);
    }
    await importBatchBuilder.saveBatch();
  } catch (error) {
    logger.error(`user: ${requestUserId} | Error creating conversation from imported file`, error);
  }
}
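
/*
 * A ChatGPT export is a top-level array of conversations; only the fields used
 * by processConversation are sketched here (values illustrative):
 *
 *   [
 *     {
 *       "title": "Kyoto itinerary",
 *       "create_time": 1735500000.0,
 *       "mapping": { "msg-user": { ... }, "msg-reply": { ... } }
 *     }
 *   ]
 */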

/**
 * Processes a single conversation, adding messages to the batch builder based on author roles and handling text content.
 * It directly manages the addition of messages for different roles and handles citations for assistant messages.
 *
 * @param {ChatGPTConvo} conv - A single conversation object that contains multiple messages and other details.
 * @param {ImportBatchBuilder} importBatchBuilder - The batch builder instance used to manage and batch conversation data.
 * @param {string} requestUserId - The ID of the user who initiated the import process.
 * @returns {void}
 */
function processConversation(conv, importBatchBuilder, requestUserId) {
  importBatchBuilder.startConversation(EModelEndpoint.openAI);

  // Map all message IDs to new UUIDs
  const messageMap = new Map();
  for (const [id, mapping] of Object.entries(conv.mapping)) {
    if (mapping.message && mapping.message.content.content_type) {
      const newMessageId = uuidv4();
      messageMap.set(id, newMessageId);
    }
  }

  /**
   * Helper function to find the nearest valid parent (skips system, reasoning_recap, and thoughts messages)
   * @param {string} parentId - The ID of the parent message.
   * @returns {string} The ID of the nearest valid parent message.
   */
  const findValidParent = (parentId) => {
    if (!parentId || !messageMap.has(parentId)) {
      return Constants.NO_PARENT;
    }

    const parentMapping = conv.mapping[parentId];
    if (!parentMapping?.message) {
      return Constants.NO_PARENT;
    }

    /* If parent is a system message, reasoning_recap, or thoughts, traverse up to find the nearest valid parent */
    const contentType = parentMapping.message.content?.content_type;
    const shouldSkip =
      parentMapping.message.author?.role === 'system' ||
      contentType === 'reasoning_recap' ||
      contentType === 'thoughts';

    if (shouldSkip) {
      return findValidParent(parentMapping.parent);
    }

    return messageMap.get(parentId);
  };

  /**
   * Helper function to find thinking content from parent chain (thoughts messages)
   * @param {string} parentId - The ID of the parent message.
   * @param {Set} visited - Set of already-visited IDs to prevent cycles.
   * @returns {Array} The thinking content array (empty if not found).
   */
  const findThinkingContent = (parentId, visited = new Set()) => {
    // Guard against circular references in malformed imports
    if (!parentId || visited.has(parentId)) {
      return [];
    }
    visited.add(parentId);

    const parentMapping = conv.mapping[parentId];
    if (!parentMapping?.message) {
      return [];
    }

    const contentType = parentMapping.message.content?.content_type;

    // If this is a thoughts message, extract the thinking content
    if (contentType === 'thoughts') {
      const thoughts = parentMapping.message.content.thoughts || [];
      const thinkingText = thoughts
        .map((t) => t.content || t.summary || '')
        .filter(Boolean)
        .join('\n\n');

      if (thinkingText) {
        return [{ type: 'think', think: thinkingText }];
      }
      return [];
    }

    // If this is reasoning_recap, look at its parent for thoughts
    if (contentType === 'reasoning_recap') {
      return findThinkingContent(parentMapping.parent, visited);
    }

    return [];
  };

  // Create and save messages using the mapped IDs
  const messages = [];
  for (const [id, mapping] of Object.entries(conv.mapping)) {
    const role = mapping.message?.author?.role;
    if (!mapping.message) {
      messageMap.delete(id);
      continue;
    } else if (role === 'system') {
      // Skip system messages but keep their ID in messageMap for parent references
      continue;
    }

    const contentType = mapping.message.content?.content_type;

    // Skip thoughts messages - they will be merged into the response message
    if (contentType === 'thoughts') {
      continue;
    }

    // Skip reasoning_recap messages (just summaries like "Thought for 44s")
    if (contentType === 'reasoning_recap') {
      continue;
    }

    const newMessageId = messageMap.get(id);
    const parentMessageId = findValidParent(mapping.parent);
    const messageText = formatMessageText(mapping.message);

    const isCreatedByUser = role === 'user';
    let sender = isCreatedByUser ? 'user' : 'assistant';
    const model = mapping.message.metadata.model_slug || openAISettings.model.default;
    if (!isCreatedByUser) {
      /** Extracted model name from model slug */
      const gptMatch = model.match(/gpt-(.+)/i);
      if (gptMatch) {
        sender = `GPT-${gptMatch[1]}`;
      } else {
        sender = model || 'assistant';
      }
    }

    // Use create_time from ChatGPT export to ensure proper message ordering
    // For null timestamps, use the conversation's create_time as fallback, or current time as last resort
    const messageTime = mapping.message.create_time || conv.create_time;
    const createdAt = messageTime ? new Date(messageTime * 1000) : new Date();

    const message = {
      messageId: newMessageId,
      parentMessageId,
      text: messageText,
      sender,
      isCreatedByUser,
      model,
      user: requestUserId,
      endpoint: EModelEndpoint.openAI,
      createdAt,
    };

    // For assistant messages, check if there's thinking content in the parent chain
    if (!isCreatedByUser) {
      const thinkingContent = findThinkingContent(mapping.parent);
      if (thinkingContent.length > 0) {
        // Combine thinking content with the text response
        message.content = [...thinkingContent, { type: 'text', text: messageText }];
      }
    }

    messages.push(message);
  }

  adjustTimestampsForOrdering(messages);

  for (const message of messages) {
    importBatchBuilder.saveMessage(message);
  }

  importBatchBuilder.finishConversation(conv.title, new Date(conv.create_time * 1000));
}
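
/*
 * Sketch of a ChatGPT `mapping` chain as handled above (IDs, text, and times are
 * illustrative). A `thoughts` node sits between the user turn and the assistant
 * reply; it is skipped as a standalone message, its text is merged into the
 * reply's `content` as a `think` part, and findValidParent rewires the reply's
 * parent to the user turn's new ID:
 *
 *   "msg-user":     { parent: null,           message: { author: { role: 'user' },      content: { content_type: 'text', parts: ['…'] }, create_time: 1735500000 } }
 *   "msg-thoughts": { parent: 'msg-user',     message: { author: { role: 'assistant' }, content: { content_type: 'thoughts', thoughts: [{ summary: '…' }] } } }
 *   "msg-reply":    { parent: 'msg-thoughts', message: { author: { role: 'assistant' }, content: { content_type: 'text', parts: ['…'] }, create_time: 1735500044 } }
 */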

/**
 * Processes text content of messages authored by an assistant, inserting citation links as required.
 * Uses citation start and end indices to place links at the correct positions.
 *
 * @param {ChatGPTMessage} messageData - The message data containing metadata about citations.
 * @param {string} messageText - The original text of the message which may be altered by inserting citation links.
 * @returns {string} - The updated message text after processing for citations.
 */
function processAssistantMessage(messageData, messageText) {
  if (!messageText) {
    return messageText;
  }

  const citations = messageData.metadata?.citations ?? [];
  const sortedCitations = [...citations].sort((a, b) => b.start_ix - a.start_ix);
  let result = messageText;

  for (const citation of sortedCitations) {
    if (
      !citation.metadata?.type ||
      citation.metadata.type !== 'webpage' ||
      typeof citation.start_ix !== 'number' ||
      typeof citation.end_ix !== 'number' ||
      citation.start_ix >= citation.end_ix
    ) {
      continue;
    }

    const replacement = ` ([${citation.metadata.title}](${citation.metadata.url}))`;
    result = result.slice(0, citation.start_ix) + replacement + result.slice(citation.end_ix);
  }

  return result;
}
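
/*
 * Worked example for the citation splice above (all values illustrative).
 * Citations are applied from the highest start_ix down so earlier indices stay valid:
 *
 *   processAssistantMessage(
 *     {
 *       metadata: {
 *         citations: [
 *           {
 *             start_ix: 22,
 *             end_ix: 32,
 *             metadata: { type: 'webpage', title: 'Example', url: 'https://example.com' },
 *           },
 *         ],
 *       },
 *     },
 *     'Kyoto has many temples【6†source】 worth visiting.',
 *   );
 *   // -> 'Kyoto has many temples ([Example](https://example.com)) worth visiting.'
 */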

/**
 * Formats the text content of a message based on its content type and author role.
 * @param {ChatGPTMessage} messageData - The message data.
 * @returns {string} - The formatted message text.
 */
function formatMessageText(messageData) {
  const contentType = messageData.content.content_type;
  const isText = contentType === 'text';
  let messageText = '';

  if (isText && messageData.content.parts) {
    messageText = messageData.content.parts.join(' ');
  } else if (contentType === 'code') {
    messageText = `\`\`\`${messageData.content.language}\n${messageData.content.text}\n\`\`\``;
  } else if (contentType === 'execution_output') {
    messageText = `Execution Output:\n> ${messageData.content.text}`;
  } else if (messageData.content.parts) {
    for (const part of messageData.content.parts) {
      if (typeof part === 'string') {
        messageText += part + ' ';
      } else if (typeof part === 'object') {
        messageText = `\`\`\`json\n${JSON.stringify(part, null, 2)}\n\`\`\`\n`;
      }
    }
    messageText = messageText.trim();
  } else {
    messageText = `\`\`\`json\n${JSON.stringify(messageData.content, null, 2)}\n\`\`\``;
  }

  if (isText && messageData.author.role !== 'user') {
    messageText = processAssistantMessage(messageData, messageText);
  }

  return messageText;
}
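
/*
 * Illustrative inputs and outputs for formatMessageText (values are made up):
 *
 *   { content: { content_type: 'text', parts: ['Hello', 'world'] }, author: { role: 'user' } }
 *     -> 'Hello world'
 *   { content: { content_type: 'code', language: 'python', text: 'print(1)' } }
 *     -> '```python\nprint(1)\n```'
 *   { content: { content_type: 'execution_output', text: '1' } }
 *     -> 'Execution Output:\n> 1'
 */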

/**
 * Adjusts message timestamps to ensure children always come after parents.
 * Messages are sorted by createdAt and buildTree expects parents to appear before children.
 * ChatGPT exports can have slight timestamp inversions (e.g., tool call results
 * arriving a few ms before their parent). Uses multiple passes to handle cascading adjustments.
 *
 * @param {Array} messages - Array of message objects with messageId, parentMessageId, and createdAt.
 */
function adjustTimestampsForOrdering(messages) {
  const timestampMap = new Map();
  messages.forEach((msg) => timestampMap.set(msg.messageId, msg.createdAt));

  let hasChanges = true;
  while (hasChanges) {
    hasChanges = false;
    for (const message of messages) {
      if (message.parentMessageId && message.parentMessageId !== Constants.NO_PARENT) {
        const parentTimestamp = timestampMap.get(message.parentMessageId);
        if (parentTimestamp && message.createdAt <= parentTimestamp) {
          // Bump child timestamp to 1ms after parent
          message.createdAt = new Date(parentTimestamp.getTime() + 1);
          timestampMap.set(message.messageId, message.createdAt);
          hasChanges = true;
        }
      }
    }
  }
}
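
/*
 * Worked example for adjustTimestampsForOrdering (IDs and times are illustrative):
 *
 *   parent "a" createdAt 10:00:00.500, child "b" (parentMessageId "a") createdAt 10:00:00.498
 *   -> "b" is bumped to 10:00:00.501; if "b" has its own child with an equal or
 *      earlier timestamp, the same or a later pass bumps that child to 10:00:00.502,
 *      and the loop repeats until no child precedes its parent.
 */
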
module.exports = { getImporter, processAssistantMessage };