From f380390408ffe0ae1470d4c5217224e7de656947 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Thu, 19 Mar 2026 17:15:12 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=9B=A1=EF=B8=8F=20fix:=20Prevent=20loop?= =?UTF-8?q?=20in=20ChatGPT=20import=20on=20Cyclic=20Parent=20Graphs=20(#12?= =?UTF-8?q?313)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cap adjustTimestampsForOrdering to N passes and add cycle detection to findValidParent, preventing DoS via crafted ChatGPT export files with cyclic parentMessageId relationships. Add breakParentCycles to sever cyclic back-edges before saving, ensuring structurally valid message trees are persisted to the DB. --- .../utils/import/importers-timestamp.spec.js | 128 ++++++++++++++++++ api/server/utils/import/importers.js | 117 ++++++++++++---- 2 files changed, 219 insertions(+), 26 deletions(-) diff --git a/api/server/utils/import/importers-timestamp.spec.js b/api/server/utils/import/importers-timestamp.spec.js index c7665dfe25..02f24f72ae 100644 --- a/api/server/utils/import/importers-timestamp.spec.js +++ b/api/server/utils/import/importers-timestamp.spec.js @@ -1,3 +1,4 @@ +const { logger } = require('@librechat/data-schemas'); const { Constants } = require('librechat-data-provider'); const { ImportBatchBuilder } = require('./importBatchBuilder'); const { getImporter } = require('./importers'); @@ -368,6 +369,133 @@ describe('Import Timestamp Ordering', () => { new Date(nullTimeMsg.createdAt).getTime(), ); }); + + test('should terminate on cyclic parent relationships and break cycles before saving', async () => { + const warnSpy = jest.spyOn(logger, 'warn'); + const jsonData = [ + { + title: 'Cycle Test', + create_time: 1700000000, + mapping: { + 'root-node': { + id: 'root-node', + message: null, + parent: null, + children: ['message-a'], + }, + 'message-a': { + id: 'message-a', + message: { + id: 'message-a', + author: { role: 'user' }, + create_time: 1700000000, + content: { content_type: 'text', parts: ['Message A'] }, + metadata: {}, + }, + parent: 'message-b', + children: ['message-b'], + }, + 'message-b': { + id: 'message-b', + message: { + id: 'message-b', + author: { role: 'assistant' }, + create_time: 1700000000, + content: { content_type: 'text', parts: ['Message B'] }, + metadata: {}, + }, + parent: 'message-a', + children: ['message-a'], + }, + }, + }, + ]; + + const requestUserId = 'user-123'; + const importBatchBuilder = new ImportBatchBuilder(requestUserId); + + const importer = getImporter(jsonData); + await importer(jsonData, requestUserId, () => importBatchBuilder); + + const { messages } = importBatchBuilder; + expect(messages).toHaveLength(2); + + const msgA = messages.find((m) => m.text === 'Message A'); + const msgB = messages.find((m) => m.text === 'Message B'); + expect(msgA).toBeDefined(); + expect(msgB).toBeDefined(); + + const roots = messages.filter((m) => m.parentMessageId === Constants.NO_PARENT); + expect(roots).toHaveLength(1); + + const [root] = roots; + const nonRoot = messages.find((m) => m.parentMessageId !== Constants.NO_PARENT); + expect(nonRoot.parentMessageId).toBe(root.messageId); + + expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('cyclic parent relationships')); + warnSpy.mockRestore(); + }); + + test('should not hang when findValidParent encounters a skippable-message cycle', async () => { + const jsonData = [ + { + title: 'Skippable Cycle Test', + create_time: 1700000000, + mapping: { + 'root-node': { + id: 'root-node', + message: null, + parent: null, + children: ['real-msg'], + }, + 'sys-a': { + id: 'sys-a', + message: { + id: 'sys-a', + author: { role: 'system' }, + create_time: 1700000000, + content: { content_type: 'text', parts: ['system a'] }, + metadata: {}, + }, + parent: 'sys-b', + children: ['real-msg'], + }, + 'sys-b': { + id: 'sys-b', + message: { + id: 'sys-b', + author: { role: 'system' }, + create_time: 1700000000, + content: { content_type: 'text', parts: ['system b'] }, + metadata: {}, + }, + parent: 'sys-a', + children: [], + }, + 'real-msg': { + id: 'real-msg', + message: { + id: 'real-msg', + author: { role: 'user' }, + create_time: 1700000001, + content: { content_type: 'text', parts: ['Hello'] }, + metadata: {}, + }, + parent: 'sys-a', + children: [], + }, + }, + }, + ]; + + const importBatchBuilder = new ImportBatchBuilder('user-123'); + const importer = getImporter(jsonData); + await importer(jsonData, 'user-123', () => importBatchBuilder); + + const realMsg = importBatchBuilder.messages.find((m) => m.text === 'Hello'); + expect(realMsg).toBeDefined(); + expect(realMsg.parentMessageId).toBe(Constants.NO_PARENT); + }); }); describe('Comparison with Fork Functionality', () => { diff --git a/api/server/utils/import/importers.js b/api/server/utils/import/importers.js index 81a0f048df..39734c181c 100644 --- a/api/server/utils/import/importers.js +++ b/api/server/utils/import/importers.js @@ -324,32 +324,42 @@ function processConversation(conv, importBatchBuilder, requestUserId) { } /** - * Helper function to find the nearest valid parent (skips system, reasoning_recap, and thoughts messages) - * @param {string} parentId - The ID of the parent message. + * Finds the nearest valid parent by traversing up through skippable messages + * (system, reasoning_recap, thoughts). Uses iterative traversal to avoid + * stack overflow on deep chains of skippable messages. + * + * @param {string} startId - The ID of the starting parent message. * @returns {string} The ID of the nearest valid parent message. */ - const findValidParent = (parentId) => { - if (!parentId || !messageMap.has(parentId)) { - return Constants.NO_PARENT; + const findValidParent = (startId) => { + const visited = new Set(); + let parentId = startId; + + while (parentId) { + if (!messageMap.has(parentId) || visited.has(parentId)) { + return Constants.NO_PARENT; + } + visited.add(parentId); + + const parentMapping = conv.mapping[parentId]; + if (!parentMapping?.message) { + return Constants.NO_PARENT; + } + + const contentType = parentMapping.message.content?.content_type; + const shouldSkip = + parentMapping.message.author?.role === 'system' || + contentType === 'reasoning_recap' || + contentType === 'thoughts'; + + if (!shouldSkip) { + return messageMap.get(parentId); + } + + parentId = parentMapping.parent; } - const parentMapping = conv.mapping[parentId]; - if (!parentMapping?.message) { - return Constants.NO_PARENT; - } - - /* If parent is a system message, reasoning_recap, or thoughts, traverse up to find the nearest valid parent */ - const contentType = parentMapping.message.content?.content_type; - const shouldSkip = - parentMapping.message.author?.role === 'system' || - contentType === 'reasoning_recap' || - contentType === 'thoughts'; - - if (shouldSkip) { - return findValidParent(parentMapping.parent); - } - - return messageMap.get(parentId); + return Constants.NO_PARENT; }; /** @@ -466,7 +476,10 @@ function processConversation(conv, importBatchBuilder, requestUserId) { messages.push(message); } - adjustTimestampsForOrdering(messages); + const cycleDetected = adjustTimestampsForOrdering(messages); + if (cycleDetected) { + breakParentCycles(messages); + } for (const message of messages) { importBatchBuilder.saveMessage(message); @@ -553,21 +566,30 @@ function formatMessageText(messageData) { * Messages are sorted by createdAt and buildTree expects parents to appear before children. * ChatGPT exports can have slight timestamp inversions (e.g., tool call results * arriving a few ms before their parent). Uses multiple passes to handle cascading adjustments. + * Capped at N passes (where N = message count) to guarantee termination on cyclic graphs. * * @param {Array} messages - Array of message objects with messageId, parentMessageId, and createdAt. + * @returns {boolean} True if cyclic parent relationships were detected. */ function adjustTimestampsForOrdering(messages) { + if (messages.length === 0) { + return false; + } + const timestampMap = new Map(); - messages.forEach((msg) => timestampMap.set(msg.messageId, msg.createdAt)); + for (const msg of messages) { + timestampMap.set(msg.messageId, msg.createdAt); + } let hasChanges = true; - while (hasChanges) { + let remainingPasses = messages.length; + while (hasChanges && remainingPasses > 0) { hasChanges = false; + remainingPasses--; for (const message of messages) { if (message.parentMessageId && message.parentMessageId !== Constants.NO_PARENT) { const parentTimestamp = timestampMap.get(message.parentMessageId); if (parentTimestamp && message.createdAt <= parentTimestamp) { - // Bump child timestamp to 1ms after parent message.createdAt = new Date(parentTimestamp.getTime() + 1); timestampMap.set(message.messageId, message.createdAt); hasChanges = true; @@ -575,6 +597,49 @@ function adjustTimestampsForOrdering(messages) { } } } + + const cycleDetected = remainingPasses === 0 && hasChanges; + if (cycleDetected) { + logger.warn( + '[importers] Detected cyclic parent relationships while adjusting import timestamps', + ); + } + return cycleDetected; +} + +/** + * Severs cyclic parentMessageId back-edges so saved messages form a valid tree. + * Walks each message's parent chain; if a message is visited twice, its parentMessageId + * is set to NO_PARENT to break the cycle. + * + * @param {Array} messages - Array of message objects with messageId and parentMessageId. + */ +function breakParentCycles(messages) { + const parentLookup = new Map(); + for (const msg of messages) { + parentLookup.set(msg.messageId, msg); + } + + const settled = new Set(); + for (const message of messages) { + const chain = new Set(); + let current = message; + while (current && !settled.has(current.messageId)) { + if (chain.has(current.messageId)) { + current.parentMessageId = Constants.NO_PARENT; + break; + } + chain.add(current.messageId); + const parentId = current.parentMessageId; + if (!parentId || parentId === Constants.NO_PARENT) { + break; + } + current = parentLookup.get(parentId); + } + for (const id of chain) { + settled.add(id); + } + } } module.exports = { getImporter, processAssistantMessage };