🛡️ fix: Prevent loop in ChatGPT import on Cyclic Parent Graphs (#12313)
Some checks are pending
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Waiting to run
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Waiting to run

Cap adjustTimestampsForOrdering to N passes and add cycle detection
to findValidParent, preventing DoS via crafted ChatGPT export files
with cyclic parentMessageId relationships.

Add breakParentCycles to sever cyclic back-edges before saving,
ensuring structurally valid message trees are persisted to the DB.
This commit is contained in:
Danny Avila 2026-03-19 17:15:12 -04:00 committed by GitHub
parent ec0238d7ca
commit f380390408
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 219 additions and 26 deletions

View file

@ -1,3 +1,4 @@
const { logger } = require('@librechat/data-schemas');
const { Constants } = require('librechat-data-provider');
const { ImportBatchBuilder } = require('./importBatchBuilder');
const { getImporter } = require('./importers');
@ -368,6 +369,133 @@ describe('Import Timestamp Ordering', () => {
new Date(nullTimeMsg.createdAt).getTime(),
);
});
test('should terminate on cyclic parent relationships and break cycles before saving', async () => {
const warnSpy = jest.spyOn(logger, 'warn');
const jsonData = [
{
title: 'Cycle Test',
create_time: 1700000000,
mapping: {
'root-node': {
id: 'root-node',
message: null,
parent: null,
children: ['message-a'],
},
'message-a': {
id: 'message-a',
message: {
id: 'message-a',
author: { role: 'user' },
create_time: 1700000000,
content: { content_type: 'text', parts: ['Message A'] },
metadata: {},
},
parent: 'message-b',
children: ['message-b'],
},
'message-b': {
id: 'message-b',
message: {
id: 'message-b',
author: { role: 'assistant' },
create_time: 1700000000,
content: { content_type: 'text', parts: ['Message B'] },
metadata: {},
},
parent: 'message-a',
children: ['message-a'],
},
},
},
];
const requestUserId = 'user-123';
const importBatchBuilder = new ImportBatchBuilder(requestUserId);
const importer = getImporter(jsonData);
await importer(jsonData, requestUserId, () => importBatchBuilder);
const { messages } = importBatchBuilder;
expect(messages).toHaveLength(2);
const msgA = messages.find((m) => m.text === 'Message A');
const msgB = messages.find((m) => m.text === 'Message B');
expect(msgA).toBeDefined();
expect(msgB).toBeDefined();
const roots = messages.filter((m) => m.parentMessageId === Constants.NO_PARENT);
expect(roots).toHaveLength(1);
const [root] = roots;
const nonRoot = messages.find((m) => m.parentMessageId !== Constants.NO_PARENT);
expect(nonRoot.parentMessageId).toBe(root.messageId);
expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('cyclic parent relationships'));
warnSpy.mockRestore();
});
test('should not hang when findValidParent encounters a skippable-message cycle', async () => {
const jsonData = [
{
title: 'Skippable Cycle Test',
create_time: 1700000000,
mapping: {
'root-node': {
id: 'root-node',
message: null,
parent: null,
children: ['real-msg'],
},
'sys-a': {
id: 'sys-a',
message: {
id: 'sys-a',
author: { role: 'system' },
create_time: 1700000000,
content: { content_type: 'text', parts: ['system a'] },
metadata: {},
},
parent: 'sys-b',
children: ['real-msg'],
},
'sys-b': {
id: 'sys-b',
message: {
id: 'sys-b',
author: { role: 'system' },
create_time: 1700000000,
content: { content_type: 'text', parts: ['system b'] },
metadata: {},
},
parent: 'sys-a',
children: [],
},
'real-msg': {
id: 'real-msg',
message: {
id: 'real-msg',
author: { role: 'user' },
create_time: 1700000001,
content: { content_type: 'text', parts: ['Hello'] },
metadata: {},
},
parent: 'sys-a',
children: [],
},
},
},
];
const importBatchBuilder = new ImportBatchBuilder('user-123');
const importer = getImporter(jsonData);
await importer(jsonData, 'user-123', () => importBatchBuilder);
const realMsg = importBatchBuilder.messages.find((m) => m.text === 'Hello');
expect(realMsg).toBeDefined();
expect(realMsg.parentMessageId).toBe(Constants.NO_PARENT);
});
});
describe('Comparison with Fork Functionality', () => {

View file

@ -324,32 +324,42 @@ function processConversation(conv, importBatchBuilder, requestUserId) {
}
/**
* Helper function to find the nearest valid parent (skips system, reasoning_recap, and thoughts messages)
* @param {string} parentId - The ID of the parent message.
* Finds the nearest valid parent by traversing up through skippable messages
* (system, reasoning_recap, thoughts). Uses iterative traversal to avoid
* stack overflow on deep chains of skippable messages.
*
* @param {string} startId - The ID of the starting parent message.
* @returns {string} The ID of the nearest valid parent message.
*/
const findValidParent = (parentId) => {
if (!parentId || !messageMap.has(parentId)) {
return Constants.NO_PARENT;
const findValidParent = (startId) => {
const visited = new Set();
let parentId = startId;
while (parentId) {
if (!messageMap.has(parentId) || visited.has(parentId)) {
return Constants.NO_PARENT;
}
visited.add(parentId);
const parentMapping = conv.mapping[parentId];
if (!parentMapping?.message) {
return Constants.NO_PARENT;
}
const contentType = parentMapping.message.content?.content_type;
const shouldSkip =
parentMapping.message.author?.role === 'system' ||
contentType === 'reasoning_recap' ||
contentType === 'thoughts';
if (!shouldSkip) {
return messageMap.get(parentId);
}
parentId = parentMapping.parent;
}
const parentMapping = conv.mapping[parentId];
if (!parentMapping?.message) {
return Constants.NO_PARENT;
}
/* If parent is a system message, reasoning_recap, or thoughts, traverse up to find the nearest valid parent */
const contentType = parentMapping.message.content?.content_type;
const shouldSkip =
parentMapping.message.author?.role === 'system' ||
contentType === 'reasoning_recap' ||
contentType === 'thoughts';
if (shouldSkip) {
return findValidParent(parentMapping.parent);
}
return messageMap.get(parentId);
return Constants.NO_PARENT;
};
/**
@ -466,7 +476,10 @@ function processConversation(conv, importBatchBuilder, requestUserId) {
messages.push(message);
}
adjustTimestampsForOrdering(messages);
const cycleDetected = adjustTimestampsForOrdering(messages);
if (cycleDetected) {
breakParentCycles(messages);
}
for (const message of messages) {
importBatchBuilder.saveMessage(message);
@ -553,21 +566,30 @@ function formatMessageText(messageData) {
* Messages are sorted by createdAt and buildTree expects parents to appear before children.
* ChatGPT exports can have slight timestamp inversions (e.g., tool call results
* arriving a few ms before their parent). Uses multiple passes to handle cascading adjustments.
* Capped at N passes (where N = message count) to guarantee termination on cyclic graphs.
*
* @param {Array} messages - Array of message objects with messageId, parentMessageId, and createdAt.
* @returns {boolean} True if cyclic parent relationships were detected.
*/
function adjustTimestampsForOrdering(messages) {
if (messages.length === 0) {
return false;
}
const timestampMap = new Map();
messages.forEach((msg) => timestampMap.set(msg.messageId, msg.createdAt));
for (const msg of messages) {
timestampMap.set(msg.messageId, msg.createdAt);
}
let hasChanges = true;
while (hasChanges) {
let remainingPasses = messages.length;
while (hasChanges && remainingPasses > 0) {
hasChanges = false;
remainingPasses--;
for (const message of messages) {
if (message.parentMessageId && message.parentMessageId !== Constants.NO_PARENT) {
const parentTimestamp = timestampMap.get(message.parentMessageId);
if (parentTimestamp && message.createdAt <= parentTimestamp) {
// Bump child timestamp to 1ms after parent
message.createdAt = new Date(parentTimestamp.getTime() + 1);
timestampMap.set(message.messageId, message.createdAt);
hasChanges = true;
@ -575,6 +597,49 @@ function adjustTimestampsForOrdering(messages) {
}
}
}
const cycleDetected = remainingPasses === 0 && hasChanges;
if (cycleDetected) {
logger.warn(
'[importers] Detected cyclic parent relationships while adjusting import timestamps',
);
}
return cycleDetected;
}
/**
* Severs cyclic parentMessageId back-edges so saved messages form a valid tree.
* Walks each message's parent chain; if a message is visited twice, its parentMessageId
* is set to NO_PARENT to break the cycle.
*
* @param {Array} messages - Array of message objects with messageId and parentMessageId.
*/
function breakParentCycles(messages) {
const parentLookup = new Map();
for (const msg of messages) {
parentLookup.set(msg.messageId, msg);
}
const settled = new Set();
for (const message of messages) {
const chain = new Set();
let current = message;
while (current && !settled.has(current.messageId)) {
if (chain.has(current.messageId)) {
current.parentMessageId = Constants.NO_PARENT;
break;
}
chain.add(current.messageId);
const parentId = current.parentMessageId;
if (!parentId || parentId === Constants.NO_PARENT) {
break;
}
current = parentLookup.get(parentId);
}
for (const id of chain) {
settled.add(id);
}
}
}
module.exports = { getImporter, processAssistantMessage };