mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-03-25 17:16:33 +01:00
🛡️ fix: Prevent loop in ChatGPT import on Cyclic Parent Graphs (#12313)
Cap adjustTimestampsForOrdering to N passes and add cycle detection to findValidParent, preventing DoS via crafted ChatGPT export files with cyclic parentMessageId relationships. Add breakParentCycles to sever cyclic back-edges before saving, ensuring structurally valid message trees are persisted to the DB.
This commit is contained in:
parent
ec0238d7ca
commit
f380390408
2 changed files with 219 additions and 26 deletions
|
|
@ -1,3 +1,4 @@
|
|||
const { logger } = require('@librechat/data-schemas');
|
||||
const { Constants } = require('librechat-data-provider');
|
||||
const { ImportBatchBuilder } = require('./importBatchBuilder');
|
||||
const { getImporter } = require('./importers');
|
||||
|
|
@ -368,6 +369,133 @@ describe('Import Timestamp Ordering', () => {
|
|||
new Date(nullTimeMsg.createdAt).getTime(),
|
||||
);
|
||||
});
|
||||
|
||||
test('should terminate on cyclic parent relationships and break cycles before saving', async () => {
|
||||
const warnSpy = jest.spyOn(logger, 'warn');
|
||||
const jsonData = [
|
||||
{
|
||||
title: 'Cycle Test',
|
||||
create_time: 1700000000,
|
||||
mapping: {
|
||||
'root-node': {
|
||||
id: 'root-node',
|
||||
message: null,
|
||||
parent: null,
|
||||
children: ['message-a'],
|
||||
},
|
||||
'message-a': {
|
||||
id: 'message-a',
|
||||
message: {
|
||||
id: 'message-a',
|
||||
author: { role: 'user' },
|
||||
create_time: 1700000000,
|
||||
content: { content_type: 'text', parts: ['Message A'] },
|
||||
metadata: {},
|
||||
},
|
||||
parent: 'message-b',
|
||||
children: ['message-b'],
|
||||
},
|
||||
'message-b': {
|
||||
id: 'message-b',
|
||||
message: {
|
||||
id: 'message-b',
|
||||
author: { role: 'assistant' },
|
||||
create_time: 1700000000,
|
||||
content: { content_type: 'text', parts: ['Message B'] },
|
||||
metadata: {},
|
||||
},
|
||||
parent: 'message-a',
|
||||
children: ['message-a'],
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const requestUserId = 'user-123';
|
||||
const importBatchBuilder = new ImportBatchBuilder(requestUserId);
|
||||
|
||||
const importer = getImporter(jsonData);
|
||||
await importer(jsonData, requestUserId, () => importBatchBuilder);
|
||||
|
||||
const { messages } = importBatchBuilder;
|
||||
expect(messages).toHaveLength(2);
|
||||
|
||||
const msgA = messages.find((m) => m.text === 'Message A');
|
||||
const msgB = messages.find((m) => m.text === 'Message B');
|
||||
expect(msgA).toBeDefined();
|
||||
expect(msgB).toBeDefined();
|
||||
|
||||
const roots = messages.filter((m) => m.parentMessageId === Constants.NO_PARENT);
|
||||
expect(roots).toHaveLength(1);
|
||||
|
||||
const [root] = roots;
|
||||
const nonRoot = messages.find((m) => m.parentMessageId !== Constants.NO_PARENT);
|
||||
expect(nonRoot.parentMessageId).toBe(root.messageId);
|
||||
|
||||
expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('cyclic parent relationships'));
|
||||
warnSpy.mockRestore();
|
||||
});
|
||||
|
||||
test('should not hang when findValidParent encounters a skippable-message cycle', async () => {
|
||||
const jsonData = [
|
||||
{
|
||||
title: 'Skippable Cycle Test',
|
||||
create_time: 1700000000,
|
||||
mapping: {
|
||||
'root-node': {
|
||||
id: 'root-node',
|
||||
message: null,
|
||||
parent: null,
|
||||
children: ['real-msg'],
|
||||
},
|
||||
'sys-a': {
|
||||
id: 'sys-a',
|
||||
message: {
|
||||
id: 'sys-a',
|
||||
author: { role: 'system' },
|
||||
create_time: 1700000000,
|
||||
content: { content_type: 'text', parts: ['system a'] },
|
||||
metadata: {},
|
||||
},
|
||||
parent: 'sys-b',
|
||||
children: ['real-msg'],
|
||||
},
|
||||
'sys-b': {
|
||||
id: 'sys-b',
|
||||
message: {
|
||||
id: 'sys-b',
|
||||
author: { role: 'system' },
|
||||
create_time: 1700000000,
|
||||
content: { content_type: 'text', parts: ['system b'] },
|
||||
metadata: {},
|
||||
},
|
||||
parent: 'sys-a',
|
||||
children: [],
|
||||
},
|
||||
'real-msg': {
|
||||
id: 'real-msg',
|
||||
message: {
|
||||
id: 'real-msg',
|
||||
author: { role: 'user' },
|
||||
create_time: 1700000001,
|
||||
content: { content_type: 'text', parts: ['Hello'] },
|
||||
metadata: {},
|
||||
},
|
||||
parent: 'sys-a',
|
||||
children: [],
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const importBatchBuilder = new ImportBatchBuilder('user-123');
|
||||
const importer = getImporter(jsonData);
|
||||
await importer(jsonData, 'user-123', () => importBatchBuilder);
|
||||
|
||||
const realMsg = importBatchBuilder.messages.find((m) => m.text === 'Hello');
|
||||
expect(realMsg).toBeDefined();
|
||||
expect(realMsg.parentMessageId).toBe(Constants.NO_PARENT);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Comparison with Fork Functionality', () => {
|
||||
|
|
|
|||
|
|
@ -324,32 +324,42 @@ function processConversation(conv, importBatchBuilder, requestUserId) {
|
|||
}
|
||||
|
||||
/**
|
||||
* Helper function to find the nearest valid parent (skips system, reasoning_recap, and thoughts messages)
|
||||
* @param {string} parentId - The ID of the parent message.
|
||||
* Finds the nearest valid parent by traversing up through skippable messages
|
||||
* (system, reasoning_recap, thoughts). Uses iterative traversal to avoid
|
||||
* stack overflow on deep chains of skippable messages.
|
||||
*
|
||||
* @param {string} startId - The ID of the starting parent message.
|
||||
* @returns {string} The ID of the nearest valid parent message.
|
||||
*/
|
||||
const findValidParent = (parentId) => {
|
||||
if (!parentId || !messageMap.has(parentId)) {
|
||||
return Constants.NO_PARENT;
|
||||
const findValidParent = (startId) => {
|
||||
const visited = new Set();
|
||||
let parentId = startId;
|
||||
|
||||
while (parentId) {
|
||||
if (!messageMap.has(parentId) || visited.has(parentId)) {
|
||||
return Constants.NO_PARENT;
|
||||
}
|
||||
visited.add(parentId);
|
||||
|
||||
const parentMapping = conv.mapping[parentId];
|
||||
if (!parentMapping?.message) {
|
||||
return Constants.NO_PARENT;
|
||||
}
|
||||
|
||||
const contentType = parentMapping.message.content?.content_type;
|
||||
const shouldSkip =
|
||||
parentMapping.message.author?.role === 'system' ||
|
||||
contentType === 'reasoning_recap' ||
|
||||
contentType === 'thoughts';
|
||||
|
||||
if (!shouldSkip) {
|
||||
return messageMap.get(parentId);
|
||||
}
|
||||
|
||||
parentId = parentMapping.parent;
|
||||
}
|
||||
|
||||
const parentMapping = conv.mapping[parentId];
|
||||
if (!parentMapping?.message) {
|
||||
return Constants.NO_PARENT;
|
||||
}
|
||||
|
||||
/* If parent is a system message, reasoning_recap, or thoughts, traverse up to find the nearest valid parent */
|
||||
const contentType = parentMapping.message.content?.content_type;
|
||||
const shouldSkip =
|
||||
parentMapping.message.author?.role === 'system' ||
|
||||
contentType === 'reasoning_recap' ||
|
||||
contentType === 'thoughts';
|
||||
|
||||
if (shouldSkip) {
|
||||
return findValidParent(parentMapping.parent);
|
||||
}
|
||||
|
||||
return messageMap.get(parentId);
|
||||
return Constants.NO_PARENT;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -466,7 +476,10 @@ function processConversation(conv, importBatchBuilder, requestUserId) {
|
|||
messages.push(message);
|
||||
}
|
||||
|
||||
adjustTimestampsForOrdering(messages);
|
||||
const cycleDetected = adjustTimestampsForOrdering(messages);
|
||||
if (cycleDetected) {
|
||||
breakParentCycles(messages);
|
||||
}
|
||||
|
||||
for (const message of messages) {
|
||||
importBatchBuilder.saveMessage(message);
|
||||
|
|
@ -553,21 +566,30 @@ function formatMessageText(messageData) {
|
|||
* Messages are sorted by createdAt and buildTree expects parents to appear before children.
|
||||
* ChatGPT exports can have slight timestamp inversions (e.g., tool call results
|
||||
* arriving a few ms before their parent). Uses multiple passes to handle cascading adjustments.
|
||||
* Capped at N passes (where N = message count) to guarantee termination on cyclic graphs.
|
||||
*
|
||||
* @param {Array} messages - Array of message objects with messageId, parentMessageId, and createdAt.
|
||||
* @returns {boolean} True if cyclic parent relationships were detected.
|
||||
*/
|
||||
function adjustTimestampsForOrdering(messages) {
|
||||
if (messages.length === 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const timestampMap = new Map();
|
||||
messages.forEach((msg) => timestampMap.set(msg.messageId, msg.createdAt));
|
||||
for (const msg of messages) {
|
||||
timestampMap.set(msg.messageId, msg.createdAt);
|
||||
}
|
||||
|
||||
let hasChanges = true;
|
||||
while (hasChanges) {
|
||||
let remainingPasses = messages.length;
|
||||
while (hasChanges && remainingPasses > 0) {
|
||||
hasChanges = false;
|
||||
remainingPasses--;
|
||||
for (const message of messages) {
|
||||
if (message.parentMessageId && message.parentMessageId !== Constants.NO_PARENT) {
|
||||
const parentTimestamp = timestampMap.get(message.parentMessageId);
|
||||
if (parentTimestamp && message.createdAt <= parentTimestamp) {
|
||||
// Bump child timestamp to 1ms after parent
|
||||
message.createdAt = new Date(parentTimestamp.getTime() + 1);
|
||||
timestampMap.set(message.messageId, message.createdAt);
|
||||
hasChanges = true;
|
||||
|
|
@ -575,6 +597,49 @@ function adjustTimestampsForOrdering(messages) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
const cycleDetected = remainingPasses === 0 && hasChanges;
|
||||
if (cycleDetected) {
|
||||
logger.warn(
|
||||
'[importers] Detected cyclic parent relationships while adjusting import timestamps',
|
||||
);
|
||||
}
|
||||
return cycleDetected;
|
||||
}
|
||||
|
||||
/**
|
||||
* Severs cyclic parentMessageId back-edges so saved messages form a valid tree.
|
||||
* Walks each message's parent chain; if a message is visited twice, its parentMessageId
|
||||
* is set to NO_PARENT to break the cycle.
|
||||
*
|
||||
* @param {Array} messages - Array of message objects with messageId and parentMessageId.
|
||||
*/
|
||||
function breakParentCycles(messages) {
|
||||
const parentLookup = new Map();
|
||||
for (const msg of messages) {
|
||||
parentLookup.set(msg.messageId, msg);
|
||||
}
|
||||
|
||||
const settled = new Set();
|
||||
for (const message of messages) {
|
||||
const chain = new Set();
|
||||
let current = message;
|
||||
while (current && !settled.has(current.messageId)) {
|
||||
if (chain.has(current.messageId)) {
|
||||
current.parentMessageId = Constants.NO_PARENT;
|
||||
break;
|
||||
}
|
||||
chain.add(current.messageId);
|
||||
const parentId = current.parentMessageId;
|
||||
if (!parentId || parentId === Constants.NO_PARENT) {
|
||||
break;
|
||||
}
|
||||
current = parentLookup.get(parentId);
|
||||
}
|
||||
for (const id of chain) {
|
||||
settled.add(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = { getImporter, processAssistantMessage };
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue