🔻 fix: Role and System Message Handling for ChatGPT Imports (#9524)

* fix: ChatGPT import logic breaks message graph when it encounters a system message

- Implemented `findNonSystemParent` to maintain parent-child relationships by skipping system messages.
- Added a test case to ensure system messages do not disrupt the conversation flow during import.

* fix: ChatGPT import, correct sender for user messages with GPT-4 model

* fix: Enhance model name extraction for assistant messages in import process

- Updated sender assignment logic to dynamically extract model names from model slugs, improving accuracy for various GPT models.
- Added comprehensive tests to validate the extraction and formatting of model names from different model slugs, ensuring robustness in the import functionality.
This commit is contained in:
Danny Avila 2025-09-09 13:51:26 -04:00 committed by GitHub
parent 0d0a318c3c
commit 519645c0b0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 434 additions and 9 deletions

View file

@ -1,9 +1,9 @@
const { v4: uuidv4 } = require('uuid');
const { logger } = require('@librechat/data-schemas');
const { EModelEndpoint, Constants, openAISettings, CacheKeys } = require('librechat-data-provider');
const { createImportBatchBuilder } = require('./importBatchBuilder');
const { cloneMessagesWithTimestamps } = require('./fork');
const getLogStores = require('~/cache/getLogStores');
const logger = require('~/config/winston');
/**
* Returns the appropriate importer function based on the provided JSON data.
@ -212,6 +212,29 @@ function processConversation(conv, importBatchBuilder, requestUserId) {
}
}
/**
* Helper function to find the nearest non-system parent
* @param {string} parentId - The ID of the parent message.
* @returns {string} The ID of the nearest non-system parent message.
*/
const findNonSystemParent = (parentId) => {
if (!parentId || !messageMap.has(parentId)) {
return Constants.NO_PARENT;
}
const parentMapping = conv.mapping[parentId];
if (!parentMapping?.message) {
return Constants.NO_PARENT;
}
/* If parent is a system message, traverse up to find the nearest non-system parent */
if (parentMapping.message.author?.role === 'system') {
return findNonSystemParent(parentMapping.parent);
}
return messageMap.get(parentId);
};
// Create and save messages using the mapped IDs
const messages = [];
for (const [id, mapping] of Object.entries(conv.mapping)) {
@ -220,23 +243,27 @@ function processConversation(conv, importBatchBuilder, requestUserId) {
messageMap.delete(id);
continue;
} else if (role === 'system') {
messageMap.delete(id);
// Skip system messages but keep their ID in messageMap for parent references
continue;
}
const newMessageId = messageMap.get(id);
const parentMessageId =
mapping.parent && messageMap.has(mapping.parent)
? messageMap.get(mapping.parent)
: Constants.NO_PARENT;
const parentMessageId = findNonSystemParent(mapping.parent);
const messageText = formatMessageText(mapping.message);
const isCreatedByUser = role === 'user';
let sender = isCreatedByUser ? 'user' : 'GPT-3.5';
let sender = isCreatedByUser ? 'user' : 'assistant';
const model = mapping.message.metadata.model_slug || openAISettings.model.default;
if (model.includes('gpt-4')) {
sender = 'GPT-4';
if (!isCreatedByUser) {
/** Extracted model name from model slug */
const gptMatch = model.match(/gpt-(.+)/i);
if (gptMatch) {
sender = `GPT-${gptMatch[1]}`;
} else {
sender = model || 'assistant';
}
}
messages.push({

View file

@ -99,6 +99,404 @@ describe('importChatGptConvo', () => {
expect(importBatchBuilder.saveBatch).toHaveBeenCalled();
});
it('should handle system messages without breaking parent-child relationships', async () => {
/**
* Test data that reproduces message graph "breaking" when it encounters a system message
*/
const testData = [
{
title: 'System Message Parent Test',
create_time: 1714585031.148505,
update_time: 1714585060.879308,
mapping: {
'root-node': {
id: 'root-node',
message: null,
parent: null,
children: ['user-msg-1'],
},
'user-msg-1': {
id: 'user-msg-1',
message: {
id: 'user-msg-1',
author: { role: 'user' },
create_time: 1714585031.150442,
content: { content_type: 'text', parts: ['First user message'] },
metadata: { model_slug: 'gpt-4' },
},
parent: 'root-node',
children: ['assistant-msg-1'],
},
'assistant-msg-1': {
id: 'assistant-msg-1',
message: {
id: 'assistant-msg-1',
author: { role: 'assistant' },
create_time: 1714585032.150442,
content: { content_type: 'text', parts: ['First assistant response'] },
metadata: { model_slug: 'gpt-4' },
},
parent: 'user-msg-1',
children: ['system-msg'],
},
'system-msg': {
id: 'system-msg',
message: {
id: 'system-msg',
author: { role: 'system' },
create_time: 1714585033.150442,
content: { content_type: 'text', parts: ['System message in middle'] },
metadata: { model_slug: 'gpt-4' },
},
parent: 'assistant-msg-1',
children: ['user-msg-2'],
},
'user-msg-2': {
id: 'user-msg-2',
message: {
id: 'user-msg-2',
author: { role: 'user' },
create_time: 1714585034.150442,
content: { content_type: 'text', parts: ['Second user message'] },
metadata: { model_slug: 'gpt-4' },
},
parent: 'system-msg',
children: ['assistant-msg-2'],
},
'assistant-msg-2': {
id: 'assistant-msg-2',
message: {
id: 'assistant-msg-2',
author: { role: 'assistant' },
create_time: 1714585035.150442,
content: { content_type: 'text', parts: ['Second assistant response'] },
metadata: { model_slug: 'gpt-4' },
},
parent: 'user-msg-2',
children: [],
},
},
},
];
const requestUserId = 'user-123';
const importBatchBuilder = new ImportBatchBuilder(requestUserId);
jest.spyOn(importBatchBuilder, 'saveMessage');
const importer = getImporter(testData);
await importer(testData, requestUserId, () => importBatchBuilder);
/** 2 user messages + 2 assistant messages (system message should be skipped) */
const expectedMessages = 4;
expect(importBatchBuilder.saveMessage).toHaveBeenCalledTimes(expectedMessages);
const savedMessages = importBatchBuilder.saveMessage.mock.calls.map((call) => call[0]);
const messageMap = new Map();
savedMessages.forEach((msg) => {
messageMap.set(msg.text, msg);
});
const firstUser = messageMap.get('First user message');
const firstAssistant = messageMap.get('First assistant response');
const secondUser = messageMap.get('Second user message');
const secondAssistant = messageMap.get('Second assistant response');
expect(firstUser).toBeDefined();
expect(firstAssistant).toBeDefined();
expect(secondUser).toBeDefined();
expect(secondAssistant).toBeDefined();
expect(firstUser.parentMessageId).toBe(Constants.NO_PARENT);
expect(firstAssistant.parentMessageId).toBe(firstUser.messageId);
// This is the key test: second user message should have first assistant as parent
// (not NO_PARENT which would indicate the system message broke the chain)
expect(secondUser.parentMessageId).toBe(firstAssistant.messageId);
expect(secondAssistant.parentMessageId).toBe(secondUser.messageId);
});
it('should maintain correct sender for user messages regardless of GPT-4 model', async () => {
/**
* Test data with GPT-4 model to ensure user messages keep 'user' sender
*/
const testData = [
{
title: 'GPT-4 Sender Test',
create_time: 1714585031.148505,
update_time: 1714585060.879308,
mapping: {
'root-node': {
id: 'root-node',
message: null,
parent: null,
children: ['user-msg-1'],
},
'user-msg-1': {
id: 'user-msg-1',
message: {
id: 'user-msg-1',
author: { role: 'user' },
create_time: 1714585031.150442,
content: { content_type: 'text', parts: ['User message with GPT-4'] },
metadata: { model_slug: 'gpt-4' },
},
parent: 'root-node',
children: ['assistant-msg-1'],
},
'assistant-msg-1': {
id: 'assistant-msg-1',
message: {
id: 'assistant-msg-1',
author: { role: 'assistant' },
create_time: 1714585032.150442,
content: { content_type: 'text', parts: ['Assistant response with GPT-4'] },
metadata: { model_slug: 'gpt-4' },
},
parent: 'user-msg-1',
children: ['user-msg-2'],
},
'user-msg-2': {
id: 'user-msg-2',
message: {
id: 'user-msg-2',
author: { role: 'user' },
create_time: 1714585033.150442,
content: { content_type: 'text', parts: ['Another user message with GPT-4o-mini'] },
metadata: { model_slug: 'gpt-4o-mini' },
},
parent: 'assistant-msg-1',
children: ['assistant-msg-2'],
},
'assistant-msg-2': {
id: 'assistant-msg-2',
message: {
id: 'assistant-msg-2',
author: { role: 'assistant' },
create_time: 1714585034.150442,
content: { content_type: 'text', parts: ['Assistant response with GPT-3.5'] },
metadata: { model_slug: 'gpt-3.5-turbo' },
},
parent: 'user-msg-2',
children: [],
},
},
},
];
const requestUserId = 'user-123';
const importBatchBuilder = new ImportBatchBuilder(requestUserId);
jest.spyOn(importBatchBuilder, 'saveMessage');
const importer = getImporter(testData);
await importer(testData, requestUserId, () => importBatchBuilder);
const savedMessages = importBatchBuilder.saveMessage.mock.calls.map((call) => call[0]);
const userMsg1 = savedMessages.find((msg) => msg.text === 'User message with GPT-4');
const assistantMsg1 = savedMessages.find((msg) => msg.text === 'Assistant response with GPT-4');
const userMsg2 = savedMessages.find(
(msg) => msg.text === 'Another user message with GPT-4o-mini',
);
const assistantMsg2 = savedMessages.find(
(msg) => msg.text === 'Assistant response with GPT-3.5',
);
expect(userMsg1.sender).toBe('user');
expect(userMsg1.isCreatedByUser).toBe(true);
expect(userMsg1.model).toBe('gpt-4');
expect(userMsg2.sender).toBe('user');
expect(userMsg2.isCreatedByUser).toBe(true);
expect(userMsg2.model).toBe('gpt-4o-mini');
expect(assistantMsg1.sender).toBe('GPT-4');
expect(assistantMsg1.isCreatedByUser).toBe(false);
expect(assistantMsg1.model).toBe('gpt-4');
expect(assistantMsg2.sender).toBe('GPT-3.5-turbo');
expect(assistantMsg2.isCreatedByUser).toBe(false);
expect(assistantMsg2.model).toBe('gpt-3.5-turbo');
});
it('should correctly extract and format model names from various model slugs', async () => {
/**
* Test data with various model slugs to test dynamic model identifier extraction
*/
const testData = [
{
title: 'Dynamic Model Identifier Test',
create_time: 1714585031.148505,
update_time: 1714585060.879308,
mapping: {
'root-node': {
id: 'root-node',
message: null,
parent: null,
children: ['msg-1'],
},
'msg-1': {
id: 'msg-1',
message: {
id: 'msg-1',
author: { role: 'user' },
create_time: 1714585031.150442,
content: { content_type: 'text', parts: ['Test message'] },
metadata: {},
},
parent: 'root-node',
children: ['msg-2', 'msg-3', 'msg-4', 'msg-5', 'msg-6', 'msg-7', 'msg-8', 'msg-9'],
},
'msg-2': {
id: 'msg-2',
message: {
id: 'msg-2',
author: { role: 'assistant' },
create_time: 1714585032.150442,
content: { content_type: 'text', parts: ['GPT-4 response'] },
metadata: { model_slug: 'gpt-4' },
},
parent: 'msg-1',
children: [],
},
'msg-3': {
id: 'msg-3',
message: {
id: 'msg-3',
author: { role: 'assistant' },
create_time: 1714585033.150442,
content: { content_type: 'text', parts: ['GPT-4o response'] },
metadata: { model_slug: 'gpt-4o' },
},
parent: 'msg-1',
children: [],
},
'msg-4': {
id: 'msg-4',
message: {
id: 'msg-4',
author: { role: 'assistant' },
create_time: 1714585034.150442,
content: { content_type: 'text', parts: ['GPT-4o-mini response'] },
metadata: { model_slug: 'gpt-4o-mini' },
},
parent: 'msg-1',
children: [],
},
'msg-5': {
id: 'msg-5',
message: {
id: 'msg-5',
author: { role: 'assistant' },
create_time: 1714585035.150442,
content: { content_type: 'text', parts: ['GPT-3.5-turbo response'] },
metadata: { model_slug: 'gpt-3.5-turbo' },
},
parent: 'msg-1',
children: [],
},
'msg-6': {
id: 'msg-6',
message: {
id: 'msg-6',
author: { role: 'assistant' },
create_time: 1714585036.150442,
content: { content_type: 'text', parts: ['GPT-4-turbo response'] },
metadata: { model_slug: 'gpt-4-turbo' },
},
parent: 'msg-1',
children: [],
},
'msg-7': {
id: 'msg-7',
message: {
id: 'msg-7',
author: { role: 'assistant' },
create_time: 1714585037.150442,
content: { content_type: 'text', parts: ['GPT-4-1106-preview response'] },
metadata: { model_slug: 'gpt-4-1106-preview' },
},
parent: 'msg-1',
children: [],
},
'msg-8': {
id: 'msg-8',
message: {
id: 'msg-8',
author: { role: 'assistant' },
create_time: 1714585038.150442,
content: { content_type: 'text', parts: ['Claude response'] },
metadata: { model_slug: 'claude-3-opus' },
},
parent: 'msg-1',
children: [],
},
'msg-9': {
id: 'msg-9',
message: {
id: 'msg-9',
author: { role: 'assistant' },
create_time: 1714585039.150442,
content: { content_type: 'text', parts: ['No model slug response'] },
metadata: {},
},
parent: 'msg-1',
children: [],
},
},
},
];
const requestUserId = 'user-123';
const importBatchBuilder = new ImportBatchBuilder(requestUserId);
jest.spyOn(importBatchBuilder, 'saveMessage');
const importer = getImporter(testData);
await importer(testData, requestUserId, () => importBatchBuilder);
const savedMessages = importBatchBuilder.saveMessage.mock.calls.map((call) => call[0]);
// Test various GPT model slug formats
const gpt4 = savedMessages.find((msg) => msg.text === 'GPT-4 response');
expect(gpt4.sender).toBe('GPT-4');
expect(gpt4.model).toBe('gpt-4');
const gpt4o = savedMessages.find((msg) => msg.text === 'GPT-4o response');
expect(gpt4o.sender).toBe('GPT-4o');
expect(gpt4o.model).toBe('gpt-4o');
const gpt4oMini = savedMessages.find((msg) => msg.text === 'GPT-4o-mini response');
expect(gpt4oMini.sender).toBe('GPT-4o-mini');
expect(gpt4oMini.model).toBe('gpt-4o-mini');
const gpt35Turbo = savedMessages.find((msg) => msg.text === 'GPT-3.5-turbo response');
expect(gpt35Turbo.sender).toBe('GPT-3.5-turbo');
expect(gpt35Turbo.model).toBe('gpt-3.5-turbo');
const gpt4Turbo = savedMessages.find((msg) => msg.text === 'GPT-4-turbo response');
expect(gpt4Turbo.sender).toBe('GPT-4-turbo');
expect(gpt4Turbo.model).toBe('gpt-4-turbo');
const gpt4Preview = savedMessages.find((msg) => msg.text === 'GPT-4-1106-preview response');
expect(gpt4Preview.sender).toBe('GPT-4-1106-preview');
expect(gpt4Preview.model).toBe('gpt-4-1106-preview');
// Test non-GPT model (should use the model slug as sender)
const claude = savedMessages.find((msg) => msg.text === 'Claude response');
expect(claude.sender).toBe('claude-3-opus');
expect(claude.model).toBe('claude-3-opus');
// Test missing model slug (should default to openAISettings.model.default)
const noModel = savedMessages.find((msg) => msg.text === 'No model slug response');
// When no model slug is provided, it defaults to gpt-4o-mini which gets formatted to GPT-4o-mini
expect(noModel.sender).toBe('GPT-4o-mini');
expect(noModel.model).toBe(openAISettings.model.default);
// Verify user message is unaffected
const userMsg = savedMessages.find((msg) => msg.text === 'Test message');
expect(userMsg.sender).toBe('user');
expect(userMsg.isCreatedByUser).toBe(true);
});
});
describe('importLibreChatConvo', () => {