mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-09-21 21:50:49 +02:00
🔻 fix: Role and System Message Handling for ChatGPT Imports (#9524)
* fix: ChatGPT import logic breaks message graph when it encounters a system message - Implemented `findNonSystemParent` to maintain parent-child relationships by skipping system messages. - Added a test case to ensure system messages do not disrupt the conversation flow during import. * fix: ChatGPT import, correct sender for user messages with GPT-4 model * fix: Enhance model name extraction for assistant messages in import process - Updated sender assignment logic to dynamically extract model names from model slugs, improving accuracy for various GPT models. - Added comprehensive tests to validate the extraction and formatting of model names from different model slugs, ensuring robustness in the import functionality.
This commit is contained in:
parent
0d0a318c3c
commit
519645c0b0
2 changed files with 434 additions and 9 deletions
|
@ -1,9 +1,9 @@
|
|||
const { v4: uuidv4 } = require('uuid');
|
||||
const { logger } = require('@librechat/data-schemas');
|
||||
const { EModelEndpoint, Constants, openAISettings, CacheKeys } = require('librechat-data-provider');
|
||||
const { createImportBatchBuilder } = require('./importBatchBuilder');
|
||||
const { cloneMessagesWithTimestamps } = require('./fork');
|
||||
const getLogStores = require('~/cache/getLogStores');
|
||||
const logger = require('~/config/winston');
|
||||
|
||||
/**
|
||||
* Returns the appropriate importer function based on the provided JSON data.
|
||||
|
@ -212,6 +212,29 @@ function processConversation(conv, importBatchBuilder, requestUserId) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to find the nearest non-system parent
|
||||
* @param {string} parentId - The ID of the parent message.
|
||||
* @returns {string} The ID of the nearest non-system parent message.
|
||||
*/
|
||||
const findNonSystemParent = (parentId) => {
|
||||
if (!parentId || !messageMap.has(parentId)) {
|
||||
return Constants.NO_PARENT;
|
||||
}
|
||||
|
||||
const parentMapping = conv.mapping[parentId];
|
||||
if (!parentMapping?.message) {
|
||||
return Constants.NO_PARENT;
|
||||
}
|
||||
|
||||
/* If parent is a system message, traverse up to find the nearest non-system parent */
|
||||
if (parentMapping.message.author?.role === 'system') {
|
||||
return findNonSystemParent(parentMapping.parent);
|
||||
}
|
||||
|
||||
return messageMap.get(parentId);
|
||||
};
|
||||
|
||||
// Create and save messages using the mapped IDs
|
||||
const messages = [];
|
||||
for (const [id, mapping] of Object.entries(conv.mapping)) {
|
||||
|
@ -220,23 +243,27 @@ function processConversation(conv, importBatchBuilder, requestUserId) {
|
|||
messageMap.delete(id);
|
||||
continue;
|
||||
} else if (role === 'system') {
|
||||
messageMap.delete(id);
|
||||
// Skip system messages but keep their ID in messageMap for parent references
|
||||
continue;
|
||||
}
|
||||
|
||||
const newMessageId = messageMap.get(id);
|
||||
const parentMessageId =
|
||||
mapping.parent && messageMap.has(mapping.parent)
|
||||
? messageMap.get(mapping.parent)
|
||||
: Constants.NO_PARENT;
|
||||
const parentMessageId = findNonSystemParent(mapping.parent);
|
||||
|
||||
const messageText = formatMessageText(mapping.message);
|
||||
|
||||
const isCreatedByUser = role === 'user';
|
||||
let sender = isCreatedByUser ? 'user' : 'GPT-3.5';
|
||||
let sender = isCreatedByUser ? 'user' : 'assistant';
|
||||
const model = mapping.message.metadata.model_slug || openAISettings.model.default;
|
||||
if (model.includes('gpt-4')) {
|
||||
sender = 'GPT-4';
|
||||
|
||||
if (!isCreatedByUser) {
|
||||
/** Extracted model name from model slug */
|
||||
const gptMatch = model.match(/gpt-(.+)/i);
|
||||
if (gptMatch) {
|
||||
sender = `GPT-${gptMatch[1]}`;
|
||||
} else {
|
||||
sender = model || 'assistant';
|
||||
}
|
||||
}
|
||||
|
||||
messages.push({
|
||||
|
|
|
@ -99,6 +99,404 @@ describe('importChatGptConvo', () => {
|
|||
|
||||
expect(importBatchBuilder.saveBatch).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should handle system messages without breaking parent-child relationships', async () => {
|
||||
/**
|
||||
* Test data that reproduces message graph "breaking" when it encounters a system message
|
||||
*/
|
||||
const testData = [
|
||||
{
|
||||
title: 'System Message Parent Test',
|
||||
create_time: 1714585031.148505,
|
||||
update_time: 1714585060.879308,
|
||||
mapping: {
|
||||
'root-node': {
|
||||
id: 'root-node',
|
||||
message: null,
|
||||
parent: null,
|
||||
children: ['user-msg-1'],
|
||||
},
|
||||
'user-msg-1': {
|
||||
id: 'user-msg-1',
|
||||
message: {
|
||||
id: 'user-msg-1',
|
||||
author: { role: 'user' },
|
||||
create_time: 1714585031.150442,
|
||||
content: { content_type: 'text', parts: ['First user message'] },
|
||||
metadata: { model_slug: 'gpt-4' },
|
||||
},
|
||||
parent: 'root-node',
|
||||
children: ['assistant-msg-1'],
|
||||
},
|
||||
'assistant-msg-1': {
|
||||
id: 'assistant-msg-1',
|
||||
message: {
|
||||
id: 'assistant-msg-1',
|
||||
author: { role: 'assistant' },
|
||||
create_time: 1714585032.150442,
|
||||
content: { content_type: 'text', parts: ['First assistant response'] },
|
||||
metadata: { model_slug: 'gpt-4' },
|
||||
},
|
||||
parent: 'user-msg-1',
|
||||
children: ['system-msg'],
|
||||
},
|
||||
'system-msg': {
|
||||
id: 'system-msg',
|
||||
message: {
|
||||
id: 'system-msg',
|
||||
author: { role: 'system' },
|
||||
create_time: 1714585033.150442,
|
||||
content: { content_type: 'text', parts: ['System message in middle'] },
|
||||
metadata: { model_slug: 'gpt-4' },
|
||||
},
|
||||
parent: 'assistant-msg-1',
|
||||
children: ['user-msg-2'],
|
||||
},
|
||||
'user-msg-2': {
|
||||
id: 'user-msg-2',
|
||||
message: {
|
||||
id: 'user-msg-2',
|
||||
author: { role: 'user' },
|
||||
create_time: 1714585034.150442,
|
||||
content: { content_type: 'text', parts: ['Second user message'] },
|
||||
metadata: { model_slug: 'gpt-4' },
|
||||
},
|
||||
parent: 'system-msg',
|
||||
children: ['assistant-msg-2'],
|
||||
},
|
||||
'assistant-msg-2': {
|
||||
id: 'assistant-msg-2',
|
||||
message: {
|
||||
id: 'assistant-msg-2',
|
||||
author: { role: 'assistant' },
|
||||
create_time: 1714585035.150442,
|
||||
content: { content_type: 'text', parts: ['Second assistant response'] },
|
||||
metadata: { model_slug: 'gpt-4' },
|
||||
},
|
||||
parent: 'user-msg-2',
|
||||
children: [],
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const requestUserId = 'user-123';
|
||||
const importBatchBuilder = new ImportBatchBuilder(requestUserId);
|
||||
jest.spyOn(importBatchBuilder, 'saveMessage');
|
||||
|
||||
const importer = getImporter(testData);
|
||||
await importer(testData, requestUserId, () => importBatchBuilder);
|
||||
|
||||
/** 2 user messages + 2 assistant messages (system message should be skipped) */
|
||||
const expectedMessages = 4;
|
||||
expect(importBatchBuilder.saveMessage).toHaveBeenCalledTimes(expectedMessages);
|
||||
|
||||
const savedMessages = importBatchBuilder.saveMessage.mock.calls.map((call) => call[0]);
|
||||
|
||||
const messageMap = new Map();
|
||||
savedMessages.forEach((msg) => {
|
||||
messageMap.set(msg.text, msg);
|
||||
});
|
||||
|
||||
const firstUser = messageMap.get('First user message');
|
||||
const firstAssistant = messageMap.get('First assistant response');
|
||||
const secondUser = messageMap.get('Second user message');
|
||||
const secondAssistant = messageMap.get('Second assistant response');
|
||||
|
||||
expect(firstUser).toBeDefined();
|
||||
expect(firstAssistant).toBeDefined();
|
||||
expect(secondUser).toBeDefined();
|
||||
expect(secondAssistant).toBeDefined();
|
||||
expect(firstUser.parentMessageId).toBe(Constants.NO_PARENT);
|
||||
expect(firstAssistant.parentMessageId).toBe(firstUser.messageId);
|
||||
|
||||
// This is the key test: second user message should have first assistant as parent
|
||||
// (not NO_PARENT which would indicate the system message broke the chain)
|
||||
expect(secondUser.parentMessageId).toBe(firstAssistant.messageId);
|
||||
expect(secondAssistant.parentMessageId).toBe(secondUser.messageId);
|
||||
});
|
||||
|
||||
it('should maintain correct sender for user messages regardless of GPT-4 model', async () => {
|
||||
/**
|
||||
* Test data with GPT-4 model to ensure user messages keep 'user' sender
|
||||
*/
|
||||
const testData = [
|
||||
{
|
||||
title: 'GPT-4 Sender Test',
|
||||
create_time: 1714585031.148505,
|
||||
update_time: 1714585060.879308,
|
||||
mapping: {
|
||||
'root-node': {
|
||||
id: 'root-node',
|
||||
message: null,
|
||||
parent: null,
|
||||
children: ['user-msg-1'],
|
||||
},
|
||||
'user-msg-1': {
|
||||
id: 'user-msg-1',
|
||||
message: {
|
||||
id: 'user-msg-1',
|
||||
author: { role: 'user' },
|
||||
create_time: 1714585031.150442,
|
||||
content: { content_type: 'text', parts: ['User message with GPT-4'] },
|
||||
metadata: { model_slug: 'gpt-4' },
|
||||
},
|
||||
parent: 'root-node',
|
||||
children: ['assistant-msg-1'],
|
||||
},
|
||||
'assistant-msg-1': {
|
||||
id: 'assistant-msg-1',
|
||||
message: {
|
||||
id: 'assistant-msg-1',
|
||||
author: { role: 'assistant' },
|
||||
create_time: 1714585032.150442,
|
||||
content: { content_type: 'text', parts: ['Assistant response with GPT-4'] },
|
||||
metadata: { model_slug: 'gpt-4' },
|
||||
},
|
||||
parent: 'user-msg-1',
|
||||
children: ['user-msg-2'],
|
||||
},
|
||||
'user-msg-2': {
|
||||
id: 'user-msg-2',
|
||||
message: {
|
||||
id: 'user-msg-2',
|
||||
author: { role: 'user' },
|
||||
create_time: 1714585033.150442,
|
||||
content: { content_type: 'text', parts: ['Another user message with GPT-4o-mini'] },
|
||||
metadata: { model_slug: 'gpt-4o-mini' },
|
||||
},
|
||||
parent: 'assistant-msg-1',
|
||||
children: ['assistant-msg-2'],
|
||||
},
|
||||
'assistant-msg-2': {
|
||||
id: 'assistant-msg-2',
|
||||
message: {
|
||||
id: 'assistant-msg-2',
|
||||
author: { role: 'assistant' },
|
||||
create_time: 1714585034.150442,
|
||||
content: { content_type: 'text', parts: ['Assistant response with GPT-3.5'] },
|
||||
metadata: { model_slug: 'gpt-3.5-turbo' },
|
||||
},
|
||||
parent: 'user-msg-2',
|
||||
children: [],
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const requestUserId = 'user-123';
|
||||
const importBatchBuilder = new ImportBatchBuilder(requestUserId);
|
||||
jest.spyOn(importBatchBuilder, 'saveMessage');
|
||||
|
||||
const importer = getImporter(testData);
|
||||
await importer(testData, requestUserId, () => importBatchBuilder);
|
||||
|
||||
const savedMessages = importBatchBuilder.saveMessage.mock.calls.map((call) => call[0]);
|
||||
|
||||
const userMsg1 = savedMessages.find((msg) => msg.text === 'User message with GPT-4');
|
||||
const assistantMsg1 = savedMessages.find((msg) => msg.text === 'Assistant response with GPT-4');
|
||||
const userMsg2 = savedMessages.find(
|
||||
(msg) => msg.text === 'Another user message with GPT-4o-mini',
|
||||
);
|
||||
const assistantMsg2 = savedMessages.find(
|
||||
(msg) => msg.text === 'Assistant response with GPT-3.5',
|
||||
);
|
||||
|
||||
expect(userMsg1.sender).toBe('user');
|
||||
expect(userMsg1.isCreatedByUser).toBe(true);
|
||||
expect(userMsg1.model).toBe('gpt-4');
|
||||
|
||||
expect(userMsg2.sender).toBe('user');
|
||||
expect(userMsg2.isCreatedByUser).toBe(true);
|
||||
expect(userMsg2.model).toBe('gpt-4o-mini');
|
||||
|
||||
expect(assistantMsg1.sender).toBe('GPT-4');
|
||||
expect(assistantMsg1.isCreatedByUser).toBe(false);
|
||||
expect(assistantMsg1.model).toBe('gpt-4');
|
||||
|
||||
expect(assistantMsg2.sender).toBe('GPT-3.5-turbo');
|
||||
expect(assistantMsg2.isCreatedByUser).toBe(false);
|
||||
expect(assistantMsg2.model).toBe('gpt-3.5-turbo');
|
||||
});
|
||||
|
||||
it('should correctly extract and format model names from various model slugs', async () => {
|
||||
/**
|
||||
* Test data with various model slugs to test dynamic model identifier extraction
|
||||
*/
|
||||
const testData = [
|
||||
{
|
||||
title: 'Dynamic Model Identifier Test',
|
||||
create_time: 1714585031.148505,
|
||||
update_time: 1714585060.879308,
|
||||
mapping: {
|
||||
'root-node': {
|
||||
id: 'root-node',
|
||||
message: null,
|
||||
parent: null,
|
||||
children: ['msg-1'],
|
||||
},
|
||||
'msg-1': {
|
||||
id: 'msg-1',
|
||||
message: {
|
||||
id: 'msg-1',
|
||||
author: { role: 'user' },
|
||||
create_time: 1714585031.150442,
|
||||
content: { content_type: 'text', parts: ['Test message'] },
|
||||
metadata: {},
|
||||
},
|
||||
parent: 'root-node',
|
||||
children: ['msg-2', 'msg-3', 'msg-4', 'msg-5', 'msg-6', 'msg-7', 'msg-8', 'msg-9'],
|
||||
},
|
||||
'msg-2': {
|
||||
id: 'msg-2',
|
||||
message: {
|
||||
id: 'msg-2',
|
||||
author: { role: 'assistant' },
|
||||
create_time: 1714585032.150442,
|
||||
content: { content_type: 'text', parts: ['GPT-4 response'] },
|
||||
metadata: { model_slug: 'gpt-4' },
|
||||
},
|
||||
parent: 'msg-1',
|
||||
children: [],
|
||||
},
|
||||
'msg-3': {
|
||||
id: 'msg-3',
|
||||
message: {
|
||||
id: 'msg-3',
|
||||
author: { role: 'assistant' },
|
||||
create_time: 1714585033.150442,
|
||||
content: { content_type: 'text', parts: ['GPT-4o response'] },
|
||||
metadata: { model_slug: 'gpt-4o' },
|
||||
},
|
||||
parent: 'msg-1',
|
||||
children: [],
|
||||
},
|
||||
'msg-4': {
|
||||
id: 'msg-4',
|
||||
message: {
|
||||
id: 'msg-4',
|
||||
author: { role: 'assistant' },
|
||||
create_time: 1714585034.150442,
|
||||
content: { content_type: 'text', parts: ['GPT-4o-mini response'] },
|
||||
metadata: { model_slug: 'gpt-4o-mini' },
|
||||
},
|
||||
parent: 'msg-1',
|
||||
children: [],
|
||||
},
|
||||
'msg-5': {
|
||||
id: 'msg-5',
|
||||
message: {
|
||||
id: 'msg-5',
|
||||
author: { role: 'assistant' },
|
||||
create_time: 1714585035.150442,
|
||||
content: { content_type: 'text', parts: ['GPT-3.5-turbo response'] },
|
||||
metadata: { model_slug: 'gpt-3.5-turbo' },
|
||||
},
|
||||
parent: 'msg-1',
|
||||
children: [],
|
||||
},
|
||||
'msg-6': {
|
||||
id: 'msg-6',
|
||||
message: {
|
||||
id: 'msg-6',
|
||||
author: { role: 'assistant' },
|
||||
create_time: 1714585036.150442,
|
||||
content: { content_type: 'text', parts: ['GPT-4-turbo response'] },
|
||||
metadata: { model_slug: 'gpt-4-turbo' },
|
||||
},
|
||||
parent: 'msg-1',
|
||||
children: [],
|
||||
},
|
||||
'msg-7': {
|
||||
id: 'msg-7',
|
||||
message: {
|
||||
id: 'msg-7',
|
||||
author: { role: 'assistant' },
|
||||
create_time: 1714585037.150442,
|
||||
content: { content_type: 'text', parts: ['GPT-4-1106-preview response'] },
|
||||
metadata: { model_slug: 'gpt-4-1106-preview' },
|
||||
},
|
||||
parent: 'msg-1',
|
||||
children: [],
|
||||
},
|
||||
'msg-8': {
|
||||
id: 'msg-8',
|
||||
message: {
|
||||
id: 'msg-8',
|
||||
author: { role: 'assistant' },
|
||||
create_time: 1714585038.150442,
|
||||
content: { content_type: 'text', parts: ['Claude response'] },
|
||||
metadata: { model_slug: 'claude-3-opus' },
|
||||
},
|
||||
parent: 'msg-1',
|
||||
children: [],
|
||||
},
|
||||
'msg-9': {
|
||||
id: 'msg-9',
|
||||
message: {
|
||||
id: 'msg-9',
|
||||
author: { role: 'assistant' },
|
||||
create_time: 1714585039.150442,
|
||||
content: { content_type: 'text', parts: ['No model slug response'] },
|
||||
metadata: {},
|
||||
},
|
||||
parent: 'msg-1',
|
||||
children: [],
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const requestUserId = 'user-123';
|
||||
const importBatchBuilder = new ImportBatchBuilder(requestUserId);
|
||||
jest.spyOn(importBatchBuilder, 'saveMessage');
|
||||
|
||||
const importer = getImporter(testData);
|
||||
await importer(testData, requestUserId, () => importBatchBuilder);
|
||||
|
||||
const savedMessages = importBatchBuilder.saveMessage.mock.calls.map((call) => call[0]);
|
||||
|
||||
// Test various GPT model slug formats
|
||||
const gpt4 = savedMessages.find((msg) => msg.text === 'GPT-4 response');
|
||||
expect(gpt4.sender).toBe('GPT-4');
|
||||
expect(gpt4.model).toBe('gpt-4');
|
||||
|
||||
const gpt4o = savedMessages.find((msg) => msg.text === 'GPT-4o response');
|
||||
expect(gpt4o.sender).toBe('GPT-4o');
|
||||
expect(gpt4o.model).toBe('gpt-4o');
|
||||
|
||||
const gpt4oMini = savedMessages.find((msg) => msg.text === 'GPT-4o-mini response');
|
||||
expect(gpt4oMini.sender).toBe('GPT-4o-mini');
|
||||
expect(gpt4oMini.model).toBe('gpt-4o-mini');
|
||||
|
||||
const gpt35Turbo = savedMessages.find((msg) => msg.text === 'GPT-3.5-turbo response');
|
||||
expect(gpt35Turbo.sender).toBe('GPT-3.5-turbo');
|
||||
expect(gpt35Turbo.model).toBe('gpt-3.5-turbo');
|
||||
|
||||
const gpt4Turbo = savedMessages.find((msg) => msg.text === 'GPT-4-turbo response');
|
||||
expect(gpt4Turbo.sender).toBe('GPT-4-turbo');
|
||||
expect(gpt4Turbo.model).toBe('gpt-4-turbo');
|
||||
|
||||
const gpt4Preview = savedMessages.find((msg) => msg.text === 'GPT-4-1106-preview response');
|
||||
expect(gpt4Preview.sender).toBe('GPT-4-1106-preview');
|
||||
expect(gpt4Preview.model).toBe('gpt-4-1106-preview');
|
||||
|
||||
// Test non-GPT model (should use the model slug as sender)
|
||||
const claude = savedMessages.find((msg) => msg.text === 'Claude response');
|
||||
expect(claude.sender).toBe('claude-3-opus');
|
||||
expect(claude.model).toBe('claude-3-opus');
|
||||
|
||||
// Test missing model slug (should default to openAISettings.model.default)
|
||||
const noModel = savedMessages.find((msg) => msg.text === 'No model slug response');
|
||||
// When no model slug is provided, it defaults to gpt-4o-mini which gets formatted to GPT-4o-mini
|
||||
expect(noModel.sender).toBe('GPT-4o-mini');
|
||||
expect(noModel.model).toBe(openAISettings.model.default);
|
||||
|
||||
// Verify user message is unaffected
|
||||
const userMsg = savedMessages.find((msg) => msg.text === 'Test message');
|
||||
expect(userMsg.sender).toBe('user');
|
||||
expect(userMsg.isCreatedByUser).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('importLibreChatConvo', () => {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue