🧩 fix: Human Message Content Handling for Legacy Content (#8525)

* wip: first pass content strings

* 📦 chore: update @langchain/core to v0.3.62 for data-provider dev dep.

* 📦 chore: bump @langchain/core to v0.3.62 for api dep.

* 📦 chore: move @langchain/core to peerDependencies in package.json and package-lock.json

* fix: update formatContentStrings to create HumanMessage directly from formatted content

* chore: import order
This commit is contained in:
Danny Avila 2025-07-17 18:34:24 -04:00 committed by GitHub
parent cd9c578907
commit 445e9eae85
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 398 additions and 39 deletions

View file

@ -237,41 +237,9 @@ const formatAgentMessages = (payload) => {
return messages;
};
/**
* Formats an array of messages for LangChain, making sure all content fields are strings
* @param {Array<(HumanMessage|AIMessage|SystemMessage|ToolMessage)>} payload - The array of messages to format.
* @returns {Array<(HumanMessage|AIMessage|SystemMessage|ToolMessage)>} - The array of formatted LangChain messages, including ToolMessages for tool calls.
*/
const formatContentStrings = (payload) => {
const messages = [];
for (const message of payload) {
if (typeof message.content === 'string') {
continue;
}
if (!Array.isArray(message.content)) {
continue;
}
// Reduce text types to a single string, ignore all other types
const content = message.content.reduce((acc, curr) => {
if (curr.type === ContentTypes.TEXT) {
return `${acc}${curr[ContentTypes.TEXT]}\n`;
}
return acc;
}, '');
message.content = content.trim();
}
return messages;
};
// CommonJS export surface of this module: the formatting helpers made available to callers.
module.exports = {
formatMessage,
formatFromLangChain,
formatAgentMessages,
formatContentStrings,
formatLangChainMessages,
};

View file

@ -44,7 +44,7 @@
"@googleapis/youtube": "^20.0.0",
"@keyv/redis": "^4.3.3",
"@langchain/community": "^0.3.47",
"@langchain/core": "^0.3.60",
"@langchain/core": "^0.3.62",
"@langchain/google-genai": "^0.2.13",
"@langchain/google-vertexai": "^0.2.13",
"@langchain/openai": "^0.5.18",

View file

@ -8,6 +8,7 @@ const {
Tokenizer,
checkAccess,
memoryInstructions,
formatContentStrings,
createMemoryProcessor,
} = require('@librechat/api');
const {
@ -16,7 +17,6 @@ const {
GraphEvents,
formatMessage,
formatAgentMessages,
formatContentStrings,
getTokenCountForMessage,
createMetadataAggregator,
} = require('@librechat/agents');

6
package-lock.json generated
View file

@ -60,7 +60,7 @@
"@googleapis/youtube": "^20.0.0",
"@keyv/redis": "^4.3.3",
"@langchain/community": "^0.3.47",
"@langchain/core": "^0.3.60",
"@langchain/core": "^0.3.62",
"@langchain/google-genai": "^0.2.13",
"@langchain/google-vertexai": "^0.2.13",
"@langchain/openai": "^0.5.18",
@ -46473,7 +46473,6 @@
"@babel/preset-env": "^7.21.5",
"@babel/preset-react": "^7.18.6",
"@babel/preset-typescript": "^7.21.0",
"@langchain/core": "^0.3.62",
"@rollup/plugin-alias": "^5.1.0",
"@rollup/plugin-commonjs": "^25.0.2",
"@rollup/plugin-json": "^6.1.0",
@ -46500,6 +46499,7 @@
"typescript": "^5.0.4"
},
"peerDependencies": {
"@langchain/core": "^0.3.62",
"@librechat/agents": "^2.4.62",
"@librechat/data-schemas": "*",
"@modelcontextprotocol/sdk": "^1.13.3",
@ -46605,7 +46605,7 @@
"@babel/preset-env": "^7.21.5",
"@babel/preset-react": "^7.18.6",
"@babel/preset-typescript": "^7.21.0",
"@langchain/core": "^0.3.57",
"@langchain/core": "^0.3.62",
"@rollup/plugin-alias": "^5.1.0",
"@rollup/plugin-commonjs": "^25.0.2",
"@rollup/plugin-json": "^6.1.0",

View file

@ -40,7 +40,6 @@
"@babel/preset-env": "^7.21.5",
"@babel/preset-react": "^7.18.6",
"@babel/preset-typescript": "^7.21.0",
"@langchain/core": "^0.3.62",
"@rollup/plugin-alias": "^5.1.0",
"@rollup/plugin-commonjs": "^25.0.2",
"@rollup/plugin-json": "^6.1.0",
@ -70,6 +69,7 @@
"registry": "https://registry.npmjs.org/"
},
"peerDependencies": {
"@langchain/core": "^0.3.62",
"@librechat/agents": "^2.4.62",
"@librechat/data-schemas": "*",
"@modelcontextprotocol/sdk": "^1.13.3",

View file

@ -0,0 +1,331 @@
import { ContentTypes } from 'librechat-data-provider';
import { HumanMessage, AIMessage, SystemMessage } from '@langchain/core/messages';
import { formatContentStrings } from './content';
describe('formatContentStrings', () => {
describe('Human messages', () => {
  /** Builds one text content block keyed by `ContentTypes.TEXT`. */
  const textBlock = (text: string) => ({ type: ContentTypes.TEXT, [ContentTypes.TEXT]: text });

  it('should convert human message with all text blocks to string', () => {
    const input = [new HumanMessage({ content: [textBlock('Hello'), textBlock('World')] })];

    const formatted = formatContentStrings(input);

    expect(formatted).toHaveLength(1);
    expect(formatted[0].content).toBe('Hello\nWorld');
  });

  it('should not convert human message with mixed content types (text + image)', () => {
    // Factory so the input and the expectation are structurally equal but never share references.
    const mixedContent = () => [
      { type: ContentTypes.TEXT, text: 'what do you see' },
      {
        type: 'image_url',
        image_url: {
          url: '_SOME_BASE64_DATA=',
          detail: 'auto',
        },
      },
    ];

    const formatted = formatContentStrings([new HumanMessage({ content: mixedContent() })]);

    expect(formatted).toHaveLength(1);
    expect(formatted[0].content).toEqual(mixedContent());
  });

  it('should leave string content unchanged', () => {
    const input = [new HumanMessage({ content: 'Hello World' })];

    const formatted = formatContentStrings(input);

    expect(formatted).toHaveLength(1);
    expect(formatted[0].content).toBe('Hello World');
  });

  it('should handle empty text blocks', () => {
    const input = [
      new HumanMessage({ content: [textBlock('Hello'), textBlock(''), textBlock('World')] }),
    ];

    const formatted = formatContentStrings(input);

    expect(formatted).toHaveLength(1);
    // The empty block still contributes its own line break.
    expect(formatted[0].content).toBe('Hello\n\nWorld');
  });

  it('should handle null/undefined text values', () => {
    const input = [
      new HumanMessage({
        content: [
          textBlock('Hello'),
          { type: ContentTypes.TEXT, [ContentTypes.TEXT]: null },
          { type: ContentTypes.TEXT, [ContentTypes.TEXT]: undefined },
          textBlock('World'),
        ],
      }),
    ];

    const formatted = formatContentStrings(input);

    expect(formatted).toHaveLength(1);
    // Each missing value is treated as an empty line.
    expect(formatted[0].content).toBe('Hello\n\n\nWorld');
  });
});
describe('Non-human messages', () => {
  /** Returns a fresh pair of text blocks so input and expectation never share references. */
  const textPair = (first: string, second: string) => [
    { type: ContentTypes.TEXT, [ContentTypes.TEXT]: first },
    { type: ContentTypes.TEXT, [ContentTypes.TEXT]: second },
  ];

  it('should not modify AI message content', () => {
    const aiMessage = new AIMessage({ content: textPair('Hello', 'World') });

    const formatted = formatContentStrings([aiMessage]);

    expect(formatted).toHaveLength(1);
    expect(formatted[0].content).toEqual(textPair('Hello', 'World'));
  });

  it('should not modify System message content', () => {
    const systemMessage = new SystemMessage({ content: textPair('System', 'Message') });

    const formatted = formatContentStrings([systemMessage]);

    expect(formatted).toHaveLength(1);
    expect(formatted[0].content).toEqual(textPair('System', 'Message'));
  });
});
describe('Mixed message types', () => {
  it('should only process human messages in mixed array', () => {
    /** Returns a fresh pair of text blocks on every call. */
    const textPair = (first: string, second: string) => [
      { type: ContentTypes.TEXT, [ContentTypes.TEXT]: first },
      { type: ContentTypes.TEXT, [ContentTypes.TEXT]: second },
    ];
    const human = new HumanMessage({ content: textPair('Human', 'Message') });
    const ai = new AIMessage({ content: textPair('AI', 'Response') });
    const system = new SystemMessage({ content: textPair('System', 'Prompt') });

    const formatted = formatContentStrings([human, ai, system]);

    expect(formatted).toHaveLength(3);
    const [humanOut, aiOut, systemOut] = formatted;

    // Only the human message is flattened to a string
    expect(humanOut.content).toBe('Human\nMessage');

    // The AI message keeps its block array
    expect(aiOut.content).toEqual(textPair('AI', 'Response'));

    // The system message keeps its block array
    expect(systemOut.content).toEqual(textPair('System', 'Prompt'));
  });
});
describe('Edge cases', () => {
  /** Builds one text content block keyed by `ContentTypes.TEXT`. */
  const textBlock = (text: string) => ({ type: ContentTypes.TEXT, [ContentTypes.TEXT]: text });

  it('should handle empty array', () => {
    expect(formatContentStrings([])).toEqual([]);
  });

  it('should handle messages with non-array content', () => {
    const input = [new HumanMessage({ content: 'This is a string content' })];

    const formatted = formatContentStrings(input);

    expect(formatted).toHaveLength(1);
    expect(formatted[0].content).toBe('This is a string content');
  });

  it('should trim the final concatenated string', () => {
    const input = [new HumanMessage({ content: [textBlock(' Hello '), textBlock(' World ')] })];

    const formatted = formatContentStrings(input);

    expect(formatted).toHaveLength(1);
    // Only the outer whitespace of the joined string is removed; inner padding survives.
    expect(formatted[0].content).toBe('Hello \n World');
  });

  it('should not modify the original messages array', () => {
    const input = [new HumanMessage({ content: [textBlock('Hello'), textBlock('World')] })];
    // Shallow snapshot taken before the call, used as the reference afterwards.
    const snapshot = (
      input[0].content as Array<{ type: string; [key: string]: unknown }>
    ).slice();

    formatContentStrings(input);

    expect(input[0].content).toEqual(snapshot);
  });
});
describe('Real-world scenarios', () => {
  it('should handle the exact scenario from the issue', () => {
    // Factories return fresh literals so inputs and expectations never share references.
    const greetingReply = () => [
      {
        type: 'text',
        text: 'Hi Danny! How can I help you today?',
      },
    ];
    const imageQuestion = () => [
      {
        type: 'text',
        text: 'what do you see',
      },
      {
        type: 'image_url',
        image_url: {
          url: '_SOME_BASE64_DATA=',
          detail: 'auto',
        },
      },
    ];
    const conversation = [
      new HumanMessage({ content: [{ type: 'text', text: 'hi there' }] }),
      new AIMessage({ content: greetingReply() }),
      new HumanMessage({ content: imageQuestion() }),
    ];

    const formatted = formatContentStrings(conversation);

    expect(formatted).toHaveLength(3);
    const [firstHuman, aiReply, secondHuman] = formatted;

    // Text-only human message is flattened
    expect(firstHuman.content).toBe('hi there');

    // AI message is passed through as-is
    expect(aiReply.content).toEqual(greetingReply());

    // Human message with an image keeps its block array
    expect(secondHuman.content).toEqual(imageQuestion());
  });

  it('should handle human messages with tool calls', () => {
    const withToolCall = () => [
      { type: ContentTypes.TEXT, [ContentTypes.TEXT]: 'Please use the calculator' },
      {
        type: ContentTypes.TOOL_CALL,
        tool_call: { name: 'calculator', args: '{"a": 1, "b": 2}' },
      },
    ];

    const formatted = formatContentStrings([new HumanMessage({ content: withToolCall() })]);

    expect(formatted).toHaveLength(1);
    // Not every block is text, so the content must stay an array
    expect(formatted[0].content).toEqual(withToolCall());
  });
});
});

View file

@ -0,0 +1,58 @@
import { ContentTypes } from 'librechat-data-provider';
import { HumanMessage } from '@langchain/core/messages';
import type { BaseMessage } from '@langchain/core/messages';
/**
 * Flattens text-only human message content into a single string.
 *
 * Messages are returned in input order. A message is passed through as the same instance unless
 * it is a human message whose `content` is an array consisting solely of `ContentTypes.TEXT`
 * blocks. Such a message is replaced by a new `HumanMessage` whose content is the block texts
 * joined by newlines and trimmed; `null`/`undefined` text values contribute an empty line.
 * The replacement keeps the original's `id`, `name`, `additional_kwargs` and `response_metadata`.
 * Input messages are never mutated.
 *
 * Note: an empty content array counts as "all text" and is therefore replaced by an empty string.
 *
 * @param payload - The messages to format.
 * @returns A new array containing the untouched or replaced messages.
 */
export const formatContentStrings = (payload: Array<BaseMessage>): Array<BaseMessage> => {
  const result: Array<BaseMessage> = [];

  for (const message of payload) {
    const { content } = message;

    // Non-human messages, plain strings, and any other non-array content pass through as-is
    if (message.getType() !== 'human' || !Array.isArray(content)) {
      result.push(message);
      continue;
    }

    // Mixed content (images, tool calls, ...) must keep its block structure
    const allTextBlocks = content.every((block) => block.type === ContentTypes.TEXT);
    if (!allTextBlocks) {
      result.push(message);
      continue;
    }

    // `every` does not narrow the element type, so the type guard is repeated inside the reducer
    const text = content
      .reduce((acc, block) => {
        if (block.type === ContentTypes.TEXT) {
          return `${acc}${block[ContentTypes.TEXT] ?? ''}\n`;
        }
        return acc;
      }, '')
      .trim();

    // Build a fresh message instead of mutating the input, carrying over the identifying
    // fields and metadata that a bare string constructor call would discard
    result.push(
      new HumanMessage({
        content: text,
        id: message.id,
        name: message.name,
        additional_kwargs: { ...message.additional_kwargs },
        response_metadata: { ...message.response_metadata },
      }),
    );
  }

  return result;
};

View file

@ -0,0 +1 @@
export * from './content';

View file

@ -4,6 +4,7 @@ export * from './mcp/oauth';
export * from './mcp/auth';
export * from './mcp/zod';
/* Utilities */
export * from './format';
export * from './mcp/utils';
export * from './utils';
/* OAuth */

View file

@ -48,7 +48,7 @@
"@babel/preset-env": "^7.21.5",
"@babel/preset-react": "^7.18.6",
"@babel/preset-typescript": "^7.21.0",
"@langchain/core": "^0.3.57",
"@langchain/core": "^0.3.62",
"@rollup/plugin-alias": "^5.1.0",
"@rollup/plugin-commonjs": "^25.0.2",
"@rollup/plugin-json": "^6.1.0",