mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 00:40:14 +01:00
feat: ConversationSummaryBufferMemory (#973)
* refactor: pass model in message edit payload, use encoder in standalone util function * feat: add summaryBuffer helper * refactor(api/messages): use new countTokens helper and add auth middleware at top * wip: ConversationSummaryBufferMemory * refactor: move pre-generation helpers to prompts dir * chore: remove console log * chore: remove test as payload will no longer carry tokenCount * chore: update getMessagesWithinTokenLimit JSDoc * refactor: optimize getMessagesForConversation and also break on summary, feat(ci): getMessagesForConversation tests * refactor(getMessagesForConvo): count '00000000-0000-0000-0000-000000000000' as root message * chore: add newer model to token map * fix: condition was point to prop of array instead of message prop * refactor(BaseClient): use object for refineMessages param, rename 'summary' to 'summaryMessage', add previous_summary refactor(getMessagesWithinTokenLimit): replace text and tokenCount if should summarize, summary, and summaryTokenCount are present fix/refactor(handleContextStrategy): use the right comparison length for context diff, and replace payload first message when a summary is present * chore: log previous_summary if debugging * refactor(formatMessage): assume if role is defined that it's a valid value * refactor(getMessagesWithinTokenLimit): remove summary logic refactor(handleContextStrategy): add usePrevSummary logic in case only summary was pruned refactor(loadHistory): initial message query will return all ordered messages but keep track of the latest summary refactor(getMessagesForConversation): use object for single param, edit jsdoc, edit all files using the method refactor(ChatGPTClient): order messages before buildPrompt is called, TODO: add convoSumBuffMemory logic * fix: undefined handling and summarizing only when shouldRefineContext is true * chore(BaseClient): fix test results omitting system role for summaries and test edge case * chore: export summaryBuffer from index file * refactor(OpenAIClient/BaseClient): move refineMessages to subclass, implement LLM initialization for summaryBuffer * feat: add OPENAI_SUMMARIZE to enable summarizing, refactor: rename client prop 'shouldRefineContext' to 'shouldSummarize', change contextStrategy value to 'summarize' from 'refine' * refactor: rename refineMessages method to summarizeMessages for clarity * chore: clarify summary future intent in .env.example * refactor(initializeLLM): handle case for either 'model' or 'modelName' being passed * feat(gptPlugins): enable summarization for plugins * refactor(gptPlugins): utilize new initializeLLM method and formatting methods for messages, use payload array for currentMessages and assign pastMessages sooner * refactor(agents): use ConversationSummaryBufferMemory for both agent types * refactor(formatMessage): optimize original method for langchain, add helper function for langchain messages, add JSDocs and tests * refactor(summaryBuffer): add helper to createSummaryBufferMemory, and use new formatting helpers * fix: forgot to spread formatMessages also took opportunity to pluralize filename * refactor: pass memory to tools, namely openapi specs. not used and may never be used by new method but added for testing * ci(formatMessages): add more exhaustive checks for langchain messages * feat: add debug env var for OpenAI * chore: delete unnecessary comments * chore: add extra note about summary feature * fix: remove tokenCount from payload instructions * fix: test fail * fix: only pass instructions to payload when defined or not empty object * refactor: fromPromptMessages is deprecated, use renamed method fromMessages * refactor: use 'includes' instead of 'startsWith' for extended OpenRouter compatibility * fix(PluginsClient.buildPromptBody): handle undefined message strings * chore: log langchain titling error * feat: getModelMaxTokens helper * feat: tokenSplit helper * feat: summary prompts updated * fix: optimize _CUT_OFF_SUMMARIZER prompt * refactor(summaryBuffer): use custom summary prompt, allow prompt to be passed, pass humanPrefix and aiPrefix to memory, along with any future variables, rename messagesToRefine to context * fix(summaryBuffer): handle edge case where messagesToRefine exceeds summary context, refactor(BaseClient): allow custom maxContextTokens to be passed to getMessagesWithinTokenLimit, add defined check before unshifting summaryMessage, update shouldSummarize based on this refactor(OpenAIClient): use getModelMaxTokens, use cut-off message method for summary if no messages were left after pruning * fix(handleContextStrategy): handle case where incoming prompt is bigger than model context * chore: rename refinedContent to splitText * chore: remove unnecessary debug log
This commit is contained in:
parent
be73deddcc
commit
317a1bd8da
46 changed files with 1410 additions and 440 deletions
|
|
@ -15,14 +15,6 @@ jest.mock('../../../models', () => {
|
|||
};
|
||||
});
|
||||
|
||||
jest.mock('langchain/text_splitter', () => {
|
||||
return {
|
||||
RecursiveCharacterTextSplitter: jest.fn().mockImplementation(() => {
|
||||
return { createDocuments: jest.fn().mockResolvedValue([]) };
|
||||
}),
|
||||
};
|
||||
});
|
||||
|
||||
jest.mock('langchain/chat_models/openai', () => {
|
||||
return {
|
||||
ChatOpenAI: jest.fn().mockImplementation(() => {
|
||||
|
|
@ -31,14 +23,6 @@ jest.mock('langchain/chat_models/openai', () => {
|
|||
};
|
||||
});
|
||||
|
||||
jest.mock('langchain/chains', () => {
|
||||
return {
|
||||
loadSummarizationChain: jest.fn().mockReturnValue({
|
||||
call: jest.fn().mockResolvedValue({ output_text: 'Refined answer' }),
|
||||
}),
|
||||
};
|
||||
});
|
||||
|
||||
let parentMessageId;
|
||||
let conversationId;
|
||||
const fakeMessages = [];
|
||||
|
|
@ -69,6 +53,13 @@ describe('BaseClient', () => {
|
|||
|
||||
beforeEach(() => {
|
||||
TestClient = initializeFakeClient(apiKey, options, fakeMessages);
|
||||
TestClient.summarizeMessages = jest.fn().mockResolvedValue({
|
||||
summaryMessage: {
|
||||
role: 'system',
|
||||
content: 'Refined answer',
|
||||
},
|
||||
summaryTokenCount: 5,
|
||||
});
|
||||
});
|
||||
|
||||
test('returns the input messages without instructions when addInstructions() is called with empty instructions', () => {
|
||||
|
|
@ -103,30 +94,24 @@ describe('BaseClient', () => {
|
|||
expect(result).toBe(expected);
|
||||
});
|
||||
|
||||
test('refines messages correctly in refineMessages()', async () => {
|
||||
test('refines messages correctly in summarizeMessages()', async () => {
|
||||
const messagesToRefine = [
|
||||
{ role: 'user', content: 'Hello', tokenCount: 10 },
|
||||
{ role: 'assistant', content: 'How can I help you?', tokenCount: 20 },
|
||||
];
|
||||
const remainingContextTokens = 100;
|
||||
const expectedRefinedMessage = {
|
||||
role: 'assistant',
|
||||
role: 'system',
|
||||
content: 'Refined answer',
|
||||
tokenCount: 14, // 'Refined answer'.length
|
||||
};
|
||||
|
||||
const result = await TestClient.refineMessages(messagesToRefine, remainingContextTokens);
|
||||
expect(result).toEqual(expectedRefinedMessage);
|
||||
const result = await TestClient.summarizeMessages({ messagesToRefine, remainingContextTokens });
|
||||
expect(result.summaryMessage).toEqual(expectedRefinedMessage);
|
||||
});
|
||||
|
||||
test('gets messages within token limit (under limit) correctly in getMessagesWithinTokenLimit()', async () => {
|
||||
TestClient.maxContextTokens = 100;
|
||||
TestClient.shouldRefineContext = true;
|
||||
TestClient.refineMessages = jest.fn().mockResolvedValue({
|
||||
role: 'assistant',
|
||||
content: 'Refined answer',
|
||||
tokenCount: 30,
|
||||
});
|
||||
TestClient.shouldSummarize = true;
|
||||
|
||||
const messages = [
|
||||
{ role: 'user', content: 'Hello', tokenCount: 5 },
|
||||
|
|
@ -142,43 +127,50 @@ describe('BaseClient', () => {
|
|||
const expectedRemainingContextTokens = 58 - 3; // (100 - 5 - 19 - 18) - 3
|
||||
const expectedMessagesToRefine = [];
|
||||
|
||||
const lastExpectedMessage =
|
||||
expectedMessagesToRefine?.[expectedMessagesToRefine.length - 1] ?? {};
|
||||
const expectedIndex = messages.findIndex((msg) => msg.content === lastExpectedMessage?.content);
|
||||
|
||||
const result = await TestClient.getMessagesWithinTokenLimit(messages);
|
||||
|
||||
expect(result.context).toEqual(expectedContext);
|
||||
expect(result.summaryIndex).toEqual(expectedIndex);
|
||||
expect(result.remainingContextTokens).toBe(expectedRemainingContextTokens);
|
||||
expect(result.messagesToRefine).toEqual(expectedMessagesToRefine);
|
||||
});
|
||||
|
||||
test('gets messages within token limit (over limit) correctly in getMessagesWithinTokenLimit()', async () => {
|
||||
test('gets result over token limit correctly in getMessagesWithinTokenLimit()', async () => {
|
||||
TestClient.maxContextTokens = 50; // Set a lower limit
|
||||
TestClient.shouldRefineContext = true;
|
||||
TestClient.refineMessages = jest.fn().mockResolvedValue({
|
||||
role: 'assistant',
|
||||
content: 'Refined answer',
|
||||
tokenCount: 4,
|
||||
});
|
||||
TestClient.shouldSummarize = true;
|
||||
|
||||
const messages = [
|
||||
{ role: 'user', content: 'I need a coffee, stat!', tokenCount: 30 },
|
||||
{ role: 'assistant', content: 'Sure, I can help with that.', tokenCount: 30 },
|
||||
{ role: 'user', content: 'Hello', tokenCount: 5 },
|
||||
{ role: 'assistant', content: 'How can I help you?', tokenCount: 19 },
|
||||
{ role: 'user', content: 'I have a question.', tokenCount: 18 },
|
||||
];
|
||||
const expectedContext = [
|
||||
{ role: 'user', content: 'Hello', tokenCount: 5 },
|
||||
{ role: 'assistant', content: 'How can I help you?', tokenCount: 19 },
|
||||
{ role: 'user', content: 'I have a question.', tokenCount: 18 },
|
||||
{ role: 'user', content: 'Hello', tokenCount: 30 },
|
||||
{ role: 'assistant', content: 'How can I help you?', tokenCount: 30 },
|
||||
{ role: 'user', content: 'I have a question.', tokenCount: 5 },
|
||||
{ role: 'user', content: 'I need a coffee, stat!', tokenCount: 19 },
|
||||
{ role: 'assistant', content: 'Sure, I can help with that.', tokenCount: 18 },
|
||||
];
|
||||
|
||||
// Subtract 3 tokens for Assistant Label priming after all messages have been counted.
|
||||
const expectedRemainingContextTokens = 8 - 3; // (50 - 18 - 19 - 5) - 3
|
||||
const expectedRemainingContextTokens = 5; // (50 - 18 - 19 - 5) - 3
|
||||
const expectedMessagesToRefine = [
|
||||
{ role: 'user', content: 'I need a coffee, stat!', tokenCount: 30 },
|
||||
{ role: 'assistant', content: 'Sure, I can help with that.', tokenCount: 30 },
|
||||
{ role: 'user', content: 'Hello', tokenCount: 30 },
|
||||
{ role: 'assistant', content: 'How can I help you?', tokenCount: 30 },
|
||||
];
|
||||
const expectedContext = [
|
||||
{ role: 'user', content: 'I have a question.', tokenCount: 5 },
|
||||
{ role: 'user', content: 'I need a coffee, stat!', tokenCount: 19 },
|
||||
{ role: 'assistant', content: 'Sure, I can help with that.', tokenCount: 18 },
|
||||
];
|
||||
|
||||
const lastExpectedMessage =
|
||||
expectedMessagesToRefine?.[expectedMessagesToRefine.length - 1] ?? {};
|
||||
const expectedIndex = messages.findIndex((msg) => msg.content === lastExpectedMessage?.content);
|
||||
|
||||
const result = await TestClient.getMessagesWithinTokenLimit(messages);
|
||||
|
||||
expect(result.context).toEqual(expectedContext);
|
||||
expect(result.summaryIndex).toEqual(expectedIndex);
|
||||
expect(result.remainingContextTokens).toBe(expectedRemainingContextTokens);
|
||||
expect(result.messagesToRefine).toEqual(expectedMessagesToRefine);
|
||||
});
|
||||
|
|
@ -200,13 +192,9 @@ describe('BaseClient', () => {
|
|||
],
|
||||
remainingContextTokens: 80,
|
||||
messagesToRefine: [{ content: 'Hello' }],
|
||||
refineIndex: 3,
|
||||
});
|
||||
TestClient.refineMessages = jest.fn().mockResolvedValue({
|
||||
role: 'assistant',
|
||||
content: 'Refined answer',
|
||||
tokenCount: 30,
|
||||
summaryIndex: 3,
|
||||
});
|
||||
|
||||
TestClient.getTokenCountForResponse = jest.fn().mockReturnValue(40);
|
||||
|
||||
const instructions = { content: 'Please provide more details.' };
|
||||
|
|
@ -225,9 +213,8 @@ describe('BaseClient', () => {
|
|||
const expectedResult = {
|
||||
payload: [
|
||||
{
|
||||
role: 'system',
|
||||
content: 'Refined answer',
|
||||
role: 'assistant',
|
||||
tokenCount: 30,
|
||||
},
|
||||
{ content: 'How can I help you?' },
|
||||
{ content: 'Please provide more details.' },
|
||||
|
|
@ -238,14 +225,214 @@ describe('BaseClient', () => {
|
|||
messages: expect.any(Array),
|
||||
};
|
||||
|
||||
TestClient.shouldSummarize = true;
|
||||
const result = await TestClient.handleContextStrategy({
|
||||
instructions,
|
||||
orderedMessages,
|
||||
formattedMessages,
|
||||
});
|
||||
|
||||
expect(result).toEqual(expectedResult);
|
||||
});
|
||||
|
||||
describe('getMessagesForConversation', () => {
|
||||
it('should return an empty array if the parentMessageId does not exist', () => {
|
||||
const result = TestClient.constructor.getMessagesForConversation({
|
||||
messages: unorderedMessages,
|
||||
parentMessageId: '999',
|
||||
});
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
it('should handle messages with messageId property', () => {
|
||||
const messagesWithMessageId = [
|
||||
{ messageId: '1', parentMessageId: null, text: 'Message 1' },
|
||||
{ messageId: '2', parentMessageId: '1', text: 'Message 2' },
|
||||
];
|
||||
const result = TestClient.constructor.getMessagesForConversation({
|
||||
messages: messagesWithMessageId,
|
||||
parentMessageId: '2',
|
||||
});
|
||||
expect(result).toEqual([
|
||||
{ messageId: '1', parentMessageId: null, text: 'Message 1' },
|
||||
{ messageId: '2', parentMessageId: '1', text: 'Message 2' },
|
||||
]);
|
||||
});
|
||||
|
||||
const messagesWithNullParent = [
|
||||
{ id: '1', parentMessageId: null, text: 'Message 1' },
|
||||
{ id: '2', parentMessageId: null, text: 'Message 2' },
|
||||
];
|
||||
|
||||
it('should handle messages with null parentMessageId that are not root', () => {
|
||||
const result = TestClient.constructor.getMessagesForConversation({
|
||||
messages: messagesWithNullParent,
|
||||
parentMessageId: '2',
|
||||
});
|
||||
expect(result).toEqual([{ id: '2', parentMessageId: null, text: 'Message 2' }]);
|
||||
});
|
||||
|
||||
const cyclicMessages = [
|
||||
{ id: '3', parentMessageId: '2', text: 'Message 3' },
|
||||
{ id: '1', parentMessageId: '3', text: 'Message 1' },
|
||||
{ id: '2', parentMessageId: '1', text: 'Message 2' },
|
||||
];
|
||||
|
||||
it('should handle cyclic references without going into an infinite loop', () => {
|
||||
const result = TestClient.constructor.getMessagesForConversation({
|
||||
messages: cyclicMessages,
|
||||
parentMessageId: '3',
|
||||
});
|
||||
expect(result).toEqual([
|
||||
{ id: '1', parentMessageId: '3', text: 'Message 1' },
|
||||
{ id: '2', parentMessageId: '1', text: 'Message 2' },
|
||||
{ id: '3', parentMessageId: '2', text: 'Message 3' },
|
||||
]);
|
||||
});
|
||||
|
||||
const unorderedMessages = [
|
||||
{ id: '3', parentMessageId: '2', text: 'Message 3' },
|
||||
{ id: '2', parentMessageId: '1', text: 'Message 2' },
|
||||
{ id: '1', parentMessageId: '00000000-0000-0000-0000-000000000000', text: 'Message 1' },
|
||||
];
|
||||
|
||||
it('should return ordered messages based on parentMessageId', () => {
|
||||
const result = TestClient.constructor.getMessagesForConversation({
|
||||
messages: unorderedMessages,
|
||||
parentMessageId: '3',
|
||||
});
|
||||
expect(result).toEqual([
|
||||
{ id: '1', parentMessageId: '00000000-0000-0000-0000-000000000000', text: 'Message 1' },
|
||||
{ id: '2', parentMessageId: '1', text: 'Message 2' },
|
||||
{ id: '3', parentMessageId: '2', text: 'Message 3' },
|
||||
]);
|
||||
});
|
||||
|
||||
const unorderedBranchedMessages = [
|
||||
{ id: '4', parentMessageId: '2', text: 'Message 4', summary: 'Summary for Message 4' },
|
||||
{ id: '10', parentMessageId: '7', text: 'Message 10' },
|
||||
{ id: '1', parentMessageId: null, text: 'Message 1' },
|
||||
{ id: '6', parentMessageId: '5', text: 'Message 7' },
|
||||
{ id: '7', parentMessageId: '5', text: 'Message 7' },
|
||||
{ id: '2', parentMessageId: '1', text: 'Message 2' },
|
||||
{ id: '8', parentMessageId: '6', text: 'Message 8' },
|
||||
{ id: '5', parentMessageId: '3', text: 'Message 5' },
|
||||
{ id: '3', parentMessageId: '1', text: 'Message 3' },
|
||||
{ id: '6', parentMessageId: '4', text: 'Message 6' },
|
||||
{ id: '8', parentMessageId: '7', text: 'Message 9' },
|
||||
{ id: '9', parentMessageId: '7', text: 'Message 9' },
|
||||
{ id: '11', parentMessageId: '2', text: 'Message 11', summary: 'Summary for Message 11' },
|
||||
];
|
||||
|
||||
it('should return ordered messages from a branched array based on parentMessageId', () => {
|
||||
const result = TestClient.constructor.getMessagesForConversation({
|
||||
messages: unorderedBranchedMessages,
|
||||
parentMessageId: '10',
|
||||
summary: true,
|
||||
});
|
||||
expect(result).toEqual([
|
||||
{ id: '1', parentMessageId: null, text: 'Message 1' },
|
||||
{ id: '3', parentMessageId: '1', text: 'Message 3' },
|
||||
{ id: '5', parentMessageId: '3', text: 'Message 5' },
|
||||
{ id: '7', parentMessageId: '5', text: 'Message 7' },
|
||||
{ id: '10', parentMessageId: '7', text: 'Message 10' },
|
||||
]);
|
||||
});
|
||||
|
||||
it('should return an empty array if no messages are provided', () => {
|
||||
const result = TestClient.constructor.getMessagesForConversation({
|
||||
messages: [],
|
||||
parentMessageId: '3',
|
||||
});
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
it('should map over the ordered messages if mapMethod is provided', () => {
|
||||
const mapMethod = (msg) => msg.text;
|
||||
const result = TestClient.constructor.getMessagesForConversation({
|
||||
messages: unorderedMessages,
|
||||
parentMessageId: '3',
|
||||
mapMethod,
|
||||
});
|
||||
expect(result).toEqual(['Message 1', 'Message 2', 'Message 3']);
|
||||
});
|
||||
|
||||
let unorderedMessagesWithSummary = [
|
||||
{ id: '4', parentMessageId: '3', text: 'Message 4' },
|
||||
{ id: '2', parentMessageId: '1', text: 'Message 2', summary: 'Summary for Message 2' },
|
||||
{ id: '3', parentMessageId: '2', text: 'Message 3', summary: 'Summary for Message 3' },
|
||||
{ id: '1', parentMessageId: null, text: 'Message 1' },
|
||||
];
|
||||
|
||||
it('should start with the message that has a summary property and continue until the specified parentMessageId', () => {
|
||||
const result = TestClient.constructor.getMessagesForConversation({
|
||||
messages: unorderedMessagesWithSummary,
|
||||
parentMessageId: '4',
|
||||
summary: true,
|
||||
});
|
||||
expect(result).toEqual([
|
||||
{
|
||||
id: '3',
|
||||
parentMessageId: '2',
|
||||
role: 'system',
|
||||
text: 'Summary for Message 3',
|
||||
summary: 'Summary for Message 3',
|
||||
},
|
||||
{ id: '4', parentMessageId: '3', text: 'Message 4' },
|
||||
]);
|
||||
});
|
||||
|
||||
it('should handle multiple summaries and return the branch from the latest to the parentMessageId', () => {
|
||||
unorderedMessagesWithSummary = [
|
||||
{ id: '5', parentMessageId: '4', text: 'Message 5' },
|
||||
{ id: '2', parentMessageId: '1', text: 'Message 2', summary: 'Summary for Message 2' },
|
||||
{ id: '3', parentMessageId: '2', text: 'Message 3', summary: 'Summary for Message 3' },
|
||||
{ id: '4', parentMessageId: '3', text: 'Message 4', summary: 'Summary for Message 4' },
|
||||
{ id: '1', parentMessageId: null, text: 'Message 1' },
|
||||
];
|
||||
const result = TestClient.constructor.getMessagesForConversation({
|
||||
messages: unorderedMessagesWithSummary,
|
||||
parentMessageId: '5',
|
||||
summary: true,
|
||||
});
|
||||
expect(result).toEqual([
|
||||
{
|
||||
id: '4',
|
||||
parentMessageId: '3',
|
||||
role: 'system',
|
||||
text: 'Summary for Message 4',
|
||||
summary: 'Summary for Message 4',
|
||||
},
|
||||
{ id: '5', parentMessageId: '4', text: 'Message 5' },
|
||||
]);
|
||||
});
|
||||
|
||||
it('should handle summary at root edge case and continue until the parentMessageId', () => {
|
||||
unorderedMessagesWithSummary = [
|
||||
{ id: '5', parentMessageId: '4', text: 'Message 5' },
|
||||
{ id: '1', parentMessageId: null, text: 'Message 1', summary: 'Summary for Message 1' },
|
||||
{ id: '4', parentMessageId: '3', text: 'Message 4', summary: 'Summary for Message 4' },
|
||||
{ id: '2', parentMessageId: '1', text: 'Message 2', summary: 'Summary for Message 2' },
|
||||
{ id: '3', parentMessageId: '2', text: 'Message 3', summary: 'Summary for Message 3' },
|
||||
];
|
||||
const result = TestClient.constructor.getMessagesForConversation({
|
||||
messages: unorderedMessagesWithSummary,
|
||||
parentMessageId: '5',
|
||||
summary: true,
|
||||
});
|
||||
expect(result).toEqual([
|
||||
{
|
||||
id: '4',
|
||||
parentMessageId: '3',
|
||||
role: 'system',
|
||||
text: 'Summary for Message 4',
|
||||
summary: 'Summary for Message 4',
|
||||
},
|
||||
{ id: '5', parentMessageId: '4', text: 'Message 5' },
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
describe('sendMessage', () => {
|
||||
test('sendMessage should return a response message', async () => {
|
||||
const expectedResult = expect.objectContaining({
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue