feat: ConversationSummaryBufferMemory (#973)

* refactor: pass model in message edit payload, use encoder in standalone util function

* feat: add summaryBuffer helper

* refactor(api/messages): use new countTokens helper and add auth middleware at top

* wip: ConversationSummaryBufferMemory

* refactor: move pre-generation helpers to prompts dir

* chore: remove console log

* chore: remove test as payload will no longer carry tokenCount

* chore: update getMessagesWithinTokenLimit JSDoc

* refactor: optimize getMessagesForConversation and also break on summary, feat(ci): getMessagesForConversation tests

* refactor(getMessagesForConvo): count '00000000-0000-0000-0000-000000000000' as root message

* chore: add newer model to token map

* fix: condition was pointing to a prop of the array instead of a message prop

* refactor(BaseClient): use object for refineMessages param, rename 'summary' to 'summaryMessage', add previous_summary
refactor(getMessagesWithinTokenLimit): replace text and tokenCount if should summarize, summary, and summaryTokenCount are present
fix/refactor(handleContextStrategy): use the right comparison length for context diff, and replace payload first message when a summary is present

* chore: log previous_summary if debugging

* refactor(formatMessage): assume if role is defined that it's a valid value

* refactor(getMessagesWithinTokenLimit): remove summary logic
refactor(handleContextStrategy): add usePrevSummary logic in case only summary was pruned
refactor(loadHistory): initial message query will return all ordered messages but keep track of the latest summary
refactor(getMessagesForConversation): use object for single param, edit jsdoc, edit all files using the method
refactor(ChatGPTClient): order messages before buildPrompt is called, TODO: add convoSumBuffMemory logic

* fix: undefined handling and summarizing only when shouldRefineContext is true

* chore(BaseClient): fix test results omitting system role for summaries and test edge case

* chore: export summaryBuffer from index file

* refactor(OpenAIClient/BaseClient): move refineMessages to subclass, implement LLM initialization for summaryBuffer

* feat: add OPENAI_SUMMARIZE to enable summarizing, refactor: rename client prop 'shouldRefineContext' to 'shouldSummarize', change contextStrategy value to 'summarize' from 'refine'

* refactor: rename refineMessages method to summarizeMessages for clarity

* chore: clarify summary future intent in .env.example

* refactor(initializeLLM): handle case for either 'model' or 'modelName' being passed

* feat(gptPlugins): enable summarization for plugins

* refactor(gptPlugins): utilize new initializeLLM method and formatting methods for messages, use payload array for currentMessages and assign pastMessages sooner

* refactor(agents): use ConversationSummaryBufferMemory for both agent types

* refactor(formatMessage): optimize original method for langchain, add helper function for langchain messages, add JSDocs and tests

* refactor(summaryBuffer): add helper to createSummaryBufferMemory, and use new formatting helpers

* fix: forgot to spread formatMessages; also took the opportunity to pluralize the filename

* refactor: pass memory to tools, namely openapi specs. not used and may never be used by new method but added for testing

* ci(formatMessages): add more exhaustive checks for langchain messages

* feat: add debug env var for OpenAI

* chore: delete unnecessary comments

* chore: add extra note about summary feature

* fix: remove tokenCount from payload instructions

* fix: test fail

* fix: only pass instructions to payload when defined or not empty object

* refactor: fromPromptMessages is deprecated, use renamed method fromMessages

* refactor: use 'includes' instead of 'startsWith' for extended OpenRouter compatibility

* fix(PluginsClient.buildPromptBody): handle undefined message strings

* chore: log langchain titling error

* feat: getModelMaxTokens helper

* feat: tokenSplit helper

* feat: summary prompts updated

* fix: optimize _CUT_OFF_SUMMARIZER prompt

* refactor(summaryBuffer): use custom summary prompt, allow prompt to be passed, pass humanPrefix and aiPrefix to memory, along with any future variables, rename messagesToRefine to context

* fix(summaryBuffer): handle edge case where messagesToRefine exceeds summary context,
refactor(BaseClient): allow custom maxContextTokens to be passed to getMessagesWithinTokenLimit, add defined check before unshifting summaryMessage, update shouldSummarize based on this
refactor(OpenAIClient): use getModelMaxTokens, use cut-off message method for summary if no messages were left after pruning

* fix(handleContextStrategy): handle case where incoming prompt is bigger than model context

* chore: rename refinedContent to splitText

* chore: remove unnecessary debug log
This commit is contained in:
Danny Avila 2023-09-26 21:02:28 -04:00 committed by GitHub
parent be73deddcc
commit 317a1bd8da
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
46 changed files with 1410 additions and 440 deletions

View file

@ -15,14 +15,6 @@ jest.mock('../../../models', () => {
};
});
jest.mock('langchain/text_splitter', () => {
return {
RecursiveCharacterTextSplitter: jest.fn().mockImplementation(() => {
return { createDocuments: jest.fn().mockResolvedValue([]) };
}),
};
});
jest.mock('langchain/chat_models/openai', () => {
return {
ChatOpenAI: jest.fn().mockImplementation(() => {
@ -31,14 +23,6 @@ jest.mock('langchain/chat_models/openai', () => {
};
});
jest.mock('langchain/chains', () => {
return {
loadSummarizationChain: jest.fn().mockReturnValue({
call: jest.fn().mockResolvedValue({ output_text: 'Refined answer' }),
}),
};
});
let parentMessageId;
let conversationId;
const fakeMessages = [];
@ -69,6 +53,13 @@ describe('BaseClient', () => {
beforeEach(() => {
  // Canned result that every test receives from the stubbed summarizeMessages().
  const stubbedSummary = {
    summaryMessage: {
      role: 'system',
      content: 'Refined answer',
    },
    summaryTokenCount: 5,
  };

  // Build a fresh fake client for each test, then replace its summarizer with
  // a mock that resolves to the canned result above.
  TestClient = initializeFakeClient(apiKey, options, fakeMessages);
  TestClient.summarizeMessages = jest.fn().mockResolvedValue(stubbedSummary);
});
test('returns the input messages without instructions when addInstructions() is called with empty instructions', () => {
@ -103,30 +94,24 @@ describe('BaseClient', () => {
expect(result).toBe(expected);
});
test('refines messages correctly in refineMessages()', async () => {
test('refines messages correctly in summarizeMessages()', async () => {
const messagesToRefine = [
{ role: 'user', content: 'Hello', tokenCount: 10 },
{ role: 'assistant', content: 'How can I help you?', tokenCount: 20 },
];
const remainingContextTokens = 100;
const expectedRefinedMessage = {
role: 'assistant',
role: 'system',
content: 'Refined answer',
tokenCount: 14, // 'Refined answer'.length
};
const result = await TestClient.refineMessages(messagesToRefine, remainingContextTokens);
expect(result).toEqual(expectedRefinedMessage);
const result = await TestClient.summarizeMessages({ messagesToRefine, remainingContextTokens });
expect(result.summaryMessage).toEqual(expectedRefinedMessage);
});
test('gets messages within token limit (under limit) correctly in getMessagesWithinTokenLimit()', async () => {
TestClient.maxContextTokens = 100;
TestClient.shouldRefineContext = true;
TestClient.refineMessages = jest.fn().mockResolvedValue({
role: 'assistant',
content: 'Refined answer',
tokenCount: 30,
});
TestClient.shouldSummarize = true;
const messages = [
{ role: 'user', content: 'Hello', tokenCount: 5 },
@ -142,43 +127,50 @@ describe('BaseClient', () => {
const expectedRemainingContextTokens = 58 - 3; // (100 - 5 - 19 - 18) - 3
const expectedMessagesToRefine = [];
const lastExpectedMessage =
expectedMessagesToRefine?.[expectedMessagesToRefine.length - 1] ?? {};
const expectedIndex = messages.findIndex((msg) => msg.content === lastExpectedMessage?.content);
const result = await TestClient.getMessagesWithinTokenLimit(messages);
expect(result.context).toEqual(expectedContext);
expect(result.summaryIndex).toEqual(expectedIndex);
expect(result.remainingContextTokens).toBe(expectedRemainingContextTokens);
expect(result.messagesToRefine).toEqual(expectedMessagesToRefine);
});
test('gets messages within token limit (over limit) correctly in getMessagesWithinTokenLimit()', async () => {
test('gets result over token limit correctly in getMessagesWithinTokenLimit()', async () => {
TestClient.maxContextTokens = 50; // Set a lower limit
TestClient.shouldRefineContext = true;
TestClient.refineMessages = jest.fn().mockResolvedValue({
role: 'assistant',
content: 'Refined answer',
tokenCount: 4,
});
TestClient.shouldSummarize = true;
const messages = [
{ role: 'user', content: 'I need a coffee, stat!', tokenCount: 30 },
{ role: 'assistant', content: 'Sure, I can help with that.', tokenCount: 30 },
{ role: 'user', content: 'Hello', tokenCount: 5 },
{ role: 'assistant', content: 'How can I help you?', tokenCount: 19 },
{ role: 'user', content: 'I have a question.', tokenCount: 18 },
];
const expectedContext = [
{ role: 'user', content: 'Hello', tokenCount: 5 },
{ role: 'assistant', content: 'How can I help you?', tokenCount: 19 },
{ role: 'user', content: 'I have a question.', tokenCount: 18 },
{ role: 'user', content: 'Hello', tokenCount: 30 },
{ role: 'assistant', content: 'How can I help you?', tokenCount: 30 },
{ role: 'user', content: 'I have a question.', tokenCount: 5 },
{ role: 'user', content: 'I need a coffee, stat!', tokenCount: 19 },
{ role: 'assistant', content: 'Sure, I can help with that.', tokenCount: 18 },
];
// Subtract 3 tokens for Assistant Label priming after all messages have been counted.
const expectedRemainingContextTokens = 8 - 3; // (50 - 18 - 19 - 5) - 3
const expectedRemainingContextTokens = 5; // (50 - 18 - 19 - 5) - 3
const expectedMessagesToRefine = [
{ role: 'user', content: 'I need a coffee, stat!', tokenCount: 30 },
{ role: 'assistant', content: 'Sure, I can help with that.', tokenCount: 30 },
{ role: 'user', content: 'Hello', tokenCount: 30 },
{ role: 'assistant', content: 'How can I help you?', tokenCount: 30 },
];
const expectedContext = [
{ role: 'user', content: 'I have a question.', tokenCount: 5 },
{ role: 'user', content: 'I need a coffee, stat!', tokenCount: 19 },
{ role: 'assistant', content: 'Sure, I can help with that.', tokenCount: 18 },
];
const lastExpectedMessage =
expectedMessagesToRefine?.[expectedMessagesToRefine.length - 1] ?? {};
const expectedIndex = messages.findIndex((msg) => msg.content === lastExpectedMessage?.content);
const result = await TestClient.getMessagesWithinTokenLimit(messages);
expect(result.context).toEqual(expectedContext);
expect(result.summaryIndex).toEqual(expectedIndex);
expect(result.remainingContextTokens).toBe(expectedRemainingContextTokens);
expect(result.messagesToRefine).toEqual(expectedMessagesToRefine);
});
@ -200,13 +192,9 @@ describe('BaseClient', () => {
],
remainingContextTokens: 80,
messagesToRefine: [{ content: 'Hello' }],
refineIndex: 3,
});
TestClient.refineMessages = jest.fn().mockResolvedValue({
role: 'assistant',
content: 'Refined answer',
tokenCount: 30,
summaryIndex: 3,
});
TestClient.getTokenCountForResponse = jest.fn().mockReturnValue(40);
const instructions = { content: 'Please provide more details.' };
@ -225,9 +213,8 @@ describe('BaseClient', () => {
const expectedResult = {
payload: [
{
role: 'system',
content: 'Refined answer',
role: 'assistant',
tokenCount: 30,
},
{ content: 'How can I help you?' },
{ content: 'Please provide more details.' },
@ -238,14 +225,214 @@ describe('BaseClient', () => {
messages: expect.any(Array),
};
TestClient.shouldSummarize = true;
const result = await TestClient.handleContextStrategy({
instructions,
orderedMessages,
formattedMessages,
});
expect(result).toEqual(expectedResult);
});
describe('getMessagesForConversation', () => {
  // Linear three-message thread given out of order. Shared (read-only) by
  // several tests, so it is declared before its first use. The first message
  // points at the all-zero UUID as its parent.
  const unorderedMessages = [
    { id: '3', parentMessageId: '2', text: 'Message 3' },
    { id: '2', parentMessageId: '1', text: 'Message 2' },
    { id: '1', parentMessageId: '00000000-0000-0000-0000-000000000000', text: 'Message 1' },
  ];

  it('should return an empty array if the parentMessageId does not exist', () => {
    const result = TestClient.constructor.getMessagesForConversation({
      messages: unorderedMessages,
      parentMessageId: '999',
    });
    expect(result).toEqual([]);
  });

  it('should handle messages with messageId property', () => {
    // These messages are keyed by `messageId` rather than `id`.
    const messagesWithMessageId = [
      { messageId: '1', parentMessageId: null, text: 'Message 1' },
      { messageId: '2', parentMessageId: '1', text: 'Message 2' },
    ];
    const result = TestClient.constructor.getMessagesForConversation({
      messages: messagesWithMessageId,
      parentMessageId: '2',
    });
    expect(result).toEqual([
      { messageId: '1', parentMessageId: null, text: 'Message 1' },
      { messageId: '2', parentMessageId: '1', text: 'Message 2' },
    ]);
  });

  it('should handle messages with null parentMessageId that are not root', () => {
    // Both messages have a null parent; only the requested one is expected back.
    const messagesWithNullParent = [
      { id: '1', parentMessageId: null, text: 'Message 1' },
      { id: '2', parentMessageId: null, text: 'Message 2' },
    ];
    const result = TestClient.constructor.getMessagesForConversation({
      messages: messagesWithNullParent,
      parentMessageId: '2',
    });
    expect(result).toEqual([{ id: '2', parentMessageId: null, text: 'Message 2' }]);
  });

  it('should handle cyclic references without going into an infinite loop', () => {
    // 3 -> 2 -> 1 -> 3: every message's parent is another message in the set.
    const cyclicMessages = [
      { id: '3', parentMessageId: '2', text: 'Message 3' },
      { id: '1', parentMessageId: '3', text: 'Message 1' },
      { id: '2', parentMessageId: '1', text: 'Message 2' },
    ];
    const result = TestClient.constructor.getMessagesForConversation({
      messages: cyclicMessages,
      parentMessageId: '3',
    });
    expect(result).toEqual([
      { id: '1', parentMessageId: '3', text: 'Message 1' },
      { id: '2', parentMessageId: '1', text: 'Message 2' },
      { id: '3', parentMessageId: '2', text: 'Message 3' },
    ]);
  });

  it('should return ordered messages based on parentMessageId', () => {
    const result = TestClient.constructor.getMessagesForConversation({
      messages: unorderedMessages,
      parentMessageId: '3',
    });
    expect(result).toEqual([
      { id: '1', parentMessageId: '00000000-0000-0000-0000-000000000000', text: 'Message 1' },
      { id: '2', parentMessageId: '1', text: 'Message 2' },
      { id: '3', parentMessageId: '2', text: 'Message 3' },
    ]);
  });

  it('should return ordered messages from a branched array based on parentMessageId', () => {
    // NOTE(review): ids '6' and '8' each appear twice, with texts that do not
    // match their ids. This looks unintentional, but none of those entries lie
    // on the expected 1 -> 3 -> 5 -> 7 -> 10 branch; confirm before changing.
    const unorderedBranchedMessages = [
      { id: '4', parentMessageId: '2', text: 'Message 4', summary: 'Summary for Message 4' },
      { id: '10', parentMessageId: '7', text: 'Message 10' },
      { id: '1', parentMessageId: null, text: 'Message 1' },
      { id: '6', parentMessageId: '5', text: 'Message 7' },
      { id: '7', parentMessageId: '5', text: 'Message 7' },
      { id: '2', parentMessageId: '1', text: 'Message 2' },
      { id: '8', parentMessageId: '6', text: 'Message 8' },
      { id: '5', parentMessageId: '3', text: 'Message 5' },
      { id: '3', parentMessageId: '1', text: 'Message 3' },
      { id: '6', parentMessageId: '4', text: 'Message 6' },
      { id: '8', parentMessageId: '7', text: 'Message 9' },
      { id: '9', parentMessageId: '7', text: 'Message 9' },
      { id: '11', parentMessageId: '2', text: 'Message 11', summary: 'Summary for Message 11' },
    ];
    // The summarized messages ('4' and '11') sit on other branches, so the
    // expected result is the full path back to the root.
    const result = TestClient.constructor.getMessagesForConversation({
      messages: unorderedBranchedMessages,
      parentMessageId: '10',
      summary: true,
    });
    expect(result).toEqual([
      { id: '1', parentMessageId: null, text: 'Message 1' },
      { id: '3', parentMessageId: '1', text: 'Message 3' },
      { id: '5', parentMessageId: '3', text: 'Message 5' },
      { id: '7', parentMessageId: '5', text: 'Message 7' },
      { id: '10', parentMessageId: '7', text: 'Message 10' },
    ]);
  });

  it('should return an empty array if no messages are provided', () => {
    const result = TestClient.constructor.getMessagesForConversation({
      messages: [],
      parentMessageId: '3',
    });
    expect(result).toEqual([]);
  });

  it('should map over the ordered messages if mapMethod is provided', () => {
    const mapMethod = (msg) => msg.text;
    const result = TestClient.constructor.getMessagesForConversation({
      messages: unorderedMessages,
      parentMessageId: '3',
      mapMethod,
    });
    expect(result).toEqual(['Message 1', 'Message 2', 'Message 3']);
  });

  // Each summary test below owns its fixture. Previously one `let` binding was
  // reassigned from test to test, which made the first summary test depend on
  // running before the others.

  it('should start with the message that has a summary property and continue until the specified parentMessageId', () => {
    const unorderedMessagesWithSummary = [
      { id: '4', parentMessageId: '3', text: 'Message 4' },
      { id: '2', parentMessageId: '1', text: 'Message 2', summary: 'Summary for Message 2' },
      { id: '3', parentMessageId: '2', text: 'Message 3', summary: 'Summary for Message 3' },
      { id: '1', parentMessageId: null, text: 'Message 1' },
    ];
    const result = TestClient.constructor.getMessagesForConversation({
      messages: unorderedMessagesWithSummary,
      parentMessageId: '4',
      summary: true,
    });
    // The summarized message is expected first, with a 'system' role and its
    // text replaced by the summary.
    expect(result).toEqual([
      {
        id: '3',
        parentMessageId: '2',
        role: 'system',
        text: 'Summary for Message 3',
        summary: 'Summary for Message 3',
      },
      { id: '4', parentMessageId: '3', text: 'Message 4' },
    ]);
  });

  it('should handle multiple summaries and return the branch from the latest to the parentMessageId', () => {
    const unorderedMessagesWithSummary = [
      { id: '5', parentMessageId: '4', text: 'Message 5' },
      { id: '2', parentMessageId: '1', text: 'Message 2', summary: 'Summary for Message 2' },
      { id: '3', parentMessageId: '2', text: 'Message 3', summary: 'Summary for Message 3' },
      { id: '4', parentMessageId: '3', text: 'Message 4', summary: 'Summary for Message 4' },
      { id: '1', parentMessageId: null, text: 'Message 1' },
    ];
    const result = TestClient.constructor.getMessagesForConversation({
      messages: unorderedMessagesWithSummary,
      parentMessageId: '5',
      summary: true,
    });
    expect(result).toEqual([
      {
        id: '4',
        parentMessageId: '3',
        role: 'system',
        text: 'Summary for Message 4',
        summary: 'Summary for Message 4',
      },
      { id: '5', parentMessageId: '4', text: 'Message 5' },
    ]);
  });

  it('should handle summary at root edge case and continue until the parentMessageId', () => {
    // Every ancestor, including the root, carries a summary.
    const unorderedMessagesWithSummary = [
      { id: '5', parentMessageId: '4', text: 'Message 5' },
      { id: '1', parentMessageId: null, text: 'Message 1', summary: 'Summary for Message 1' },
      { id: '4', parentMessageId: '3', text: 'Message 4', summary: 'Summary for Message 4' },
      { id: '2', parentMessageId: '1', text: 'Message 2', summary: 'Summary for Message 2' },
      { id: '3', parentMessageId: '2', text: 'Message 3', summary: 'Summary for Message 3' },
    ];
    const result = TestClient.constructor.getMessagesForConversation({
      messages: unorderedMessagesWithSummary,
      parentMessageId: '5',
      summary: true,
    });
    expect(result).toEqual([
      {
        id: '4',
        parentMessageId: '3',
        role: 'system',
        text: 'Summary for Message 4',
        summary: 'Summary for Message 4',
      },
      { id: '5', parentMessageId: '4', text: 'Message 5' },
    ]);
  });
});
describe('sendMessage', () => {
test('sendMessage should return a response message', async () => {
const expectedResult = expect.objectContaining({