LibreChat/api/app/clients/specs/FakeClient.js
Danny Avila 317a1bd8da
feat: ConversationSummaryBufferMemory (#973)
* refactor: pass model in message edit payload, use encoder in standalone util function

* feat: add summaryBuffer helper

* refactor(api/messages): use new countTokens helper and add auth middleware at top

* wip: ConversationSummaryBufferMemory

* refactor: move pre-generation helpers to prompts dir

* chore: remove console log

* chore: remove test as payload will no longer carry tokenCount

* chore: update getMessagesWithinTokenLimit JSDoc

* refactor: optimize getMessagesForConversation and also break on summary, feat(ci): getMessagesForConversation tests

* refactor(getMessagesForConvo): count '00000000-0000-0000-0000-000000000000' as root message

* chore: add newer model to token map

* fix: condition was point to prop of array instead of message prop

* refactor(BaseClient): use object for refineMessages param, rename 'summary' to 'summaryMessage', add previous_summary
refactor(getMessagesWithinTokenLimit): replace text and tokenCount if should summarize, summary, and summaryTokenCount are present
fix/refactor(handleContextStrategy): use the right comparison length for context diff, and replace payload first message when a summary is present

* chore: log previous_summary if debugging

* refactor(formatMessage): assume if role is defined that it's a valid value

* refactor(getMessagesWithinTokenLimit): remove summary logic
refactor(handleContextStrategy): add usePrevSummary logic in case only summary was pruned
refactor(loadHistory): initial message query will return all ordered messages but keep track of the latest summary
refactor(getMessagesForConversation): use object for single param, edit jsdoc, edit all files using the method
refactor(ChatGPTClient): order messages before buildPrompt is called, TODO: add convoSumBuffMemory logic

* fix: undefined handling and summarizing only when shouldRefineContext is true

* chore(BaseClient): fix test results omitting system role for summaries and test edge case

* chore: export summaryBuffer from index file

* refactor(OpenAIClient/BaseClient): move refineMessages to subclass, implement LLM initialization for summaryBuffer

* feat: add OPENAI_SUMMARIZE to enable summarizing, refactor: rename client prop 'shouldRefineContext' to 'shouldSummarize', change contextStrategy value to 'summarize' from 'refine'

* refactor: rename refineMessages method to summarizeMessages for clarity

* chore: clarify summary future intent in .env.example

* refactor(initializeLLM): handle case for either 'model' or 'modelName' being passed

* feat(gptPlugins): enable summarization for plugins

* refactor(gptPlugins): utilize new initializeLLM method and formatting methods for messages, use payload array for currentMessages and assign pastMessages sooner

* refactor(agents): use ConversationSummaryBufferMemory for both agent types

* refactor(formatMessage): optimize original method for langchain, add helper function for langchain messages, add JSDocs and tests

* refactor(summaryBuffer): add helper to createSummaryBufferMemory, and use new formatting helpers

* fix: forgot to spread formatMessages also took opportunity to pluralize filename

* refactor: pass memory to tools, namely openapi specs. not used and may never be used by new method but added for testing

* ci(formatMessages): add more exhaustive checks for langchain messages

* feat: add debug env var for OpenAI

* chore: delete unnecessary comments

* chore: add extra note about summary feature

* fix: remove tokenCount from payload instructions

* fix: test fail

* fix: only pass instructions to payload when defined or not empty object

* refactor: fromPromptMessages is deprecated, use renamed method fromMessages

* refactor: use 'includes' instead of 'startsWith' for extended OpenRouter compatibility

* fix(PluginsClient.buildPromptBody): handle undefined message strings

* chore: log langchain titling error

* feat: getModelMaxTokens helper

* feat: tokenSplit helper

* feat: summary prompts updated

* fix: optimize _CUT_OFF_SUMMARIZER prompt

* refactor(summaryBuffer): use custom summary prompt, allow prompt to be passed, pass humanPrefix and aiPrefix to memory, along with any future variables, rename messagesToRefine to context

* fix(summaryBuffer): handle edge case where messagesToRefine exceeds summary context,
refactor(BaseClient): allow custom maxContextTokens to be passed to getMessagesWithinTokenLimit, add defined check before unshifting summaryMessage, update shouldSummarize based on this
refactor(OpenAIClient): use getModelMaxTokens, use cut-off message method for summary if no messages were left after pruning

* fix(handleContextStrategy): handle case where incoming prompt is bigger than model context

* chore: rename refinedContent to splitText

* chore: remove unnecessary debug log
2023-09-26 21:02:28 -04:00

112 lines
3.2 KiB
JavaScript

const BaseClient = require('../BaseClient');
const { getModelMaxTokens } = require('../../../utils');
class FakeClient extends BaseClient {
constructor(apiKey, options = {}) {
super(apiKey, options);
this.sender = 'AI Assistant';
this.setOptions(options);
}
setOptions(options) {
if (this.options && !this.options.replaceOptions) {
this.options.modelOptions = {
...this.options.modelOptions,
...options.modelOptions,
};
delete options.modelOptions;
this.options = {
...this.options,
...options,
};
} else {
this.options = options;
}
if (this.options.openaiApiKey) {
this.apiKey = this.options.openaiApiKey;
}
const modelOptions = this.options.modelOptions || {};
if (!this.modelOptions) {
this.modelOptions = {
...modelOptions,
model: modelOptions.model || 'gpt-3.5-turbo',
temperature:
typeof modelOptions.temperature === 'undefined' ? 0.8 : modelOptions.temperature,
top_p: typeof modelOptions.top_p === 'undefined' ? 1 : modelOptions.top_p,
presence_penalty:
typeof modelOptions.presence_penalty === 'undefined' ? 1 : modelOptions.presence_penalty,
stop: modelOptions.stop,
};
}
this.maxContextTokens = getModelMaxTokens(this.modelOptions.model) ?? 4097;
}
getCompletion() {}
buildMessages() {}
getTokenCount(str) {
return str.length;
}
getTokenCountForMessage(message) {
return message?.content?.length || message.length;
}
}
const initializeFakeClient = (apiKey, options, fakeMessages) => {
let TestClient = new FakeClient(apiKey);
TestClient.options = options;
TestClient.abortController = { abort: jest.fn() };
TestClient.saveMessageToDatabase = jest.fn();
TestClient.loadHistory = jest
.fn()
.mockImplementation((conversationId, parentMessageId = null) => {
if (!conversationId) {
TestClient.currentMessages = [];
return Promise.resolve([]);
}
const orderedMessages = TestClient.constructor.getMessagesForConversation({
messages: fakeMessages,
parentMessageId,
});
TestClient.currentMessages = orderedMessages;
return Promise.resolve(orderedMessages);
});
TestClient.getSaveOptions = jest.fn().mockImplementation(() => {
return {};
});
TestClient.getBuildMessagesOptions = jest.fn().mockImplementation(() => {
return {};
});
TestClient.sendCompletion = jest.fn(async () => {
return 'Mock response text';
});
TestClient.buildMessages = jest.fn(async (messages, parentMessageId) => {
const orderedMessages = TestClient.constructor.getMessagesForConversation({
messages,
parentMessageId,
});
const formattedMessages = orderedMessages.map((message) => {
let { role: _role, sender, text } = message;
const role = _role ?? sender;
const content = text ?? '';
return {
role: role?.toLowerCase() === 'user' ? 'user' : 'assistant',
content,
};
});
return {
prompt: formattedMessages,
tokenCountMap: null, // Simplified for the mock
};
});
return TestClient;
};
module.exports = { FakeClient, initializeFakeClient };