Mirror of https://github.com/danny-avila/LibreChat.git, synced 2025-12-17 08:50:15 +01:00
* refactor: pass model in message edit payload, use encoder in standalone util function
* feat: add summaryBuffer helper
* refactor(api/messages): use new countTokens helper and add auth middleware at top
* wip: ConversationSummaryBufferMemory
* refactor: move pre-generation helpers to prompts dir
* chore: remove console log
* chore: remove test as payload will no longer carry tokenCount
* chore: update getMessagesWithinTokenLimit JSDoc
* refactor: optimize getMessagesForConversation and also break on summary, feat(ci): getMessagesForConversation tests
* refactor(getMessagesForConvo): count '00000000-0000-0000-0000-000000000000' as root message
* chore: add newer model to token map
* fix: condition was pointing to a prop of the array instead of the message prop
* refactor(BaseClient): use object for refineMessages param, rename 'summary' to 'summaryMessage', add previous_summary
  refactor(getMessagesWithinTokenLimit): replace text and tokenCount if should summarize, summary, and summaryTokenCount are present
  fix/refactor(handleContextStrategy): use the right comparison length for context diff, and replace payload first message when a summary is present
* chore: log previous_summary if debugging
* refactor(formatMessage): assume if role is defined that it's a valid value
* refactor(getMessagesWithinTokenLimit): remove summary logic
  refactor(handleContextStrategy): add usePrevSummary logic in case only summary was pruned
  refactor(loadHistory): initial message query will return all ordered messages but keep track of the latest summary
  refactor(getMessagesForConversation): use object for single param, edit jsdoc, edit all files using the method
  refactor(ChatGPTClient): order messages before buildPrompt is called, TODO: add convoSumBuffMemory logic
* fix: undefined handling and summarizing only when shouldRefineContext is true
* chore(BaseClient): fix test results omitting system role for summaries and test edge case
* chore: export summaryBuffer from index file
* refactor(OpenAIClient/BaseClient): move refineMessages to subclass, implement LLM initialization for summaryBuffer
* feat: add OPENAI_SUMMARIZE to enable summarizing, refactor: rename client prop 'shouldRefineContext' to 'shouldSummarize', change contextStrategy value to 'summarize' from 'refine'
* refactor: rename refineMessages method to summarizeMessages for clarity
* chore: clarify summary future intent in .env.example
* refactor(initializeLLM): handle case for either 'model' or 'modelName' being passed
* feat(gptPlugins): enable summarization for plugins
* refactor(gptPlugins): utilize new initializeLLM method and formatting methods for messages, use payload array for currentMessages and assign pastMessages sooner
* refactor(agents): use ConversationSummaryBufferMemory for both agent types
* refactor(formatMessage): optimize original method for langchain, add helper function for langchain messages, add JSDocs and tests
* refactor(summaryBuffer): add helper to createSummaryBufferMemory, and use new formatting helpers
* fix: forgot to spread formatMessages; also took the opportunity to pluralize the filename
* refactor: pass memory to tools, namely openapi specs. Not used and may never be used by the new method, but added for testing
* ci(formatMessages): add more exhaustive checks for langchain messages
* feat: add debug env var for OpenAI
* chore: delete unnecessary comments
* chore: add extra note about summary feature
* fix: remove tokenCount from payload instructions
* fix: test fail
* fix: only pass instructions to payload when defined or not an empty object
* refactor: fromPromptMessages is deprecated, use renamed method fromMessages
* refactor: use 'includes' instead of 'startsWith' for extended OpenRouter compatibility
* fix(PluginsClient.buildPromptBody): handle undefined message strings
* chore: log langchain titling error
* feat: getModelMaxTokens helper
* feat: tokenSplit helper
* feat: summary prompts updated
* fix: optimize _CUT_OFF_SUMMARIZER prompt
* refactor(summaryBuffer): use custom summary prompt, allow prompt to be passed, pass humanPrefix and aiPrefix to memory, along with any future variables, rename messagesToRefine to context
* fix(summaryBuffer): handle edge case where messagesToRefine exceeds summary context
  refactor(BaseClient): allow custom maxContextTokens to be passed to getMessagesWithinTokenLimit, add defined check before unshifting summaryMessage, update shouldSummarize based on this
  refactor(OpenAIClient): use getModelMaxTokens, use cut-off message method for summary if no messages were left after pruning
* fix(handleContextStrategy): handle case where incoming prompt is bigger than model context
* chore: rename refinedContent to splitText
* chore: remove unnecessary debug log
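The summary feature described above hinges on a conversation summary buffer. As a rough illustration only (not the project's actual createSummaryBufferMemory helper, and assuming the langchain JS API of this period), a ConversationSummaryBufferMemory can be wired up like this:

const { ChatOpenAI } = require('langchain/chat_models/openai');
const { ConversationSummaryBufferMemory } = require('langchain/memory');

// Hypothetical standalone example; requires OPENAI_API_KEY in the environment.
const llm = new ChatOpenAI({ modelName: 'gpt-3.5-turbo', temperature: 0 });
const memory = new ConversationSummaryBufferMemory({
  llm,
  maxTokenLimit: 100, // once buffered turns exceed this, older turns are folded into a summary
  returnMessages: true,
});

(async () => {
  // saveContext stores a turn and prunes the buffer if it is over the token limit
  await memory.saveContext(
    { input: 'Hello, can you help me plan a trip?' },
    { output: 'Of course! Where would you like to go?' },
  );
  // Returns { history: [...] }, with a system message carrying the running summary once pruning has occurred
  console.log(await memory.loadMemoryVariables({}));
})();

Per the commits above, the PR's helper additionally passes a custom summary prompt along with humanPrefix and aiPrefix to the memory.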
355 lines
11 KiB
JavaScript
// const { Agent, ProxyAgent } = require('undici');
const BaseClient = require('./BaseClient');
const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
const Anthropic = require('@anthropic-ai/sdk');

const HUMAN_PROMPT = '\n\nHuman:';
const AI_PROMPT = '\n\nAssistant:';

const tokenizersCache = {};

class AnthropicClient extends BaseClient {
  constructor(apiKey, options = {}, cacheOptions = {}) {
    super(apiKey, options, cacheOptions);
    this.apiKey = apiKey || process.env.ANTHROPIC_API_KEY;
    this.sender = 'Anthropic';
    this.userLabel = HUMAN_PROMPT;
    this.assistantLabel = AI_PROMPT;
    this.setOptions(options);
  }

  setOptions(options) {
    if (this.options && !this.options.replaceOptions) {
      // nested options aren't spread properly, so we need to do this manually
      this.options.modelOptions = {
        ...this.options.modelOptions,
        ...options.modelOptions,
      };
      delete options.modelOptions;
      // now we can merge options
      this.options = {
        ...this.options,
        ...options,
      };
    } else {
      this.options = options;
    }

    const modelOptions = this.options.modelOptions || {};
    this.modelOptions = {
      ...modelOptions,
      // set some good defaults (check for undefined in some cases because they may be 0)
      model: modelOptions.model || 'claude-1',
      temperature: typeof modelOptions.temperature === 'undefined' ? 0.7 : modelOptions.temperature, // 0 - 1, 0.7 is recommended
      topP: typeof modelOptions.topP === 'undefined' ? 0.7 : modelOptions.topP, // 0 - 1, default: 0.7
      topK: typeof modelOptions.topK === 'undefined' ? 40 : modelOptions.topK, // 1-40, default: 40
      stop: modelOptions.stop, // no stop method for now
    };

    this.maxContextTokens = this.options.maxContextTokens || 99999;
    this.maxResponseTokens = this.modelOptions.maxOutputTokens || 1500;
    this.maxPromptTokens =
      this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens;

    if (this.maxPromptTokens + this.maxResponseTokens > this.maxContextTokens) {
      throw new Error(
        `maxPromptTokens + maxOutputTokens (${this.maxPromptTokens} + ${this.maxResponseTokens} = ${
          this.maxPromptTokens + this.maxResponseTokens
        }) must be less than or equal to maxContextTokens (${this.maxContextTokens})`,
      );
    }

    this.startToken = '||>';
    this.endToken = '';
    this.gptEncoder = this.constructor.getTokenizer('cl100k_base');

    if (!this.modelOptions.stop) {
      const stopTokens = [this.startToken];
      if (this.endToken && this.endToken !== this.startToken) {
        stopTokens.push(this.endToken);
      }
      stopTokens.push(`${this.userLabel}`);
      stopTokens.push('<|diff_marker|>');

      this.modelOptions.stop = stopTokens;
    }

    return this;
  }

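  // Example with the defaults above: maxContextTokens = 99999 and, with no
  // maxOutputTokens set, maxResponseTokens = 1500, so maxPromptTokens falls back to
  // 99999 - 1500 = 98499 and the guard (98499 + 1500 <= 99999) passes. The error is
  // only thrown when an explicit maxPromptTokens pushes the combined budget past
  // maxContextTokens.
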
  getClient() {
    if (this.options.reverseProxyUrl) {
      return new Anthropic({
        apiKey: this.apiKey,
        baseURL: this.options.reverseProxyUrl,
      });
    } else {
      return new Anthropic({
        apiKey: this.apiKey,
      });
    }
  }

  async buildMessages(messages, parentMessageId) {
    const orderedMessages = this.constructor.getMessagesForConversation({
      messages,
      parentMessageId,
    });
    if (this.options.debug) {
      console.debug('AnthropicClient: orderedMessages', orderedMessages, parentMessageId);
    }

    const formattedMessages = orderedMessages.map((message) => ({
      author: message.isCreatedByUser ? this.userLabel : this.assistantLabel,
      content: message?.content ?? message.text,
    }));

    let lastAuthor = '';
    let groupedMessages = [];

    for (let message of formattedMessages) {
      // If last author is not same as current author, add to new group
      if (lastAuthor !== message.author) {
        groupedMessages.push({
          author: message.author,
          content: [message.content],
        });
        lastAuthor = message.author;
        // If same author, append content to the last group
      } else {
        groupedMessages[groupedMessages.length - 1].content.push(message.content);
      }
    }

    let identityPrefix = '';
    if (this.options.userLabel) {
      identityPrefix = `\nHuman's name: ${this.options.userLabel}`;
    }

    if (this.options.modelLabel) {
      identityPrefix = `${identityPrefix}\nYou are ${this.options.modelLabel}`;
    }

    let promptPrefix = (this.options.promptPrefix || '').trim();
    if (promptPrefix) {
      // If the prompt prefix doesn't end with the end token, add it.
      if (!promptPrefix.endsWith(`${this.endToken}`)) {
        promptPrefix = `${promptPrefix.trim()}${this.endToken}\n\n`;
      }
      promptPrefix = `\nContext:\n${promptPrefix}`;
    }

    if (identityPrefix) {
      promptPrefix = `${identityPrefix}${promptPrefix}`;
    }

    // Prompt AI to respond, empty if last message was from AI
    let isEdited = lastAuthor === this.assistantLabel;
    const promptSuffix = isEdited ? '' : `${promptPrefix}${this.assistantLabel}\n`;
    let currentTokenCount = isEdited
      ? this.getTokenCount(promptPrefix)
      : this.getTokenCount(promptSuffix);

    let promptBody = '';
    const maxTokenCount = this.maxPromptTokens;

    const context = [];

    // Iterate backwards through the messages, adding them to the prompt until we reach the max token count.
    // Do this within a recursive async function so that it doesn't block the event loop for too long.
    // Also, remove the next message when the message that puts us over the token limit is created by the user.
    // Otherwise, remove only the exceeding message. This is due to Anthropic's strict payload rule to start with "Human:".
    const nextMessage = {
      remove: false,
      tokenCount: 0,
      messageString: '',
    };

    const buildPromptBody = async () => {
      if (currentTokenCount < maxTokenCount && groupedMessages.length > 0) {
        const message = groupedMessages.pop();
        const isCreatedByUser = message.author === this.userLabel;
        // Use promptPrefix if the message is the edited assistant message
        const messagePrefix =
          isCreatedByUser || !isEdited ? message.author : `${promptPrefix}${message.author}`;
        const messageString = `${messagePrefix}\n${message.content}${this.endToken}\n`;
        let newPromptBody = `${messageString}${promptBody}`;

        context.unshift(message);

        const tokenCountForMessage = this.getTokenCount(messageString);
        const newTokenCount = currentTokenCount + tokenCountForMessage;

        if (!isCreatedByUser) {
          nextMessage.messageString = messageString;
          nextMessage.tokenCount = tokenCountForMessage;
        }

        if (newTokenCount > maxTokenCount) {
          if (!promptBody) {
            // This is the first message, so we can't add it. Just throw an error.
            throw new Error(
              `Prompt is too long. Max token count is ${maxTokenCount}, but prompt is ${newTokenCount} tokens long.`,
            );
          }

          // Otherwise, this message would put us over the token limit, so don't add it.
          // If created by user, remove the next message; otherwise remove only this message.
          if (isCreatedByUser) {
            nextMessage.remove = true;
          }

          return false;
        }
        promptBody = newPromptBody;
        currentTokenCount = newTokenCount;

        // Switch off isEdited after using it for the first time
        if (isEdited) {
          isEdited = false;
        }

        // wait for next tick to avoid blocking the event loop
        await new Promise((resolve) => setImmediate(resolve));
        return buildPromptBody();
      }
      return true;
    };

    await buildPromptBody();

    if (nextMessage.remove) {
      promptBody = promptBody.replace(nextMessage.messageString, '');
      currentTokenCount -= nextMessage.tokenCount;
      context.shift();
    }

    let prompt = `${promptBody}${promptSuffix}`;

    // Add 2 tokens for metadata after all messages have been counted.
    currentTokenCount += 2;

    // Use up to `this.maxContextTokens` tokens (prompt + response), but try to leave `this.maxResponseTokens` tokens for the response.
    this.modelOptions.maxOutputTokens = Math.min(
      this.maxContextTokens - currentTokenCount,
      this.maxResponseTokens,
    );

    return { prompt, context };
  }

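  // Example of the prompt shape buildMessages produces (assuming no promptPrefix and a
  // conversation ending with a user message): each grouped message is rendered as
  // "\n\nHuman:\n<content>\n" or "\n\nAssistant:\n<content>\n", concatenated in
  // conversation order, followed by the promptSuffix "\n\nAssistant:\n" that cues the
  // model to respond.
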
  getCompletion() {
    console.log('AnthropicClient doesn\'t use getCompletion (all handled in sendCompletion)');
  }

  async sendCompletion(payload, { onProgress, abortController }) {
    if (!abortController) {
      abortController = new AbortController();
    }

    const { signal } = abortController;

    const modelOptions = { ...this.modelOptions };
    if (typeof onProgress === 'function') {
      modelOptions.stream = true;
    }

    const { debug } = this.options;
    if (debug) {
      console.debug();
      console.debug(modelOptions);
      console.debug();
    }

    const client = this.getClient();
    const metadata = {
      user_id: this.user,
    };

    let text = '';
    const {
      stream,
      model,
      temperature,
      maxOutputTokens,
      stop: stop_sequences,
      topP: top_p,
      topK: top_k,
    } = this.modelOptions;
    const requestOptions = {
      prompt: payload,
      model,
      stream: stream || true,
      max_tokens_to_sample: maxOutputTokens || 1500,
      stop_sequences,
      temperature,
      metadata,
      top_p,
      top_k,
    };
    if (this.options.debug) {
      console.log('AnthropicClient: requestOptions');
      console.dir(requestOptions, { depth: null });
    }
    const response = await client.completions.create(requestOptions);

    // Keep a single handler reference so removeEventListener below detaches the same
    // function that was registered here.
    const abortHandler = () => {
      if (this.options.debug) {
        console.log('AnthropicClient: message aborted!');
      }
      response.controller.abort();
    };

    signal.addEventListener('abort', abortHandler);

    for await (const completion of response) {
      if (this.options.debug) {
        // Uncomment to debug message stream
        // console.debug(completion);
      }
      text += completion.completion;
      onProgress(completion.completion);
    }

    signal.removeEventListener('abort', abortHandler);

    return text.trim();
  }

  getSaveOptions() {
    return {
      promptPrefix: this.options.promptPrefix,
      modelLabel: this.options.modelLabel,
      ...this.modelOptions,
    };
  }

  getBuildMessagesOptions() {
    if (this.options.debug) {
      console.log('AnthropicClient doesn\'t use getBuildMessagesOptions');
    }
  }

  static getTokenizer(encoding, isModelName = false, extendSpecialTokens = {}) {
    if (tokenizersCache[encoding]) {
      return tokenizersCache[encoding];
    }
    let tokenizer;
    if (isModelName) {
      tokenizer = encodingForModel(encoding, extendSpecialTokens);
    } else {
      tokenizer = getEncoding(encoding, extendSpecialTokens);
    }
    tokenizersCache[encoding] = tokenizer;
    return tokenizer;
  }

  getTokenCount(text) {
    return this.gptEncoder.encode(text, 'all').length;
  }
}

module.exports = AnthropicClient;
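
// Minimal usage sketch (illustrative only; in LibreChat these methods are normally
// driven through BaseClient's message flow). The `messages` array and
// `parentMessageId` below are hypothetical placeholders.
//
// const client = new AnthropicClient(process.env.ANTHROPIC_API_KEY, {
//   modelOptions: { model: 'claude-1', temperature: 0.7 },
//   debug: true,
// });
// const { prompt } = await client.buildMessages(messages, parentMessageId);
// const text = await client.sendCompletion(prompt, {
//   onProgress: (chunk) => process.stdout.write(chunk),
//   abortController: new AbortController(),
// });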