mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 00:40:14 +01:00
♻️ fix: Prevent Instructions from Removal when nearing Max Context (#5516)
* refactor: getMessagesWithinTokenLimit to accept params object * refactor: always include instructions in payload if provided * ci: remove obsolete test * refactor: update logoutUser to accept request object and handle session destruction * test: enhance getMessagesWithinTokenLimit tests for instruction handling
This commit is contained in:
parent
528ee62eb1
commit
4110209494
6 changed files with 185 additions and 83 deletions
|
|
@ -347,25 +347,38 @@ class BaseClient {
|
|||
* If the token limit would be exceeded by adding a message, that message is not added to the context and remains in the original array.
|
||||
* The method uses `push` and `pop` operations for efficient array manipulation, and reverses the context array at the end to maintain the original order of the messages.
|
||||
*
|
||||
* @param {Array} _messages - An array of messages, each with a `tokenCount` property. The messages should be ordered from oldest to newest.
|
||||
* @param {number} [maxContextTokens] - The max number of tokens allowed in the context. If not provided, defaults to `this.maxContextTokens`.
|
||||
* @returns {Object} An object with four properties: `context`, `summaryIndex`, `remainingContextTokens`, and `messagesToRefine`.
|
||||
* @param {Object} params
|
||||
* @param {TMessage[]} params.messages - An array of messages, each with a `tokenCount` property. The messages should be ordered from oldest to newest.
|
||||
* @param {number} [params.maxContextTokens] - The max number of tokens allowed in the context. If not provided, defaults to `this.maxContextTokens`.
|
||||
* @param {{ role: 'system', content: text, tokenCount: number }} [params.instructions] - Instructions already added to the context at index 0.
|
||||
* @returns {Promise<{
|
||||
* context: TMessage[],
|
||||
* remainingContextTokens: number,
|
||||
* messagesToRefine: TMessage[],
|
||||
* summaryIndex: number,
|
||||
* }>} An object with four properties: `context`, `summaryIndex`, `remainingContextTokens`, and `messagesToRefine`.
|
||||
* `context` is an array of messages that fit within the token limit.
|
||||
* `summaryIndex` is the index of the first message in the `messagesToRefine` array.
|
||||
* `remainingContextTokens` is the number of tokens remaining within the limit after adding the messages to the context.
|
||||
* `messagesToRefine` is an array of messages that were not added to the context because they would have exceeded the token limit.
|
||||
*/
|
||||
async getMessagesWithinTokenLimit(_messages, maxContextTokens) {
|
||||
async getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, instructions }) {
|
||||
// Every reply is primed with <|start|>assistant<|message|>, so we
|
||||
// start with 3 tokens for the label after all messages have been counted.
|
||||
let currentTokenCount = 3;
|
||||
let summaryIndex = -1;
|
||||
let remainingContextTokens = maxContextTokens ?? this.maxContextTokens;
|
||||
let currentTokenCount = 3;
|
||||
const instructionsTokenCount = instructions?.tokenCount ?? 0;
|
||||
let remainingContextTokens =
|
||||
(maxContextTokens ?? this.maxContextTokens) - instructionsTokenCount;
|
||||
const messages = [..._messages];
|
||||
|
||||
const context = [];
|
||||
|
||||
if (currentTokenCount < remainingContextTokens) {
|
||||
while (messages.length > 0 && currentTokenCount < remainingContextTokens) {
|
||||
if (messages.length === 1 && instructions) {
|
||||
break;
|
||||
}
|
||||
const poppedMessage = messages.pop();
|
||||
const { tokenCount } = poppedMessage;
|
||||
|
||||
|
|
@ -379,6 +392,11 @@ class BaseClient {
|
|||
}
|
||||
}
|
||||
|
||||
if (instructions) {
|
||||
context.push(_messages[0]);
|
||||
messages.shift();
|
||||
}
|
||||
|
||||
const prunedMemory = messages;
|
||||
summaryIndex = prunedMemory.length - 1;
|
||||
remainingContextTokens -= currentTokenCount;
|
||||
|
|
@ -403,12 +421,18 @@ class BaseClient {
|
|||
if (instructions) {
|
||||
({ tokenCount, ..._instructions } = instructions);
|
||||
}
|
||||
|
||||
_instructions && logger.debug('[BaseClient] instructions tokenCount: ' + tokenCount);
|
||||
let payload = this.addInstructions(formattedMessages, _instructions);
|
||||
let orderedWithInstructions = this.addInstructions(orderedMessages, instructions);
|
||||
if (tokenCount && tokenCount > this.maxContextTokens) {
|
||||
const info = `${tokenCount} / ${this.maxContextTokens}`;
|
||||
const errorMessage = `{ "type": "${ErrorTypes.INPUT_LENGTH}", "info": "${info}" }`;
|
||||
logger.warn(`Instructions token count exceeds max token count (${info}).`);
|
||||
throw new Error(errorMessage);
|
||||
}
|
||||
|
||||
if (this.clientName === EModelEndpoint.agents) {
|
||||
const { dbMessages, editedIndices } = truncateToolCallOutputs(
|
||||
orderedWithInstructions,
|
||||
orderedMessages,
|
||||
this.maxContextTokens,
|
||||
this.getTokenCountForMessage.bind(this),
|
||||
);
|
||||
|
|
@ -416,14 +440,19 @@ class BaseClient {
|
|||
if (editedIndices.length > 0) {
|
||||
logger.debug('[BaseClient] Truncated tool call outputs:', editedIndices);
|
||||
for (const index of editedIndices) {
|
||||
payload[index].content = dbMessages[index].content;
|
||||
formattedMessages[index].content = dbMessages[index].content;
|
||||
}
|
||||
orderedWithInstructions = dbMessages;
|
||||
orderedMessages = dbMessages;
|
||||
}
|
||||
}
|
||||
|
||||
let orderedWithInstructions = this.addInstructions(orderedMessages, instructions);
|
||||
|
||||
let { context, remainingContextTokens, messagesToRefine, summaryIndex } =
|
||||
await this.getMessagesWithinTokenLimit(orderedWithInstructions);
|
||||
await this.getMessagesWithinTokenLimit({
|
||||
messages: orderedWithInstructions,
|
||||
instructions,
|
||||
});
|
||||
|
||||
logger.debug('[BaseClient] Context Count (1/2)', {
|
||||
remainingContextTokens,
|
||||
|
|
@ -435,7 +464,9 @@ class BaseClient {
|
|||
let { shouldSummarize } = this;
|
||||
|
||||
// Calculate the difference in length to determine how many messages were discarded if any
|
||||
const { length } = payload;
|
||||
let payload;
|
||||
let { length } = formattedMessages;
|
||||
length += instructions != null ? 1 : 0;
|
||||
const diff = length - context.length;
|
||||
const firstMessage = orderedWithInstructions[0];
|
||||
const usePrevSummary =
|
||||
|
|
@ -445,18 +476,31 @@ class BaseClient {
|
|||
this.previous_summary.messageId === firstMessage.messageId;
|
||||
|
||||
if (diff > 0) {
|
||||
payload = payload.slice(diff);
|
||||
payload = formattedMessages.slice(diff);
|
||||
logger.debug(
|
||||
`[BaseClient] Difference between original payload (${length}) and context (${context.length}): ${diff}`,
|
||||
);
|
||||
}
|
||||
|
||||
payload = this.addInstructions(payload ?? formattedMessages, _instructions);
|
||||
|
||||
const latestMessage = orderedWithInstructions[orderedWithInstructions.length - 1];
|
||||
if (payload.length === 0 && !shouldSummarize && latestMessage) {
|
||||
const info = `${latestMessage.tokenCount} / ${this.maxContextTokens}`;
|
||||
const errorMessage = `{ "type": "${ErrorTypes.INPUT_LENGTH}", "info": "${info}" }`;
|
||||
logger.warn(`Prompt token count exceeds max token count (${info}).`);
|
||||
throw new Error(errorMessage);
|
||||
} else if (
|
||||
_instructions &&
|
||||
payload.length === 1 &&
|
||||
payload[0].content === _instructions.content
|
||||
) {
|
||||
const info = `${tokenCount + 3} / ${this.maxContextTokens}`;
|
||||
const errorMessage = `{ "type": "${ErrorTypes.INPUT_LENGTH}", "info": "${info}" }`;
|
||||
logger.warn(
|
||||
`Including instructions, the prompt token count exceeds remaining max token count (${info}).`,
|
||||
);
|
||||
throw new Error(errorMessage);
|
||||
}
|
||||
|
||||
if (usePrevSummary) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue