♻️ fix: Prevent Instructions from Removal when nearing Max Context (#5516)

* refactor: getMessagesWithinTokenLimit to accept params object
* refactor: always include instructions in payload if provided
* ci: remove obsolete test
* refactor: update logoutUser to accept request object and handle session destruction
* test: enhance getMessagesWithinTokenLimit tests for instruction handling
2026-02-03 16:21:50 +01:00 · 2025-01-27 20:37:38 -05:00 · 2025-01-27 20:37:38 -05:00 · 4110209494
commit 4110209494
parent 528ee62eb1
6 changed files with 185 additions and 83 deletions
--- a/api/app/clients/BaseClient.js
+++ b/api/app/clients/BaseClient.js
@ -347,25 +347,38 @@ class BaseClient {
   * If the token limit would be exceeded by adding a message, that message is not added to the context and remains in the original array.
   * The method uses `push` and `pop` operations for efficient array manipulation, and reverses the context array at the end to maintain the original order of the messages.
   *
-   * @param {Array} _messages - An array of messages, each with a `tokenCount` property. The messages should be ordered from oldest to newest.
-   * @param {number} [maxContextTokens] - The max number of tokens allowed in the context. If not provided, defaults to `this.maxContextTokens`.
-   * @returns {Object} An object with four properties: `context`, `summaryIndex`, `remainingContextTokens`, and `messagesToRefine`.
+   * @param {Object} params
+   * @param {TMessage[]} params.messages - An array of messages, each with a `tokenCount` property. The messages should be ordered from oldest to newest.
+   * @param {number} [params.maxContextTokens] - The max number of tokens allowed in the context. If not provided, defaults to `this.maxContextTokens`.
+   * @param {{ role: 'system', content: string, tokenCount: number }} [params.instructions] - Instructions already added to the context at index 0.
+   * @returns {Promise<{
+   *  context: TMessage[],
+   *  remainingContextTokens: number,
+   *  messagesToRefine: TMessage[],
+   *  summaryIndex: number,
+   * }>} An object with four properties: `context`, `summaryIndex`, `remainingContextTokens`, and `messagesToRefine`.
   *    `context` is an array of messages that fit within the token limit.
   *    `summaryIndex` is the index of the first message in the `messagesToRefine` array.
   *    `remainingContextTokens` is the number of tokens remaining within the limit after adding the messages to the context.
   *    `messagesToRefine` is an array of messages that were not added to the context because they would have exceeded the token limit.
   */
-  async getMessagesWithinTokenLimit(_messages, maxContextTokens) {
+  async getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, instructions }) {
    // Every reply is primed with <|start|>assistant<|message|>, so we
    // start with 3 tokens for the label after all messages have been counted.
-    let currentTokenCount = 3;
    let summaryIndex = -1;
-    let remainingContextTokens = maxContextTokens ?? this.maxContextTokens;
+    let currentTokenCount = 3;
+    const instructionsTokenCount = instructions?.tokenCount ?? 0;
+    let remainingContextTokens =
+      (maxContextTokens ?? this.maxContextTokens) - instructionsTokenCount;
    const messages = [..._messages];

    const context = [];
+
    if (currentTokenCount < remainingContextTokens) {
      while (messages.length > 0 && currentTokenCount < remainingContextTokens) {
+        if (messages.length === 1 && instructions) {
+          break;
+        }
        const poppedMessage = messages.pop();
        const { tokenCount } = poppedMessage;

@ -379,6 +392,11 @@ class BaseClient {
      }
    }

+    if (instructions) {
+      context.push(_messages[0]);
+      messages.shift();
+    }
+
    const prunedMemory = messages;
    summaryIndex = prunedMemory.length - 1;
    remainingContextTokens -= currentTokenCount;
@ -403,12 +421,18 @@ class BaseClient {
    if (instructions) {
      ({ tokenCount, ..._instructions } = instructions);
    }
+
    _instructions && logger.debug('[BaseClient] instructions tokenCount: ' + tokenCount);
-    let payload = this.addInstructions(formattedMessages, _instructions);
-    let orderedWithInstructions = this.addInstructions(orderedMessages, instructions);
+    if (tokenCount && tokenCount > this.maxContextTokens) {
+      const info = `${tokenCount} / ${this.maxContextTokens}`;
+      const errorMessage = `{ "type": "${ErrorTypes.INPUT_LENGTH}", "info": "${info}" }`;
+      logger.warn(`Instructions token count exceeds max token count (${info}).`);
+      throw new Error(errorMessage);
+    }
+
    if (this.clientName === EModelEndpoint.agents) {
      const { dbMessages, editedIndices } = truncateToolCallOutputs(
-        orderedWithInstructions,
+        orderedMessages,
        this.maxContextTokens,
        this.getTokenCountForMessage.bind(this),
      );
@ -416,14 +440,19 @@ class BaseClient {
      if (editedIndices.length > 0) {
        logger.debug('[BaseClient] Truncated tool call outputs:', editedIndices);
        for (const index of editedIndices) {
-          payload[index].content = dbMessages[index].content;
+          formattedMessages[index].content = dbMessages[index].content;
        }
-        orderedWithInstructions = dbMessages;
+        orderedMessages = dbMessages;
      }
    }

+    let orderedWithInstructions = this.addInstructions(orderedMessages, instructions);
+
    let { context, remainingContextTokens, messagesToRefine, summaryIndex } =
-      await this.getMessagesWithinTokenLimit(orderedWithInstructions);
+      await this.getMessagesWithinTokenLimit({
+        messages: orderedWithInstructions,
+        instructions,
+      });

    logger.debug('[BaseClient] Context Count (1/2)', {
      remainingContextTokens,
@ -435,7 +464,9 @@ class BaseClient {
    let { shouldSummarize } = this;

    // Calculate the difference in length to determine how many messages were discarded if any
-    const { length } = payload;
+    let payload;
+    let { length } = formattedMessages;
+    length += instructions != null ? 1 : 0;
    const diff = length - context.length;
    const firstMessage = orderedWithInstructions[0];
    const usePrevSummary =
@ -445,18 +476,31 @@ class BaseClient {
      this.previous_summary.messageId === firstMessage.messageId;

    if (diff > 0) {
-      payload = payload.slice(diff);
+      payload = formattedMessages.slice(diff);
      logger.debug(
        `[BaseClient] Difference between original payload (${length}) and context (${context.length}): ${diff}`,
      );
    }

+    payload = this.addInstructions(payload ?? formattedMessages, _instructions);
+
    const latestMessage = orderedWithInstructions[orderedWithInstructions.length - 1];
    if (payload.length === 0 && !shouldSummarize && latestMessage) {
      const info = `${latestMessage.tokenCount} / ${this.maxContextTokens}`;
      const errorMessage = `{ "type": "${ErrorTypes.INPUT_LENGTH}", "info": "${info}" }`;
      logger.warn(`Prompt token count exceeds max token count (${info}).`);
      throw new Error(errorMessage);
+    } else if (
+      _instructions &&
+      payload.length === 1 &&
+      payload[0].content === _instructions.content
+    ) {
+      const info = `${tokenCount + 3} / ${this.maxContextTokens}`;
+      const errorMessage = `{ "type": "${ErrorTypes.INPUT_LENGTH}", "info": "${info}" }`;
+      logger.warn(
+        `Including instructions, the prompt token count exceeds remaining max token count (${info}).`,
+      );
+      throw new Error(errorMessage);
    }

    if (usePrevSummary) {