Merge branch 'dev' into feat/context-window-ui

2026-02-16 07:28:09 +01:00 · 2025-12-29 02:07:54 +01:00 · 2025-12-29 02:07:54 +01:00 · cb8322ca85
commit cb8322ca85
parent 58f75bfa83 d0835d5222
407 changed files with 25479 additions and 19894 deletions
--- a/api/server/controllers/agents/callbacks.js
+++ b/api/server/controllers/agents/callbacks.js
@ -1,5 +1,5 @@
 const { nanoid } = require('nanoid');
-const { sendEvent } = require('@librechat/api');
+const { sendEvent, GenerationJobManager } = require('@librechat/api');
 const { logger } = require('@librechat/data-schemas');
 const { Tools, StepTypes, FileContext, ErrorTypes } = require('librechat-data-provider');
 const {
@ -144,17 +144,38 @@ function checkIfLastAgent(last_agent_id, langgraph_node) {
  return langgraph_node?.endsWith(last_agent_id);
 }

+/**
+ * Routes a single stream event to one of two transports, selected by `streamId`.
+ * A truthy `streamId` hands the event to `GenerationJobManager.emitChunk` (resumable mode)
+ * and `res` is not touched; a falsy `streamId` sends it with `sendEvent(res, eventData)`
+ * (standard mode).
+ * @param {ServerResponse} res - The server response object; only used when `streamId` is falsy
+ * @param {string | null} streamId - The stream ID for resumable mode, or null for standard mode
+ * @param {Object} eventData - The event data to send, passed through unchanged
+ * @returns {void}
+ */
+function emitEvent(res, streamId, eventData) {
+  if (streamId) {
+    GenerationJobManager.emitChunk(streamId, eventData);
+  } else {
+    sendEvent(res, eventData);
+  }
+}
+
 /**
 * Get default handlers for stream events.
 * @param {Object} options - The options object.
- * @param {ServerResponse} options.res - The options object.
- * @param {ContentAggregator} options.aggregateContent - The options object.
+ * @param {ServerResponse} options.res - The server response object.
+ * @param {ContentAggregator} options.aggregateContent - Content aggregator function.
 * @param {ToolEndCallback} options.toolEndCallback - Callback to use when tool ends.
 * @param {Array<UsageMetadata>} options.collectedUsage - The list of collected usage metadata.
+ * @param {string | null} [options.streamId] - The stream ID for resumable mode, or null for standard mode.
 * @returns {Record<string, t.EventHandler>} The default handlers.
 * @throws {Error} If `res` or `aggregateContent` is missing.
 */
-function getDefaultHandlers({ res, aggregateContent, toolEndCallback, collectedUsage }) {
+function getDefaultHandlers({
+  res,
+  aggregateContent,
+  toolEndCallback,
+  collectedUsage,
+  streamId = null,
+}) {
  if (!res || !aggregateContent) {
    throw new Error(
      `[getDefaultHandlers] Missing required options: res: ${!res}, aggregateContent: ${!aggregateContent}`,
@ -173,16 +194,16 @@ function getDefaultHandlers({ res, aggregateContent, toolEndCallback, collectedU
       */
      handle: (event, data, metadata) => {
        if (data?.stepDetails.type === StepTypes.TOOL_CALLS) {
-          sendEvent(res, { event, data });
+          emitEvent(res, streamId, { event, data });
        } else if (checkIfLastAgent(metadata?.last_agent_id, metadata?.langgraph_node)) {
-          sendEvent(res, { event, data });
+          emitEvent(res, streamId, { event, data });
        } else if (!metadata?.hide_sequential_outputs) {
-          sendEvent(res, { event, data });
+          emitEvent(res, streamId, { event, data });
        } else {
          const agentName = metadata?.name ?? 'Agent';
          const isToolCall = data?.stepDetails.type === StepTypes.TOOL_CALLS;
          const action = isToolCall ? 'performing a task...' : 'thinking...';
-          sendEvent(res, {
+          emitEvent(res, streamId, {
            event: 'on_agent_update',
            data: {
              runId: metadata?.run_id,
@ -202,11 +223,11 @@ function getDefaultHandlers({ res, aggregateContent, toolEndCallback, collectedU
       */
      handle: (event, data, metadata) => {
        if (data?.delta.type === StepTypes.TOOL_CALLS) {
-          sendEvent(res, { event, data });
+          emitEvent(res, streamId, { event, data });
        } else if (checkIfLastAgent(metadata?.last_agent_id, metadata?.langgraph_node)) {
-          sendEvent(res, { event, data });
+          emitEvent(res, streamId, { event, data });
        } else if (!metadata?.hide_sequential_outputs) {
-          sendEvent(res, { event, data });
+          emitEvent(res, streamId, { event, data });
        }
        aggregateContent({ event, data });
      },
@ -220,11 +241,11 @@ function getDefaultHandlers({ res, aggregateContent, toolEndCallback, collectedU
       */
      handle: (event, data, metadata) => {
        if (data?.result != null) {
-          sendEvent(res, { event, data });
+          emitEvent(res, streamId, { event, data });
        } else if (checkIfLastAgent(metadata?.last_agent_id, metadata?.langgraph_node)) {
-          sendEvent(res, { event, data });
+          emitEvent(res, streamId, { event, data });
        } else if (!metadata?.hide_sequential_outputs) {
-          sendEvent(res, { event, data });
+          emitEvent(res, streamId, { event, data });
        }
        aggregateContent({ event, data });
      },
@ -238,9 +259,9 @@ function getDefaultHandlers({ res, aggregateContent, toolEndCallback, collectedU
       */
      handle: (event, data, metadata) => {
        if (checkIfLastAgent(metadata?.last_agent_id, metadata?.langgraph_node)) {
-          sendEvent(res, { event, data });
+          emitEvent(res, streamId, { event, data });
        } else if (!metadata?.hide_sequential_outputs) {
-          sendEvent(res, { event, data });
+          emitEvent(res, streamId, { event, data });
        }
        aggregateContent({ event, data });
      },
@ -254,9 +275,9 @@ function getDefaultHandlers({ res, aggregateContent, toolEndCallback, collectedU
       */
      handle: (event, data, metadata) => {
        if (checkIfLastAgent(metadata?.last_agent_id, metadata?.langgraph_node)) {
-          sendEvent(res, { event, data });
+          emitEvent(res, streamId, { event, data });
        } else if (!metadata?.hide_sequential_outputs) {
-          sendEvent(res, { event, data });
+          emitEvent(res, streamId, { event, data });
        }
        aggregateContent({ event, data });
      },
@ -266,15 +287,30 @@ function getDefaultHandlers({ res, aggregateContent, toolEndCallback, collectedU
  return handlers;
 }

+/**
+ * Helper to deliver one attachment either to res or to the job emitter.
+ * With a truthy `streamId` the attachment is wrapped as `{ event: 'attachment', data: attachment }`
+ * and passed to `GenerationJobManager.emitChunk`; `res` is not written to.
+ * Otherwise it is written straight to `res` as a text frame made of an `event: attachment` line,
+ * a `data:` line holding the JSON-serialized attachment, and a terminating blank line.
+ * @param {ServerResponse} res - The server response object; only written to when `streamId` is falsy
+ * @param {string | null} streamId - The stream ID for resumable mode, or null for standard mode
+ * @param {Object} attachment - The attachment data; JSON-serialized in standard mode
+ * @returns {void}
+ */
+function writeAttachment(res, streamId, attachment) {
+  if (streamId) {
+    GenerationJobManager.emitChunk(streamId, { event: 'attachment', data: attachment });
+  } else {
+    res.write(`event: attachment\ndata: ${JSON.stringify(attachment)}\n\n`);
+  }
+}
+
 /**
 *
 * @param {Object} params
 * @param {ServerRequest} params.req
 * @param {ServerResponse} params.res
 * @param {Promise<MongoFile | { filename: string; filepath: string; expires: number;} | null>[]} params.artifactPromises
+ * @param {string | null} [params.streamId] - The stream ID for resumable mode, or null for standard mode.
 * @returns {ToolEndCallback} The tool end callback.
 */
-function createToolEndCallback({ req, res, artifactPromises }) {
+function createToolEndCallback({ req, res, artifactPromises, streamId = null }) {
  /**
   * @type {ToolEndCallback}
   */
@ -302,10 +338,10 @@ function createToolEndCallback({ req, res, artifactPromises }) {
          if (!attachment) {
            return null;
          }
-          if (!res.headersSent) {
+          if (!streamId && !res.headersSent) {
            return attachment;
          }
-          res.write(`event: attachment\ndata: ${JSON.stringify(attachment)}\n\n`);
+          writeAttachment(res, streamId, attachment);
          return attachment;
        })().catch((error) => {
          logger.error('Error processing file citations:', error);
@ -314,8 +350,6 @@ function createToolEndCallback({ req, res, artifactPromises }) {
      );
    }

-    // TODO: a lot of duplicated code in createToolEndCallback
-    // we should refactor this to use a helper function in a follow-up PR
    if (output.artifact[Tools.ui_resources]) {
      artifactPromises.push(
        (async () => {
@ -326,10 +360,10 @@ function createToolEndCallback({ req, res, artifactPromises }) {
            conversationId: metadata.thread_id,
            [Tools.ui_resources]: output.artifact[Tools.ui_resources].data,
          };
-          if (!res.headersSent) {
+          if (!streamId && !res.headersSent) {
            return attachment;
          }
-          res.write(`event: attachment\ndata: ${JSON.stringify(attachment)}\n\n`);
+          writeAttachment(res, streamId, attachment);
          return attachment;
        })().catch((error) => {
          logger.error('Error processing artifact content:', error);
@ -348,10 +382,10 @@ function createToolEndCallback({ req, res, artifactPromises }) {
            conversationId: metadata.thread_id,
            [Tools.web_search]: { ...output.artifact[Tools.web_search] },
          };
-          if (!res.headersSent) {
+          if (!streamId && !res.headersSent) {
            return attachment;
          }
-          res.write(`event: attachment\ndata: ${JSON.stringify(attachment)}\n\n`);
+          writeAttachment(res, streamId, attachment);
          return attachment;
        })().catch((error) => {
          logger.error('Error processing artifact content:', error);
@ -388,7 +422,7 @@ function createToolEndCallback({ req, res, artifactPromises }) {
              toolCallId: output.tool_call_id,
              conversationId: metadata.thread_id,
            });
-            if (!res.headersSent) {
+            if (!streamId && !res.headersSent) {
              return fileMetadata;
            }

@ -396,7 +430,7 @@ function createToolEndCallback({ req, res, artifactPromises }) {
              return null;
            }

-            res.write(`event: attachment\ndata: ${JSON.stringify(fileMetadata)}\n\n`);
+            writeAttachment(res, streamId, fileMetadata);
            return fileMetadata;
          })().catch((error) => {
            logger.error('Error processing artifact content:', error);
@ -435,7 +469,7 @@ function createToolEndCallback({ req, res, artifactPromises }) {
            conversationId: metadata.thread_id,
            session_id: output.artifact.session_id,
          });
-          if (!res.headersSent) {
+          if (!streamId && !res.headersSent) {
            return fileMetadata;
          }

@ -443,7 +477,7 @@ function createToolEndCallback({ req, res, artifactPromises }) {
            return null;
          }

-          res.write(`event: attachment\ndata: ${JSON.stringify(fileMetadata)}\n\n`);
+          writeAttachment(res, streamId, fileMetadata);
          return fileMetadata;
        })().catch((error) => {
          logger.error('Error processing code output:', error);
--- a/api/server/controllers/agents/client.js
+++ b/api/server/controllers/agents/client.js
@ -14,6 +14,7 @@ const {
  getBalanceConfig,
  getProviderConfig,
  memoryInstructions,
+  GenerationJobManager,
  getTransactionsConfig,
  createMemoryProcessor,
  filterMalformedContentParts,
@ -36,14 +37,13 @@ const {
  EModelEndpoint,
  PermissionTypes,
  isAgentsEndpoint,
-  AgentCapabilities,
+  isEphemeralAgentId,
  bedrockInputSchema,
  removeNullishValues,
 } = require('librechat-data-provider');
 const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
 const { encodeAndFormat } = require('~/server/services/Files/images/encode');
 const { createContextHandlers } = require('~/app/clients/prompts');
-const { checkCapability } = require('~/server/services/Config');
 const { getConvoFiles } = require('~/models/Conversation');
 const BaseClient = require('~/app/clients/BaseClient');
 const { getRoleByName } = require('~/models/Role');
@ -95,59 +95,101 @@ function logToolError(graph, error, toolId) {
  });
 }

+/** Matches a trailing `____N` suffix (four underscores, then digits) on an agent ID and captures N */
+const AGENT_SUFFIX_PATTERN = /____(\d+)$/;
+
 /**
- * Applies agent labeling to conversation history when multi-agent patterns are detected.
- * Labels content parts by their originating agent to prevent identity confusion.
+ * Creates a mapMethod for getMessagesForConversation that processes agent content.
+ * The returned mapper hands back the message untouched (same object) when it was created by
+ * the user, when its content is not an array, or when no agent metadata is found on its parts.
+ * Otherwise it returns a copy of the message whose content has been rebuilt:
+ * - Strips agentId/groupId metadata from every part that is kept
+ * - For multi-agent: keeps parts without an agentId plus the primary agent's parts
+ *   (primary = first agentId without a `____N` suffix, else the lowest suffix number)
+ * - For multi-agent: applies agent labels to the kept parts via labelContentByAgent
+ * Multi-agent mode is on when the primary agent has edges or agentConfigs is non-empty.
+ * In that mode only agentId counts as metadata; a single agent also checks groupId.
+ * If processing throws, the error is logged and the original message is returned.
 *
- * @param {TMessage[]} orderedMessages - The ordered conversation messages
- * @param {Agent} primaryAgent - The primary agent configuration
- * @param {Map<string, Agent>} agentConfigs - Map of additional agent configurations
- * @returns {TMessage[]} Messages with agent labels applied where appropriate
+ * @param {Agent} primaryAgent - Primary agent configuration (its id, name and edges are read)
+ * @param {Map<string, Agent>} [agentConfigs] - Additional agent configurations keyed by agent ID
+ * @returns {(message: TMessage) => TMessage} Map method for processing messages
 */
-function applyAgentLabelsToHistory(orderedMessages, primaryAgent, agentConfigs) {
-  const shouldLabelByAgent = (primaryAgent.edges?.length ?? 0) > 0 || (agentConfigs?.size ?? 0) > 0;
-
-  if (!shouldLabelByAgent) {
-    return orderedMessages;
-  }
-
-  const processedMessages = [];
-
-  for (let i = 0; i < orderedMessages.length; i++) {
-    const message = orderedMessages[i];
-
-    /** @type {Record<string, string>} */
-    const agentNames = { [primaryAgent.id]: primaryAgent.name || 'Assistant' };
+function createMultiAgentMapper(primaryAgent, agentConfigs) {
+  const hasMultipleAgents = (primaryAgent.edges?.length ?? 0) > 0 || (agentConfigs?.size ?? 0) > 0;

+  /** Agent ID -> display name lookup; stays null for a single agent. @type {Record<string, string> | null} */
+  let agentNames = null;
+  if (hasMultipleAgents) {
+    agentNames = { [primaryAgent.id]: primaryAgent.name || 'Assistant' };
    if (agentConfigs) {
      for (const [agentId, agentConfig] of agentConfigs.entries()) {
        agentNames[agentId] = agentConfig.name || agentConfig.id;
      }
    }
-
-    if (
-      !message.isCreatedByUser &&
-      message.metadata?.agentIdMap &&
-      Array.isArray(message.content)
-    ) {
-      try {
-        const labeledContent = labelContentByAgent(
-          message.content,
-          message.metadata.agentIdMap,
-          agentNames,
-        );
-
-        processedMessages.push({ ...message, content: labeledContent });
-      } catch (error) {
-        logger.error('[AgentClient] Error applying agent labels to message:', error);
-        processedMessages.push(message);
-      }
-    } else {
-      processedMessages.push(message);
-    }
  }

-  return processedMessages;
+  return (message) => {
+    if (message.isCreatedByUser || !Array.isArray(message.content)) {
+      return message;
+    }
+
+    // Multi-agent only: primary agent = first agentId without a ____N suffix, else the lowest suffix number seen
+    let primaryAgentId = null;
+    let hasAgentMetadata = false;
+
+    if (hasMultipleAgents) {
+      let lowestSuffixIndex = Infinity;
+      for (const part of message.content) {
+        const agentId = part?.agentId;
+        if (!agentId) {
+          continue;
+        }
+        hasAgentMetadata = true;
+
+        const suffixMatch = agentId.match(AGENT_SUFFIX_PATTERN);
+        if (!suffixMatch) {
+          primaryAgentId = agentId;
+          break;
+        }
+        const suffixIndex = parseInt(suffixMatch[1], 10);
+        if (suffixIndex < lowestSuffixIndex) {
+          lowestSuffixIndex = suffixIndex;
+          primaryAgentId = agentId;
+        }
+      }
+    } else {
+      // Single agent: nothing to filter, only detect whether any part carries agentId/groupId to strip
+      hasAgentMetadata = message.content.some((part) => part?.agentId || part?.groupId);
+    }
+
+    if (!hasAgentMetadata) {
+      return message;
+    }
+
+    try {
+      /** Kept parts with agentId/groupId removed. @type {Array<TMessageContentParts>} */
+      const filteredContent = [];
+      /** Index in filteredContent -> agentId of that part (multi-agent only). @type {Record<number, string>} */
+      const agentIdMap = {};
+
+      for (const part of message.content) {
+        const agentId = part?.agentId;
+        // Single agent: keep every part; multi-agent: keep untagged parts and the primary agent's parts
+        if (!hasMultipleAgents || !agentId || agentId === primaryAgentId) {
+          const newIndex = filteredContent.length;
+          const { agentId: _a, groupId: _g, ...cleanPart } = part;
+          filteredContent.push(cleanPart);
+          if (agentId && hasMultipleAgents) {
+            agentIdMap[newIndex] = agentId;
+          }
+        }
+      }
+
+      const finalContent =
+        Object.keys(agentIdMap).length > 0 && agentNames
+          ? labelContentByAgent(filteredContent, agentIdMap, agentNames)
+          : filteredContent;
+
+      return { ...message, content: finalContent };
+    } catch (error) {
+      logger.error('[AgentClient] Error processing multi-agent message:', error);
+      return message;
+    }
+  };
 }

 class AgentClient extends BaseClient {
@ -199,8 +241,6 @@ class AgentClient extends BaseClient {
    this.indexTokenCountMap = {};
    /** @type {(messages: BaseMessage[]) => Promise<void>} */
    this.processMemory;
-    /** @type {Record<number, string> | null} */
-    this.agentIdMap = null;
  }

  /**
@ -289,18 +329,13 @@ class AgentClient extends BaseClient {
    { instructions = null, additional_instructions = null },
    opts,
  ) {
-    let orderedMessages = this.constructor.getMessagesForConversation({
+    const orderedMessages = this.constructor.getMessagesForConversation({
      messages,
      parentMessageId,
      summary: this.shouldSummarize,
+      mapMethod: createMultiAgentMapper(this.options.agent, this.agentConfigs),
    });

-    orderedMessages = applyAgentLabelsToHistory(
-      orderedMessages,
-      this.options.agent,
-      this.agentConfigs,
-    );
-
    let payload;
    /** @type {number | undefined} */
    let promptTokens;
@ -552,10 +587,9 @@ class AgentClient extends BaseClient {
        agent: prelimAgent,
        allowedProviders,
        endpointOption: {
-          endpoint:
-            prelimAgent.id !== Constants.EPHEMERAL_AGENT_ID
-              ? EModelEndpoint.agents
-              : memoryConfig.agent?.provider,
+          endpoint: !isEphemeralAgentId(prelimAgent.id)
+            ? EModelEndpoint.agents
+            : memoryConfig.agent?.provider,
        },
      },
      {
@ -595,10 +629,12 @@ class AgentClient extends BaseClient {
    const userId = this.options.req.user.id + '';
    const messageId = this.responseMessageId + '';
    const conversationId = this.conversationId + '';
+    const streamId = this.options.req?._resumableStreamId || null;
    const [withoutKeys, processMemory] = await createMemoryProcessor({
      userId,
      config,
      messageId,
+      streamId,
      conversationId,
      memoryMethods: {
        setMemory: db.setMemory,
@ -692,9 +728,7 @@ class AgentClient extends BaseClient {
    });

    const completion = filterMalformedContentParts(this.contentParts);
-    const metadata = this.agentIdMap ? { agentIdMap: this.agentIdMap } : undefined;
-
-    return { completion, metadata };
+    return { completion };
  }

  /**
@ -890,12 +924,10 @@ class AgentClient extends BaseClient {
       */
      const runAgents = async (messages) => {
        const agents = [this.options.agent];
-        if (
-          this.agentConfigs &&
-          this.agentConfigs.size > 0 &&
-          ((this.options.agent.edges?.length ?? 0) > 0 ||
-            (await checkCapability(this.options.req, AgentCapabilities.chain)))
-        ) {
+        // Include additional agents when:
+        // - agentConfigs has agents (from addedConvo parallel execution or agent handoffs)
+        // - Agents without incoming edges become start nodes and run in parallel automatically
+        if (this.agentConfigs && this.agentConfigs.size > 0) {
          agents.push(...this.agentConfigs.values());
        }

@ -955,6 +987,12 @@ class AgentClient extends BaseClient {
        }

        this.run = run;
+
+        const streamId = this.options.req?._resumableStreamId;
+        if (streamId && run.Graph) {
+          GenerationJobManager.setGraph(streamId, run.Graph);
+        }
+
        if (userMCPAuthMap != null) {
          config.configurable.userMCPAuthMap = userMCPAuthMap;
        }
@ -985,24 +1023,6 @@ class AgentClient extends BaseClient {
          );
        });
      }
-
-      try {
-        /** Capture agent ID map if we have edges or multiple agents */
-        const shouldStoreAgentMap =
-          (this.options.agent.edges?.length ?? 0) > 0 || (this.agentConfigs?.size ?? 0) > 0;
-        if (shouldStoreAgentMap && run?.Graph) {
-          const contentPartAgentMap = run.Graph.getContentPartAgentMap();
-          if (contentPartAgentMap && contentPartAgentMap.size > 0) {
-            this.agentIdMap = Object.fromEntries(contentPartAgentMap);
-            logger.debug('[AgentClient] Captured agent ID map:', {
-              totalParts: this.contentParts.length,
-              mappedParts: Object.keys(this.agentIdMap).length,
-            });
-          }
-        }
-      } catch (error) {
-        logger.error('[AgentClient] Error capturing agent ID map:', error);
-      }
    } catch (err) {
      logger.error(
        '[api/server/controllers/agents/client.js #sendCompletion] Operation aborted',
--- a/api/server/controllers/agents/request.js
+++ b/api/server/controllers/agents/request.js
@ -2,14 +2,11 @@ const { logger } = require('@librechat/data-schemas');
 const { Constants } = require('librechat-data-provider');
 const {
  sendEvent,
+  GenerationJobManager,
  sanitizeFileForTransmit,
  sanitizeMessageForTransmit,
 } = require('@librechat/api');
-const {
-  handleAbortError,
-  createAbortController,
-  cleanupAbortController,
-} = require('~/server/middleware');
+const { handleAbortError } = require('~/server/middleware');
 const { disposeClient, clientRegistry, requestDataMap } = require('~/server/cleanup');
 const { saveMessage } = require('~/models');

@ -31,12 +28,16 @@ function createCloseHandler(abortController) {
  };
 }

-const AgentController = async (req, res, next, initializeClient, addTitle) => {
-  let {
+/**
+ * Resumable Agent Controller - Generation runs independently of HTTP connection.
+ * Returns streamId immediately, client subscribes separately via SSE.
+ */
+const ResumableAgentController = async (req, res, next, initializeClient, addTitle) => {
+  const {
    text,
    isRegenerate,
    endpointOption,
-    conversationId,
+    conversationId: reqConversationId,
    isContinued = false,
    editedContent = null,
    parentMessageId = null,
@ -44,18 +45,354 @@ const AgentController = async (req, res, next, initializeClient, addTitle) => {
    responseMessageId: editedResponseMessageId = null,
  } = req.body;

-  let sender;
-  let abortKey;
+  const userId = req.user.id;
+
+  // Generate conversationId upfront if not provided - streamId === conversationId always
+  // Treat "new" as a placeholder that needs a real UUID (frontend may send "new" for new convos)
+  const conversationId =
+    !reqConversationId || reqConversationId === 'new' ? crypto.randomUUID() : reqConversationId;
+  const streamId = conversationId;
+
+  let client = null;
+
+  try {
+    const job = await GenerationJobManager.createJob(streamId, userId, conversationId);
+    req._resumableStreamId = streamId;
+
+    // Send JSON response IMMEDIATELY so client can connect to SSE stream
+    // This is critical: tool loading (MCP OAuth) may emit events that the client needs to receive
+    res.json({ streamId, conversationId, status: 'started' });
+
+    // Note: We no longer use res.on('close') to abort since we send JSON immediately.
+    // The response closes normally after res.json(), which is not an abort condition.
+    // Abort handling is done through GenerationJobManager via the SSE stream connection.
+
+    // Track if partial response was already saved to avoid duplicates
+    let partialResponseSaved = false;
+
+    /**
+     * Listen for all subscribers leaving to save partial response.
+     * This ensures the response is saved to DB even if all clients disconnect
+     * while generation continues.
+     *
+     * Note: The messageId used here falls back to `${userMessage.messageId}_` if the
+     * actual response messageId isn't available yet. The final response save will
+     * overwrite this with the complete response using the same messageId pattern.
+     */
+    job.emitter.on('allSubscribersLeft', async (aggregatedContent) => {
+      if (partialResponseSaved || !aggregatedContent || aggregatedContent.length === 0) {
+        return;
+      }
+
+      const resumeState = await GenerationJobManager.getResumeState(streamId);
+      if (!resumeState?.userMessage) {
+        logger.debug('[ResumableAgentController] No user message to save partial response for');
+        return;
+      }
+
+      partialResponseSaved = true;
+      const responseConversationId = resumeState.conversationId || conversationId;
+
+      try {
+        const partialMessage = {
+          messageId: resumeState.responseMessageId || `${resumeState.userMessage.messageId}_`,
+          conversationId: responseConversationId,
+          parentMessageId: resumeState.userMessage.messageId,
+          sender: client?.sender ?? 'AI',
+          content: aggregatedContent,
+          unfinished: true,
+          error: false,
+          isCreatedByUser: false,
+          user: userId,
+          endpoint: endpointOption.endpoint,
+          model: endpointOption.modelOptions?.model || endpointOption.model_parameters?.model,
+        };
+
+        if (req.body?.agent_id) {
+          partialMessage.agent_id = req.body.agent_id;
+        }
+
+        await saveMessage(req, partialMessage, {
+          context: 'api/server/controllers/agents/request.js - partial response on disconnect',
+        });
+
+        logger.debug(
+          `[ResumableAgentController] Saved partial response for ${streamId}, content parts: ${aggregatedContent.length}`,
+        );
+      } catch (error) {
+        logger.error('[ResumableAgentController] Error saving partial response:', error);
+        // Reset flag so we can try again if subscribers reconnect and leave again
+        partialResponseSaved = false;
+      }
+    });
+
+    /** @type {{ client: TAgentClient; userMCPAuthMap?: Record<string, Record<string, string>> }} */
+    const result = await initializeClient({
+      req,
+      res,
+      endpointOption,
+      // Use the job's abort controller signal - allows abort via GenerationJobManager.abortJob()
+      signal: job.abortController.signal,
+    });
+
+    if (job.abortController.signal.aborted) {
+      GenerationJobManager.completeJob(streamId, 'Request aborted during initialization');
+      return;
+    }
+
+    client = result.client;
+
+    if (client?.sender) {
+      GenerationJobManager.updateMetadata(streamId, { sender: client.sender });
+    }
+
+    // Store reference to client's contentParts - graph will be set when run is created
+    if (client?.contentParts) {
+      GenerationJobManager.setContentParts(streamId, client.contentParts);
+    }
+
+    let userMessage;
+
+    const getReqData = (data = {}) => {
+      if (data.userMessage) {
+        userMessage = data.userMessage;
+      }
+      // conversationId is pre-generated, no need to update from callback
+    };
+
+    // Start background generation - readyPromise resolves immediately now
+    // (sync mechanism handles late subscribers)
+    const startGeneration = async () => {
+      try {
+        // Short timeout as safety net - promise should already be resolved
+        await Promise.race([job.readyPromise, new Promise((resolve) => setTimeout(resolve, 100))]);
+      } catch (waitError) {
+        logger.warn(
+          `[ResumableAgentController] Error waiting for subscriber: ${waitError.message}`,
+        );
+      }
+
+      try {
+        const onStart = (userMsg, respMsgId, _isNewConvo) => {
+          userMessage = userMsg;
+
+          // Store userMessage and responseMessageId upfront for resume capability
+          GenerationJobManager.updateMetadata(streamId, {
+            responseMessageId: respMsgId,
+            userMessage: {
+              messageId: userMsg.messageId,
+              parentMessageId: userMsg.parentMessageId,
+              conversationId: userMsg.conversationId,
+              text: userMsg.text,
+            },
+          });
+
+          GenerationJobManager.emitChunk(streamId, {
+            created: true,
+            message: userMessage,
+            streamId,
+          });
+        };
+
+        const messageOptions = {
+          user: userId,
+          onStart,
+          getReqData,
+          isContinued,
+          isRegenerate,
+          editedContent,
+          conversationId,
+          parentMessageId,
+          abortController: job.abortController,
+          overrideParentMessageId,
+          isEdited: !!editedContent,
+          userMCPAuthMap: result.userMCPAuthMap,
+          responseMessageId: editedResponseMessageId,
+          progressOptions: {
+            res: {
+              write: () => true,
+              end: () => {},
+              headersSent: false,
+              writableEnded: false,
+            },
+          },
+        };
+
+        const response = await client.sendMessage(text, messageOptions);
+
+        const messageId = response.messageId;
+        const endpoint = endpointOption.endpoint;
+        response.endpoint = endpoint;
+
+        const databasePromise = response.databasePromise;
+        delete response.databasePromise;
+
+        const { conversation: convoData = {} } = await databasePromise;
+        const conversation = { ...convoData };
+        conversation.title =
+          conversation && !conversation.title ? null : conversation?.title || 'New Chat';
+
+        if (req.body.files && client.options?.attachments) {
+          userMessage.files = [];
+          const messageFiles = new Set(req.body.files.map((file) => file.file_id));
+          for (const attachment of client.options.attachments) {
+            if (messageFiles.has(attachment.file_id)) {
+              userMessage.files.push(sanitizeFileForTransmit(attachment));
+            }
+          }
+          delete userMessage.image_urls;
+        }
+
+        // Check abort state BEFORE calling completeJob (which triggers abort signal for cleanup)
+        const wasAbortedBeforeComplete = job.abortController.signal.aborted;
+        const isNewConvo = !reqConversationId || reqConversationId === 'new';
+        const shouldGenerateTitle =
+          addTitle &&
+          parentMessageId === Constants.NO_PARENT &&
+          isNewConvo &&
+          !wasAbortedBeforeComplete;
+
+        if (!wasAbortedBeforeComplete) {
+          const finalEvent = {
+            final: true,
+            conversation,
+            title: conversation.title,
+            requestMessage: sanitizeMessageForTransmit(userMessage),
+            responseMessage: { ...response },
+          };
+
+          GenerationJobManager.emitDone(streamId, finalEvent);
+          GenerationJobManager.completeJob(streamId);
+
+          if (client.savedMessageIds && !client.savedMessageIds.has(messageId)) {
+            await saveMessage(
+              req,
+              { ...response, user: userId },
+              { context: 'api/server/controllers/agents/request.js - resumable response end' },
+            );
+          }
+        } else {
+          const finalEvent = {
+            final: true,
+            conversation,
+            title: conversation.title,
+            requestMessage: sanitizeMessageForTransmit(userMessage),
+            responseMessage: { ...response, error: true },
+            error: { message: 'Request was aborted' },
+          };
+          GenerationJobManager.emitDone(streamId, finalEvent);
+          GenerationJobManager.completeJob(streamId, 'Request aborted');
+        }
+
+        if (!client.skipSaveUserMessage && userMessage) {
+          await saveMessage(req, userMessage, {
+            context: 'api/server/controllers/agents/request.js - resumable user message',
+          });
+        }
+
+        if (shouldGenerateTitle) {
+          addTitle(req, {
+            text,
+            response: { ...response },
+            client,
+          })
+            .catch((err) => {
+              logger.error('[ResumableAgentController] Error in title generation', err);
+            })
+            .finally(() => {
+              if (client) {
+                disposeClient(client);
+              }
+            });
+        } else {
+          if (client) {
+            disposeClient(client);
+          }
+        }
+      } catch (error) {
+        // Check if this was an abort (not a real error)
+        const wasAborted = job.abortController.signal.aborted || error.message?.includes('abort');
+
+        if (wasAborted) {
+          logger.debug(`[ResumableAgentController] Generation aborted for ${streamId}`);
+          // abortJob already handled emitDone and completeJob
+        } else {
+          logger.error(`[ResumableAgentController] Generation error for ${streamId}:`, error);
+          GenerationJobManager.emitError(streamId, error.message || 'Generation failed');
+          GenerationJobManager.completeJob(streamId, error.message);
+        }
+
+        if (client) {
+          disposeClient(client);
+        }
+
+        // Don't continue to title generation after error/abort
+        return;
+      }
+    };
+
+    // Start generation and handle any unhandled errors
+    startGeneration().catch((err) => {
+      logger.error(
+        `[ResumableAgentController] Unhandled error in background generation: ${err.message}`,
+      );
+      GenerationJobManager.completeJob(streamId, err.message);
+    });
+  } catch (error) {
+    logger.error('[ResumableAgentController] Initialization error:', error);
+    if (!res.headersSent) {
+      res.status(500).json({ error: error.message || 'Failed to start generation' });
+    } else {
+      // JSON already sent, emit error to stream so client can receive it
+      GenerationJobManager.emitError(streamId, error.message || 'Failed to start generation');
+    }
+    GenerationJobManager.completeJob(streamId, error.message);
+    if (client) {
+      disposeClient(client);
+    }
+  }
+};
+
+/**
+ * Agent Controller - Routes to ResumableAgentController for all requests.
+ * The legacy non-resumable path is kept below as `_LegacyAgentController`,
+ * but it is not called from here: every request is forwarded unconditionally.
+ *
+ * All five arguments are passed through unchanged and in the same order, and
+ * whatever ResumableAgentController returns is returned to the caller.
+ *
+ * @param {Object} req - The incoming request object, forwarded as-is.
+ * @param {ServerResponse} res - The server response object, forwarded as-is.
+ * @param {Function} next - Forwarded as-is (presumably the Express `next` callback; confirm against the router).
+ * @param {Function} initializeClient - Client initializer, forwarded as-is.
+ * @param {Function} addTitle - Title generation function, forwarded as-is.
+ * @returns {Promise<*>} Settles with the result of ResumableAgentController.
+ */
+const AgentController = async (req, res, next, initializeClient, addTitle) => {
+  return ResumableAgentController(req, res, next, initializeClient, addTitle);
+};
+
+/**
+ * Legacy Non-resumable Agent Controller - Uses GenerationJobManager for abort handling.
+ * Response is streamed directly to client via res, but abort state is managed centrally.
+ * @deprecated Use ResumableAgentController instead
+ */
+const _LegacyAgentController = async (req, res, next, initializeClient, addTitle) => {
+  const {
+    text,
+    isRegenerate,
+    endpointOption,
+    conversationId: reqConversationId,
+    isContinued = false,
+    editedContent = null,
+    parentMessageId = null,
+    overrideParentMessageId = null,
+    responseMessageId: editedResponseMessageId = null,
+  } = req.body;
+
+  // Generate conversationId upfront if not provided - streamId === conversationId always
+  // Treat "new" as a placeholder that needs a real UUID (frontend may send "new" for new convos)
+  const conversationId =
+    !reqConversationId || reqConversationId === 'new' ? crypto.randomUUID() : reqConversationId;
+  const streamId = conversationId;
+
  let userMessage;
-  let promptTokens;
  let userMessageId;
  let responseMessageId;
-  let userMessagePromise;
-  let getAbortData;
  let client = null;
  let cleanupHandlers = [];

-  const newConvo = !conversationId;
+  // Match the same logic used for conversationId generation above
+  const isNewConvo = !reqConversationId || reqConversationId === 'new';
  const userId = req.user.id;

  // Create handler to avoid capturing the entire parent scope
@ -64,24 +401,20 @@ const AgentController = async (req, res, next, initializeClient, addTitle) => {
      if (key === 'userMessage') {
        userMessage = data[key];
        userMessageId = data[key].messageId;
-      } else if (key === 'userMessagePromise') {
-        userMessagePromise = data[key];
      } else if (key === 'responseMessageId') {
        responseMessageId = data[key];
      } else if (key === 'promptTokens') {
-        promptTokens = data[key];
+        // Update job metadata with prompt tokens for abort handling
+        GenerationJobManager.updateMetadata(streamId, { promptTokens: data[key] });
      } else if (key === 'sender') {
-        sender = data[key];
-      } else if (key === 'abortKey') {
-        abortKey = data[key];
-      } else if (!conversationId && key === 'conversationId') {
-        conversationId = data[key];
+        GenerationJobManager.updateMetadata(streamId, { sender: data[key] });
      }
+      // conversationId is pre-generated, no need to update from callback
    }
  };

  // Create a function to handle final cleanup
-  const performCleanup = () => {
+  const performCleanup = async () => {
    logger.debug('[AgentController] Performing cleanup');
    if (Array.isArray(cleanupHandlers)) {
      for (const handler of cleanupHandlers) {
@ -95,10 +428,10 @@ const AgentController = async (req, res, next, initializeClient, addTitle) => {
      }
    }

-    // Clean up abort controller
-    if (abortKey) {
-      logger.debug('[AgentController] Cleaning up abort controller');
-      cleanupAbortController(abortKey);
+    // Complete the job in GenerationJobManager
+    if (streamId) {
+      logger.debug('[AgentController] Completing job in GenerationJobManager');
+      await GenerationJobManager.completeJob(streamId);
    }

    // Dispose client properly
@ -110,11 +443,7 @@ const AgentController = async (req, res, next, initializeClient, addTitle) => {
    client = null;
    getReqData = null;
    userMessage = null;
-    getAbortData = null;
-    endpointOption.agent = null;
-    endpointOption = null;
    cleanupHandlers = null;
-    userMessagePromise = null;

    // Clear request data map
    if (requestDataMap.has(req)) {
@ -136,6 +465,7 @@ const AgentController = async (req, res, next, initializeClient, addTitle) => {
      }
    };
    cleanupHandlers.push(removePrelimHandler);
+
    /** @type {{ client: TAgentClient; userMCPAuthMap?: Record<string, Record<string, string>> }} */
    const result = await initializeClient({
      req,
@ -143,6 +473,7 @@ const AgentController = async (req, res, next, initializeClient, addTitle) => {
      endpointOption,
      signal: prelimAbortController.signal,
    });
+
    if (prelimAbortController.signal?.aborted) {
      prelimAbortController = null;
      throw new Error('Request was aborted before initialization could complete');
@ -161,28 +492,24 @@ const AgentController = async (req, res, next, initializeClient, addTitle) => {
    // Store request data in WeakMap keyed by req object
    requestDataMap.set(req, { client });

-    // Use WeakRef to allow GC but still access content if it exists
-    const contentRef = new WeakRef(client.contentParts || []);
+    // Create job in GenerationJobManager for abort handling
+    // streamId === conversationId (pre-generated above)
+    const job = await GenerationJobManager.createJob(streamId, userId, conversationId);

-    // Minimize closure scope - only capture small primitives and WeakRef
-    getAbortData = () => {
-      // Dereference WeakRef each time
-      const content = contentRef.deref();
+    // Store endpoint metadata for abort handling
+    GenerationJobManager.updateMetadata(streamId, {
+      endpoint: endpointOption.endpoint,
+      iconURL: endpointOption.iconURL,
+      model: endpointOption.modelOptions?.model || endpointOption.model_parameters?.model,
+      sender: client?.sender,
+    });

-      return {
-        sender,
-        content: content || [],
-        userMessage,
-        promptTokens,
-        conversationId,
-        userMessagePromise,
-        messageId: responseMessageId,
-        parentMessageId: overrideParentMessageId ?? userMessageId,
-      };
-    };
+    // Store content parts reference for abort
+    if (client?.contentParts) {
+      GenerationJobManager.setContentParts(streamId, client.contentParts);
+    }

-    const { abortController, onStart } = createAbortController(req, res, getAbortData, getReqData);
-    const closeHandler = createCloseHandler(abortController);
+    const closeHandler = createCloseHandler(job.abortController);
    res.on('close', closeHandler);
    cleanupHandlers.push(() => {
      try {
@ -192,6 +519,27 @@ const AgentController = async (req, res, next, initializeClient, addTitle) => {
      }
    });

+    /**
+     * onStart callback - stores user message and response ID for abort handling.
+     *
+     * In order, it:
+     *  1. sends a `{ message: userMsg, created: true }` event on `res`;
+     *  2. records the user message, its `messageId`, and the response message ID
+     *     in the enclosing controller's `userMessage`, `userMessageId` and
+     *     `responseMessageId` variables;
+     *  3. writes the response message ID and a reduced copy of the user message
+     *     (`messageId`, `parentMessageId`, `conversationId`, `text`) into the
+     *     job metadata for `streamId`.
+     *
+     * The `conversationId` stored in the metadata is the controller's own
+     * pre-generated value from the enclosing scope, not a field of `userMsg`.
+     *
+     * @param {Object} userMsg - The user message; `messageId`, `parentMessageId` and `text` are read from it.
+     * @param {*} respMsgId - ID of the response message (presumably a string; confirm against the caller).
+     * @param {*} [_isNewConvo] - Accepted but never read in this callback.
+     */
+    const onStart = (userMsg, respMsgId, _isNewConvo) => {
+      sendEvent(res, { message: userMsg, created: true });
+      userMessage = userMsg;
+      userMessageId = userMsg.messageId;
+      responseMessageId = respMsgId;
+
+      // Store metadata for abort handling (conversationId is pre-generated)
+      GenerationJobManager.updateMetadata(streamId, {
+        responseMessageId: respMsgId,
+        userMessage: {
+          messageId: userMsg.messageId,
+          parentMessageId: userMsg.parentMessageId,
+          conversationId,
+          text: userMsg.text,
+        },
+      });
+    };
+
    const messageOptions = {
      user: userId,
      onStart,
@ -201,7 +549,7 @@ const AgentController = async (req, res, next, initializeClient, addTitle) => {
      editedContent,
      conversationId,
      parentMessageId,
-      abortController,
+      abortController: job.abortController,
      overrideParentMessageId,
      isEdited: !!editedContent,
      userMCPAuthMap: result.userMCPAuthMap,
@ -241,7 +589,7 @@ const AgentController = async (req, res, next, initializeClient, addTitle) => {
    }

    // Only send if not aborted
-    if (!abortController.signal.aborted) {
+    if (!job.abortController.signal.aborted) {
      // Create a new response object with minimal copies
      const finalResponse = { ...response };

@ -292,7 +640,7 @@ const AgentController = async (req, res, next, initializeClient, addTitle) => {
    }

    // Add title if needed - extract minimal data
-    if (addTitle && parentMessageId === Constants.NO_PARENT && newConvo) {
+    if (addTitle && parentMessageId === Constants.NO_PARENT && isNewConvo) {
      addTitle(req, {
        text,
        response: { ...response },
@ -315,7 +663,7 @@ const AgentController = async (req, res, next, initializeClient, addTitle) => {
    // Handle error without capturing much scope
    handleAbortError(res, req, error, {
      conversationId,
-      sender,
+      sender: client?.sender,
      messageId: responseMessageId,
      parentMessageId: overrideParentMessageId ?? userMessageId ?? parentMessageId,
      userMessageId,