WIP: resumable stream

This commit is contained in:
Danny Avila 2025-12-11 09:52:15 -05:00
parent 2522cf760f
commit ff14cd3b44
No known key found for this signature in database
GPG key ID: BF31EEB2C5CA0956
12 changed files with 498 additions and 209 deletions

View file

@ -66,6 +66,65 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
const job = GenerationJobManager.createJob(streamId, userId, reqConversationId);
req._resumableStreamId = streamId;
// Tracks whether the partial response has already been persisted, so we
// never write duplicate rows for the same disconnect cycle.
let partialResponseSaved = false;

/**
 * Persist the in-progress response once every subscriber has left the stream.
 * Generation continues server-side, so this guarantees the partial output
 * reaches the DB even when no client is connected.
 *
 * Note: The messageId used here falls back to `${userMessage.messageId}_` if the
 * actual response messageId isn't available yet. The final response save will
 * overwrite this with the complete response using the same messageId pattern.
 */
job.emitter.on('allSubscribersLeft', async (aggregatedContent) => {
  if (partialResponseSaved) {
    return;
  }
  if (!aggregatedContent || aggregatedContent.length === 0) {
    return;
  }

  const state = GenerationJobManager.getResumeState(streamId);
  if (!state?.userMessage) {
    logger.debug('[ResumableAgentController] No user message to save partial response for');
    return;
  }

  // Set the guard *before* awaiting so a second emission during the save is a no-op.
  partialResponseSaved = true;
  const convoId = state.conversationId || reqConversationId;

  try {
    const message = {
      messageId: state.responseMessageId || `${state.userMessage.messageId}_`,
      conversationId: convoId,
      parentMessageId: state.userMessage.messageId,
      sender: client?.sender ?? 'AI',
      content: aggregatedContent,
      unfinished: true,
      error: false,
      isCreatedByUser: false,
      user: userId,
      endpoint: endpointOption.endpoint,
      model: endpointOption.modelOptions?.model || endpointOption.model_parameters?.model,
      // Only attach agent_id when the request actually carried one.
      ...(req.body?.agent_id ? { agent_id: req.body.agent_id } : {}),
    };

    await saveMessage(req, message, {
      context: 'api/server/controllers/agents/request.js - partial response on disconnect',
    });
    logger.debug(
      `[ResumableAgentController] Saved partial response for ${streamId}, content parts: ${aggregatedContent.length}`,
    );
  } catch (err) {
    logger.error('[ResumableAgentController] Error saving partial response:', err);
    // Reset the guard so a later disconnect cycle can retry the save.
    partialResponseSaved = false;
  }
});
/** @type {{ client: TAgentClient; userMCPAuthMap?: Record<string, Record<string, string>> }} */
const result = await initializeClient({
req,
@ -106,9 +165,14 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
}
try {
const onStart = (userMsg, _respMsgId, _isNewConvo) => {
const onStart = (userMsg, respMsgId, _isNewConvo) => {
userMessage = userMsg;
// Store the response messageId upfront so partial saves use the same ID
if (respMsgId) {
GenerationJobManager.updateMetadata(streamId, { responseMessageId: respMsgId });
}
GenerationJobManager.emitChunk(streamId, {
created: true,
message: userMessage,
@ -203,8 +267,15 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
});
}
// Skip title generation if job was aborted
const newConvo = !reqConversationId;
if (addTitle && parentMessageId === Constants.NO_PARENT && newConvo) {
const shouldGenerateTitle =
addTitle &&
parentMessageId === Constants.NO_PARENT &&
newConvo &&
!job.abortController.signal.aborted;
if (shouldGenerateTitle) {
addTitle(req, {
text,
response: { ...response },
@ -224,12 +295,24 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
}
}
} catch (error) {
logger.error(`[ResumableAgentController] Generation error for ${streamId}:`, error);
GenerationJobManager.emitError(streamId, error.message || 'Generation failed');
GenerationJobManager.completeJob(streamId, error.message);
// Check if this was an abort (not a real error)
const wasAborted = job.abortController.signal.aborted || error.message?.includes('abort');
if (wasAborted) {
logger.debug(`[ResumableAgentController] Generation aborted for ${streamId}`);
// abortJob already handled emitDone and completeJob
} else {
logger.error(`[ResumableAgentController] Generation error for ${streamId}:`, error);
GenerationJobManager.emitError(streamId, error.message || 'Generation failed');
GenerationJobManager.completeJob(streamId, error.message);
}
if (client) {
disposeClient(client);
}
// Don't continue to title generation after error/abort
return;
}
};

View file

@ -23,9 +23,10 @@ async function buildEndpointOption(req, res, next) {
try {
parsedBody = parseCompactConvo({ endpoint, endpointType, conversation: req.body });
} catch (error) {
logger.warn(
`Error parsing conversation for endpoint ${endpoint}${error?.message ? `: ${error.message}` : ''}`,
);
logger.error(`Error parsing compact conversation for endpoint ${endpoint}`, error);
logger.debug({
'Error parsing compact conversation': { endpoint, endpointType, conversation: req.body },
});
return handleError(res, { text: 'Error parsing conversation' });
}

View file

@ -1,6 +1,5 @@
const express = require('express');
const { generateCheckAccess, skipAgentCheck, GenerationJobManager } = require('@librechat/api');
const { logger } = require('@librechat/data-schemas');
const { generateCheckAccess, skipAgentCheck } = require('@librechat/api');
const { PermissionTypes, Permissions, PermissionBits } = require('librechat-data-provider');
const {
setHeaders,
@ -35,25 +34,6 @@ router.use(validateConvoAccess);
router.use(buildEndpointOption);
router.use(setHeaders);
/**
 * @route POST /abort
 * @desc Abort an ongoing generation job
 * @access Private
 */
router.post('/abort', (req, res) => {
  const { streamId, abortKey } = req.body;
  // The abortKey has the shape "<streamId>:<suffix>"; use its prefix when
  // no explicit streamId was sent.
  const jobStreamId = streamId || abortKey?.split(':')?.[0];
  if (!jobStreamId || !GenerationJobManager.hasJob(jobStreamId)) {
    return res.status(404).json({ error: 'Job not found' });
  }
  GenerationJobManager.abortJob(jobStreamId);
  logger.debug(`[AgentStream] Job aborted: ${jobStreamId}`);
  return res.json({ success: true, aborted: jobStreamId });
});
const controller = async (req, res, next) => {
await AgentController(req, res, next, initializeClient, addTitle);
};

View file

@ -32,10 +32,12 @@ router.use('/', v1);
* @route GET /chat/stream/:streamId
* @desc Subscribe to an ongoing generation job's SSE stream with replay support
* @access Private
* @description Replays any chunks missed during disconnect, then streams live
* @description Sends sync event with resume state, replays missed chunks, then streams live
* @query resume=true - Indicates this is a reconnection (sends sync event)
*/
router.get('/chat/stream/:streamId', (req, res) => {
const { streamId } = req.params;
const isResume = req.query.resume === 'true';
const job = GenerationJobManager.getJob(streamId);
if (!job) {
@ -52,7 +54,22 @@ router.get('/chat/stream/:streamId', (req, res) => {
res.setHeader('X-Accel-Buffering', 'no');
res.flushHeaders();
logger.debug(`[AgentStream] Client subscribed to ${streamId}`);
logger.debug(`[AgentStream] Client subscribed to ${streamId}, resume: ${isResume}`);
// Send sync event with resume state for reconnecting clients
if (isResume && !GenerationJobManager.wasSyncSent(streamId)) {
const resumeState = GenerationJobManager.getResumeState(streamId);
if (resumeState && !res.writableEnded) {
res.write(`event: message\ndata: ${JSON.stringify({ sync: true, resumeState })}\n\n`);
if (typeof res.flush === 'function') {
res.flush();
}
GenerationJobManager.markSyncSent(streamId);
logger.debug(
`[AgentStream] Sent sync event for ${streamId} with ${resumeState.runSteps.length} run steps`,
);
}
}
const result = GenerationJobManager.subscribe(
streamId,
@ -98,7 +115,7 @@ router.get('/chat/stream/:streamId', (req, res) => {
* @route GET /chat/status/:conversationId
* @desc Check if there's an active generation job for a conversation
* @access Private
* @returns { active, streamId, status, chunkCount, aggregatedContent, createdAt }
* @returns { active, streamId, status, chunkCount, aggregatedContent, createdAt, resumeState }
*/
router.get('/chat/status/:conversationId', (req, res) => {
const { conversationId } = req.params;
@ -114,17 +131,47 @@ router.get('/chat/status/:conversationId', (req, res) => {
}
const info = GenerationJobManager.getStreamInfo(job.streamId);
const resumeState = GenerationJobManager.getResumeState(job.streamId);
res.json({
active: info?.active ?? false,
streamId: job.streamId,
status: info?.status ?? job.status,
chunkCount: info?.chunkCount ?? 0,
runStepCount: info?.runStepCount ?? 0,
aggregatedContent: info?.aggregatedContent,
createdAt: info?.createdAt ?? job.createdAt,
resumeState,
});
});
/**
 * @route POST /chat/abort
 * @desc Abort an ongoing generation job
 * @access Private
 * @description Mounted before chatRouter to bypass buildEndpointOption middleware
 */
router.post('/chat/abort', (req, res) => {
  const { streamId, abortKey } = req.body;
  // Clients may send either a streamId directly or an abortKey of the form
  // "<streamId>:<suffix>" — derive the job id from whichever is present.
  const jobStreamId = streamId || abortKey?.split(':')?.[0];
  if (jobStreamId && GenerationJobManager.hasJob(jobStreamId)) {
    GenerationJobManager.abortJob(jobStreamId);
    logger.debug(`[AgentStream] Job aborted: ${jobStreamId}`);
    return res.json({ success: true, aborted: jobStreamId });
  }
  logger.warn(`[AgentStream] Job not found for streamId: ${jobStreamId}`);
  return res.status(404).json({ error: 'Job not found', streamId: jobStreamId });
});
const chatRouter = express.Router();
chatRouter.use(configMiddleware);