mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 17:00:15 +01:00
WIP: resumable stream
This commit is contained in:
parent
2522cf760f
commit
ff14cd3b44
12 changed files with 498 additions and 209 deletions
|
|
@ -66,6 +66,65 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
|
|||
const job = GenerationJobManager.createJob(streamId, userId, reqConversationId);
|
||||
req._resumableStreamId = streamId;
|
||||
|
||||
// Track if partial response was already saved to avoid duplicates
|
||||
let partialResponseSaved = false;
|
||||
|
||||
/**
 * Saves the partial response when every subscriber has left the stream, so the
 * content generated so far reaches the DB even if all clients disconnect while
 * generation continues.
 *
 * Note: the messageId falls back to `${userMessage.messageId}_` when the actual
 * response messageId isn't available yet. The final response save is expected to
 * overwrite this record using the same messageId pattern.
 *
 * @param {Array} aggregatedContent - Content parts aggregated so far; nothing is
 *   saved when it is missing or empty.
 */
job.emitter.on('allSubscribersLeft', async (aggregatedContent) => {
  if (partialResponseSaved || !aggregatedContent || aggregatedContent.length === 0) {
    return;
  }

  // EventEmitter does not observe the promise returned by an async listener, so
  // anything thrown outside this try block would surface as an unhandled
  // rejection. Keep every step that can throw inside it.
  try {
    const resumeState = GenerationJobManager.getResumeState(streamId);
    if (!resumeState?.userMessage) {
      logger.debug('[ResumableAgentController] No user message to save partial response for');
      return;
    }

    // Claim the save before awaiting so a second event cannot start a duplicate write.
    partialResponseSaved = true;
    const responseConversationId = resumeState.conversationId || reqConversationId;

    const partialMessage = {
      messageId: resumeState.responseMessageId || `${resumeState.userMessage.messageId}_`,
      conversationId: responseConversationId,
      parentMessageId: resumeState.userMessage.messageId,
      sender: client?.sender ?? 'AI',
      content: aggregatedContent,
      unfinished: true,
      error: false,
      isCreatedByUser: false,
      user: userId,
      endpoint: endpointOption.endpoint,
      model: endpointOption.modelOptions?.model || endpointOption.model_parameters?.model,
    };

    if (req.body?.agent_id) {
      partialMessage.agent_id = req.body.agent_id;
    }

    await saveMessage(req, partialMessage, {
      context: 'api/server/controllers/agents/request.js - partial response on disconnect',
    });

    logger.debug(
      `[ResumableAgentController] Saved partial response for ${streamId}, content parts: ${aggregatedContent.length}`,
    );
  } catch (error) {
    logger.error('[ResumableAgentController] Error saving partial response:', error);
    // Reset flag so we can try again if subscribers reconnect and leave again
    partialResponseSaved = false;
  }
});
|
||||
|
||||
/** @type {{ client: TAgentClient; userMCPAuthMap?: Record<string, Record<string, string>> }} */
|
||||
const result = await initializeClient({
|
||||
req,
|
||||
|
|
@ -106,9 +165,14 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
|
|||
}
|
||||
|
||||
try {
|
||||
const onStart = (userMsg, _respMsgId, _isNewConvo) => {
|
||||
const onStart = (userMsg, respMsgId, _isNewConvo) => {
|
||||
userMessage = userMsg;
|
||||
|
||||
// Store the response messageId upfront so partial saves use the same ID
|
||||
if (respMsgId) {
|
||||
GenerationJobManager.updateMetadata(streamId, { responseMessageId: respMsgId });
|
||||
}
|
||||
|
||||
GenerationJobManager.emitChunk(streamId, {
|
||||
created: true,
|
||||
message: userMessage,
|
||||
|
|
@ -203,8 +267,15 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
|
|||
});
|
||||
}
|
||||
|
||||
// Skip title generation if job was aborted
|
||||
const newConvo = !reqConversationId;
|
||||
if (addTitle && parentMessageId === Constants.NO_PARENT && newConvo) {
|
||||
const shouldGenerateTitle =
|
||||
addTitle &&
|
||||
parentMessageId === Constants.NO_PARENT &&
|
||||
newConvo &&
|
||||
!job.abortController.signal.aborted;
|
||||
|
||||
if (shouldGenerateTitle) {
|
||||
addTitle(req, {
|
||||
text,
|
||||
response: { ...response },
|
||||
|
|
@ -224,12 +295,24 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
|
|||
}
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error(`[ResumableAgentController] Generation error for ${streamId}:`, error);
|
||||
GenerationJobManager.emitError(streamId, error.message || 'Generation failed');
|
||||
GenerationJobManager.completeJob(streamId, error.message);
|
||||
// Check if this was an abort (not a real error)
|
||||
const wasAborted = job.abortController.signal.aborted || error.message?.includes('abort');
|
||||
|
||||
if (wasAborted) {
|
||||
logger.debug(`[ResumableAgentController] Generation aborted for ${streamId}`);
|
||||
// abortJob already handled emitDone and completeJob
|
||||
} else {
|
||||
logger.error(`[ResumableAgentController] Generation error for ${streamId}:`, error);
|
||||
GenerationJobManager.emitError(streamId, error.message || 'Generation failed');
|
||||
GenerationJobManager.completeJob(streamId, error.message);
|
||||
}
|
||||
|
||||
if (client) {
|
||||
disposeClient(client);
|
||||
}
|
||||
|
||||
// Don't continue to title generation after error/abort
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -23,9 +23,10 @@ async function buildEndpointOption(req, res, next) {
|
|||
try {
|
||||
parsedBody = parseCompactConvo({ endpoint, endpointType, conversation: req.body });
|
||||
} catch (error) {
|
||||
logger.warn(
|
||||
`Error parsing conversation for endpoint ${endpoint}${error?.message ? `: ${error.message}` : ''}`,
|
||||
);
|
||||
logger.error(`Error parsing compact conversation for endpoint ${endpoint}`, error);
|
||||
logger.debug({
|
||||
'Error parsing compact conversation': { endpoint, endpointType, conversation: req.body },
|
||||
});
|
||||
return handleError(res, { text: 'Error parsing conversation' });
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
const express = require('express');
|
||||
const { generateCheckAccess, skipAgentCheck, GenerationJobManager } = require('@librechat/api');
|
||||
const { logger } = require('@librechat/data-schemas');
|
||||
const { generateCheckAccess, skipAgentCheck } = require('@librechat/api');
|
||||
const { PermissionTypes, Permissions, PermissionBits } = require('librechat-data-provider');
|
||||
const {
|
||||
setHeaders,
|
||||
|
|
@ -35,25 +34,6 @@ router.use(validateConvoAccess);
|
|||
router.use(buildEndpointOption);
|
||||
router.use(setHeaders);
|
||||
|
||||
/**
 * @route POST /abort
 * @desc Abort an ongoing generation job
 * @access Private
 * @description Resolves the job from `streamId`, or from the segment of `abortKey`
 *   before the first `:`. Responds 404 when no matching job exists.
 */
router.post('/abort', (req, res) => {
  // The body is client-controlled: it may be absent, and `abortKey` may be any
  // JSON value. Guard both so malformed input yields a 404 rather than a 500.
  const { streamId, abortKey } = req.body ?? {};
  const abortKeyPrefix = typeof abortKey === 'string' ? abortKey.split(':')[0] : undefined;

  const jobStreamId = streamId || abortKeyPrefix;

  if (jobStreamId && GenerationJobManager.hasJob(jobStreamId)) {
    GenerationJobManager.abortJob(jobStreamId);
    logger.debug(`[AgentStream] Job aborted: ${jobStreamId}`);
    return res.json({ success: true, aborted: jobStreamId });
  }

  res.status(404).json({ error: 'Job not found' });
});
|
||||
|
||||
/**
 * Express handler that delegates to AgentController, supplying the
 * `initializeClient` and `addTitle` dependencies alongside the request triple.
 */
const controller = async (req, res, next) => {
  const handlerArgs = [req, res, next, initializeClient, addTitle];
  await AgentController(...handlerArgs);
};
|
||||
|
|
|
|||
|
|
@ -32,10 +32,12 @@ router.use('/', v1);
|
|||
* @route GET /chat/stream/:streamId
|
||||
* @desc Subscribe to an ongoing generation job's SSE stream with replay support
|
||||
* @access Private
|
||||
* @description Replays any chunks missed during disconnect, then streams live
|
||||
* @description Sends sync event with resume state, replays missed chunks, then streams live
|
||||
* @query resume=true - Indicates this is a reconnection (sends sync event)
|
||||
*/
|
||||
router.get('/chat/stream/:streamId', (req, res) => {
|
||||
const { streamId } = req.params;
|
||||
const isResume = req.query.resume === 'true';
|
||||
|
||||
const job = GenerationJobManager.getJob(streamId);
|
||||
if (!job) {
|
||||
|
|
@ -52,7 +54,22 @@ router.get('/chat/stream/:streamId', (req, res) => {
|
|||
res.setHeader('X-Accel-Buffering', 'no');
|
||||
res.flushHeaders();
|
||||
|
||||
logger.debug(`[AgentStream] Client subscribed to ${streamId}`);
|
||||
logger.debug(`[AgentStream] Client subscribed to ${streamId}, resume: ${isResume}`);
|
||||
|
||||
// Send sync event with resume state for reconnecting clients
|
||||
if (isResume && !GenerationJobManager.wasSyncSent(streamId)) {
|
||||
const resumeState = GenerationJobManager.getResumeState(streamId);
|
||||
if (resumeState && !res.writableEnded) {
|
||||
res.write(`event: message\ndata: ${JSON.stringify({ sync: true, resumeState })}\n\n`);
|
||||
if (typeof res.flush === 'function') {
|
||||
res.flush();
|
||||
}
|
||||
GenerationJobManager.markSyncSent(streamId);
|
||||
logger.debug(
|
||||
`[AgentStream] Sent sync event for ${streamId} with ${resumeState.runSteps.length} run steps`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
const result = GenerationJobManager.subscribe(
|
||||
streamId,
|
||||
|
|
@ -98,7 +115,7 @@ router.get('/chat/stream/:streamId', (req, res) => {
|
|||
* @route GET /chat/status/:conversationId
|
||||
* @desc Check if there's an active generation job for a conversation
|
||||
* @access Private
|
||||
* @returns { active, streamId, status, chunkCount, aggregatedContent, createdAt }
|
||||
* @returns { active, streamId, status, chunkCount, runStepCount, aggregatedContent, createdAt, resumeState }
|
||||
*/
|
||||
router.get('/chat/status/:conversationId', (req, res) => {
|
||||
const { conversationId } = req.params;
|
||||
|
|
@ -114,17 +131,47 @@ router.get('/chat/status/:conversationId', (req, res) => {
|
|||
}
|
||||
|
||||
const info = GenerationJobManager.getStreamInfo(job.streamId);
|
||||
const resumeState = GenerationJobManager.getResumeState(job.streamId);
|
||||
|
||||
res.json({
|
||||
active: info?.active ?? false,
|
||||
streamId: job.streamId,
|
||||
status: info?.status ?? job.status,
|
||||
chunkCount: info?.chunkCount ?? 0,
|
||||
runStepCount: info?.runStepCount ?? 0,
|
||||
aggregatedContent: info?.aggregatedContent,
|
||||
createdAt: info?.createdAt ?? job.createdAt,
|
||||
resumeState,
|
||||
});
|
||||
});
|
||||
|
||||
/**
 * @route POST /chat/abort
 * @desc Abort an ongoing generation job
 * @access Private
 * @description Mounted before chatRouter to bypass buildEndpointOption middleware.
 *   Resolves the job from `streamId`, or from the segment of `abortKey` before the
 *   first `:`. Responds 404 (echoing the resolved id) when no matching job exists.
 */
router.post('/chat/abort', (req, res) => {
  logger.debug(`[AgentStream] ========== ABORT ENDPOINT HIT ==========`);
  logger.debug(`[AgentStream] Method: ${req.method}, Path: ${req.path}`);
  logger.debug(`[AgentStream] Body:`, req.body);

  // The body is client-controlled: it may be absent, and `abortKey` may be any
  // JSON value. Guard both so malformed input yields a 404 rather than a 500.
  const { streamId, abortKey } = req.body ?? {};
  const abortKeyPrefix = typeof abortKey === 'string' ? abortKey.split(':')[0] : undefined;

  const jobStreamId = streamId || abortKeyPrefix;
  logger.debug(`[AgentStream] Computed jobStreamId: ${jobStreamId}`);

  // NOTE(review): nothing here checks that the job belongs to the requesting
  // user — confirm that upstream middleware enforces ownership.
  if (jobStreamId && GenerationJobManager.hasJob(jobStreamId)) {
    logger.debug(`[AgentStream] Job found, aborting: ${jobStreamId}`);
    GenerationJobManager.abortJob(jobStreamId);
    logger.debug(`[AgentStream] Job aborted successfully: ${jobStreamId}`);
    return res.json({ success: true, aborted: jobStreamId });
  }

  logger.warn(`[AgentStream] Job not found for streamId: ${jobStreamId}`);
  return res.status(404).json({ error: 'Job not found', streamId: jobStreamId });
});
|
||||
|
||||
const chatRouter = express.Router();
|
||||
chatRouter.use(configMiddleware);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue