LibreChat/api/server/routes/agents/index.js
Danny Avila a7aa4dc91b
🚦 refactor: Concurrent Request Limiter for Resumable Streams (#11167)
* feat: Implement concurrent request handling in ResumableAgentController

- Introduced concurrency management by adding `checkAndIncrementPendingRequest` and `decrementPendingRequest` functions to enforce per-user request limits (a rough sketch of the pattern follows this list).
- Replaced the previous `concurrentLimiter` middleware with a more integrated approach directly within the `ResumableAgentController`.
- Enhanced violation logging and request denial for users exceeding their concurrent request limits.
- Removed the obsolete `concurrentLimiter` middleware file and updated related imports across the codebase.
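The commit text does not reproduce the implementation, but the pattern it describes might look roughly like the in-memory sketch below. The two function names come from the commit; the limit value, return shape, and controller wiring are assumptions for illustration only.

```js
// Illustrative in-memory sketch only; the real implementation lives in the LibreChat API.
const pendingRequests = new Map();
const MAX_CONCURRENT = 2; // assumed limit; presumably configurable in practice

function checkAndIncrementPendingRequest(userId) {
  const current = pendingRequests.get(userId) ?? 0;
  if (current >= MAX_CONCURRENT) {
    return { allowed: false, pending: current };
  }
  pendingRequests.set(userId, current + 1);
  return { allowed: true, pending: current + 1 };
}

function decrementPendingRequest(userId) {
  const current = pendingRequests.get(userId) ?? 0;
  if (current <= 1) {
    pendingRequests.delete(userId);
  } else {
    pendingRequests.set(userId, current - 1);
  }
}

// Hypothetical controller wiring: deny over-limit requests up front,
// and always release the slot once the request settles.
async function handleResumableRequest(req, res) {
  const { allowed } = checkAndIncrementPendingRequest(req.user.id);
  if (!allowed) {
    return res.status(429).json({ error: 'Too many concurrent requests' });
  }
  try {
    // ... start the generation job and stream the response ...
  } finally {
    decrementPendingRequest(req.user.id);
  }
}
```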

* refactor: Simplify error handling in ResumableAgentController and enhance SSE error management

- Removed the `denyRequest` middleware and replaced it with a direct response for concurrent request violations in the ResumableAgentController.
- Improved error handling in the `useResumableSSE` hook to differentiate network errors from other error types, so that more informative error responses reach the error handler (sketched below).
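The hook itself is not shown here; the following is only a sketch of how such a distinction could be drawn in a fetch-based SSE client. The hook name comes from the commit, while the handler signature and error shapes are assumptions.

```js
// Illustrative only: not the actual useResumableSSE implementation.
async function readStream(url, { onError }) {
  let response;
  try {
    response = await fetch(url, { headers: { Accept: 'text/event-stream' } });
  } catch (err) {
    // fetch rejects with a TypeError when the network itself fails,
    // so this branch covers dropped connections rather than server errors.
    return onError({ type: 'network', message: 'Connection lost', cause: err });
  }
  if (!response.ok) {
    // Non-network failure: surface the status and body so the error handler
    // can show something more informative than a generic failure message.
    const body = await response.text().catch(() => '');
    return onError({ type: 'http', status: response.status, message: body });
  }
  // ... parse SSE frames from response.body ...
}
```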

* test: Enhance MCP server configuration tests with new mocks and improved logging

- Added mocks for the MCP server registry and manager in `index.spec.js` to facilitate testing of server configurations (see the sketch after this list).
- Updated debug logging in `initializeMCPs.spec.js` to simplify messages regarding server configurations, improving clarity in test outputs.
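As a rough illustration of that kind of test setup, a Jest mock might look like the following; the module paths and exported names here are placeholders, not the project's real ones.

```js
// Placeholder paths and exports; only the jest.mock() pattern is the point here.
jest.mock('~/server/services/MCP/registry', () => ({
  getServerConfigs: jest.fn().mockResolvedValue({}),
}));
jest.mock('~/server/services/MCP/manager', () => ({
  getMCPManager: jest.fn(() => ({
    getAllConnections: jest.fn(() => new Map()),
  })),
}));
```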

* refactor: Enhance concurrency management in request handling

- Updated `checkAndIncrementPendingRequest` and `decrementPendingRequest` functions to utilize Redis for atomic request counting, improving concurrency control.
- Added error handling for Redis operations to ensure requests can proceed even during Redis failures.
- Streamlined cache key generation for both Redis and the in-memory fallback, enhancing clarity and performance in managing pending requests (see the sketch after this list).
- Improved comments and documentation for better understanding of the concurrency logic and its implications.
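A minimal sketch of what a shared key helper and fail-open error handling could look like; the key prefix and helper name are assumptions, not taken from the diff.

```js
const { logger } = require('@librechat/data-schemas');

function pendingRequestKey(userId) {
  // The same key shape serves both the Redis store and the in-memory fallback.
  return `pending_req:${userId}`;
}

async function incrementWithFailOpen(redisClient, userId) {
  try {
    return await redisClient.incr(pendingRequestKey(userId));
  } catch (err) {
    // Fail open: if Redis is unavailable, let the request proceed rather than block users.
    logger.warn('[pendingRequests] Redis unavailable, skipping concurrency check', err);
    return null;
  }
}
```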

* refactor: Improve atomicity in Redis operations for pending request management

- Updated `checkAndIncrementPendingRequest` to use Redis pipelines for atomic INCR and EXPIRE operations, tightening concurrency control and avoiding edge cases (a sketch follows this list).
- Added error handling for pipeline execution failures to ensure robust request management.
- Improved comments for clarity on the concurrency logic and its implications.
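Putting those pieces together, the pipeline-based check might look roughly like this; an ioredis-style client is assumed, and the limit, key prefix, and TTL values are illustrative.

```js
// Illustrative sketch assuming an ioredis-style client; values are placeholders.
async function checkAndIncrementPendingRequest(redisClient, userId, limit = 2) {
  const key = `pending_req:${userId}`;
  try {
    // INCR and EXPIRE are queued together so the counter is never left without a TTL.
    const results = await redisClient.pipeline().incr(key).expire(key, 600).exec();
    const [incrError, pending] = results?.[0] ?? [new Error('pipeline failed'), null];
    if (incrError) {
      // Pipeline execution failed; fail open so the request can still proceed.
      return { allowed: true, pending: null };
    }
    if (pending > limit) {
      // Undo the speculative increment so denied requests don't consume the budget.
      await redisClient.decr(key);
      return { allowed: false, pending: pending - 1 };
    }
    return { allowed: true, pending };
  } catch (err) {
    // Redis unreachable; fail open rather than blocking users on infrastructure errors.
    return { allowed: true, pending: null };
  }
}
```
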
2026-01-01 11:10:56 -05:00


const express = require('express');
const { isEnabled, GenerationJobManager } = require('@librechat/api');
const { logger } = require('@librechat/data-schemas');
const {
uaParser,
checkBan,
requireJwtAuth,
messageIpLimiter,
configMiddleware,
messageUserLimiter,
} = require('~/server/middleware');
const { v1 } = require('./v1');
const chat = require('./chat');
const { LIMIT_MESSAGE_IP, LIMIT_MESSAGE_USER } = process.env ?? {};

const router = express.Router();

router.use(requireJwtAuth);
router.use(checkBan);
router.use(uaParser);

router.use('/', v1);

/**
* Stream endpoints - mounted before chatRouter to bypass rate limiters
* These are GET requests and don't need message body validation or rate limiting
*/
/**
* @route GET /chat/stream/:streamId
* @desc Subscribe to an ongoing generation job's SSE stream with replay support
* @access Private
* @description Sends sync event with resume state, replays missed chunks, then streams live
* @query resume=true - Indicates this is a reconnection (sends sync event)
*/
router.get('/chat/stream/:streamId', async (req, res) => {
const { streamId } = req.params;
const isResume = req.query.resume === 'true';
const job = await GenerationJobManager.getJob(streamId);
if (!job) {
return res.status(404).json({
error: 'Stream not found',
message: 'The generation job does not exist or has expired.',
});
}
res.setHeader('Content-Encoding', 'identity');
res.setHeader('Content-Type', 'text/event-stream');
res.setHeader('Cache-Control', 'no-cache, no-transform');
res.setHeader('Connection', 'keep-alive');
res.setHeader('X-Accel-Buffering', 'no');
res.flushHeaders();
logger.debug(`[AgentStream] Client subscribed to ${streamId}, resume: ${isResume}`);
// Send sync event with resume state for ALL reconnecting clients
// This supports multi-tab scenarios where each tab needs run step data
if (isResume) {
const resumeState = await GenerationJobManager.getResumeState(streamId);
if (resumeState && !res.writableEnded) {
// Send sync event with run steps AND aggregatedContent
// Client will use aggregatedContent to initialize message state
res.write(`event: message\ndata: ${JSON.stringify({ sync: true, resumeState })}\n\n`);
if (typeof res.flush === 'function') {
res.flush();
}
logger.debug(
`[AgentStream] Sent sync event for ${streamId} with ${resumeState.runSteps.length} run steps`,
);
}
}
const result = await GenerationJobManager.subscribe(
streamId,
(event) => {
if (!res.writableEnded) {
res.write(`event: message\ndata: ${JSON.stringify(event)}\n\n`);
if (typeof res.flush === 'function') {
res.flush();
}
}
},
(event) => {
if (!res.writableEnded) {
res.write(`event: message\ndata: ${JSON.stringify(event)}\n\n`);
if (typeof res.flush === 'function') {
res.flush();
}
res.end();
}
},
(error) => {
if (!res.writableEnded) {
res.write(`event: error\ndata: ${JSON.stringify({ error })}\n\n`);
if (typeof res.flush === 'function') {
res.flush();
}
res.end();
}
},
);
if (!result) {
// Headers were already flushed for the SSE response, so a JSON 404 would fail here;
// report the subscription failure as a terminal SSE error event instead.
res.write(`event: error\ndata: ${JSON.stringify({ error: 'Failed to subscribe to stream' })}\n\n`);
return res.end();
}
req.on('close', () => {
logger.debug(`[AgentStream] Client disconnected from ${streamId}`);
result.unsubscribe();
});
});

/**
* @route GET /chat/active
* @desc Get all active generation job IDs for the current user
* @access Private
* @returns { activeJobIds: string[] }
*/
router.get('/chat/active', async (req, res) => {
const activeJobIds = await GenerationJobManager.getActiveJobIdsForUser(req.user.id);
res.json({ activeJobIds });
});

/**
* @route GET /chat/status/:conversationId
* @desc Check if there's an active generation job for a conversation
* @access Private
* @returns { active, streamId, status, aggregatedContent, createdAt, resumeState }
*/
router.get('/chat/status/:conversationId', async (req, res) => {
const { conversationId } = req.params;
// streamId === conversationId, so we can use getJob directly
const job = await GenerationJobManager.getJob(conversationId);
if (!job) {
return res.json({ active: false });
}
if (job.metadata.userId !== req.user.id) {
return res.status(403).json({ error: 'Unauthorized' });
}
// Get resume state which contains aggregatedContent
// Avoid calling both getStreamInfo and getResumeState (both fetch content)
const resumeState = await GenerationJobManager.getResumeState(conversationId);
const isActive = job.status === 'running';
res.json({
active: isActive,
streamId: conversationId,
status: job.status,
aggregatedContent: resumeState?.aggregatedContent ?? [],
createdAt: job.createdAt,
resumeState,
});
});

/**
* @route POST /chat/abort
* @desc Abort an ongoing generation job
* @access Private
* @description Mounted before chatRouter to bypass buildEndpointOption middleware
*/
router.post('/chat/abort', async (req, res) => {
logger.debug(`[AgentStream] ========== ABORT ENDPOINT HIT ==========`);
logger.debug(`[AgentStream] Method: ${req.method}, Path: ${req.path}`);
logger.debug(`[AgentStream] Body:`, req.body);
const { streamId, conversationId, abortKey } = req.body;
const userId = req.user?.id;
// streamId === conversationId, so try any of the provided IDs
// Skip "new" as it's a placeholder for new conversations, not an actual ID
let jobStreamId =
streamId || (conversationId !== 'new' ? conversationId : null) || abortKey?.split(':')[0];
let job = jobStreamId ? await GenerationJobManager.getJob(jobStreamId) : null;
// Fallback: if job not found and we have a userId, look up active jobs for user
// This handles the case where frontend sends "new" but job was created with a UUID
if (!job && userId) {
logger.debug(`[AgentStream] Job not found by ID, checking active jobs for user: ${userId}`);
const activeJobIds = await GenerationJobManager.getActiveJobIdsForUser(userId);
if (activeJobIds.length > 0) {
// Abort the most recent active job for this user
jobStreamId = activeJobIds[0];
job = await GenerationJobManager.getJob(jobStreamId);
logger.debug(`[AgentStream] Found active job for user: ${jobStreamId}`);
}
}
logger.debug(`[AgentStream] Computed jobStreamId: ${jobStreamId}`);
if (job && jobStreamId) {
logger.debug(`[AgentStream] Job found, aborting: ${jobStreamId}`);
await GenerationJobManager.abortJob(jobStreamId);
logger.debug(`[AgentStream] Job aborted successfully: ${jobStreamId}`);
return res.json({ success: true, aborted: jobStreamId });
}
logger.warn(`[AgentStream] Job not found for streamId: ${jobStreamId}`);
return res.status(404).json({ error: 'Job not found', streamId: jobStreamId });
});

const chatRouter = express.Router();
chatRouter.use(configMiddleware);

if (isEnabled(LIMIT_MESSAGE_IP)) {
chatRouter.use(messageIpLimiter);
}
if (isEnabled(LIMIT_MESSAGE_USER)) {
chatRouter.use(messageUserLimiter);
}

chatRouter.use('/', chat);
router.use('/chat', chatRouter);

module.exports = router;
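
For context, a client consumes the stream endpoint above by reading SSE frames from a GET request. The sketch below is illustrative only: the `/api/agents` mount prefix and bearer-token auth are assumptions about how this router is mounted and authenticated, not something shown in this file.

```js
// Illustrative client sketch; the /api/agents prefix and auth transport are assumed.
async function subscribeToStream(streamId, token, { onMessage, onDone }) {
  const res = await fetch(`/api/agents/chat/stream/${streamId}?resume=true`, {
    headers: { Authorization: `Bearer ${token}`, Accept: 'text/event-stream' },
  });
  if (!res.ok) {
    onDone(); // 404 when the job no longer exists or has expired
    return;
  }
  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';
  for (;;) {
    const { value, done } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });
    // SSE frames are separated by a blank line.
    const frames = buffer.split('\n\n');
    buffer = frames.pop();
    for (const frame of frames) {
      const dataLine = frame.split('\n').find((line) => line.startsWith('data: '));
      if (dataLine) {
        onMessage(JSON.parse(dataLine.slice('data: '.length)));
      }
    }
  }
  onDone();
}
```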