mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-01-02 16:48:50 +01:00
🚦 refactor: Concurrent Request Limiter for Resumable Streams (#11167)
* feat: Implement concurrent request handling in ResumableAgentController
  - Introduced a new concurrency management system by adding `checkAndIncrementPendingRequest` and `decrementPendingRequest` functions to manage user request limits.
  - Replaced the previous `concurrentLimiter` middleware with a more integrated approach directly within the `ResumableAgentController`.
  - Enhanced violation logging and request denial for users exceeding their concurrent request limits.
  - Removed the obsolete `concurrentLimiter` middleware file and updated related imports across the codebase.
* refactor: Simplify error handling in ResumableAgentController and enhance SSE error management
  - Removed the `denyRequest` middleware and replaced it with a direct response for concurrent request violations in the ResumableAgentController.
  - Improved error handling in the `useResumableSSE` hook to differentiate between network errors and other error types, ensuring more informative error responses are sent to the error handler.
* test: Enhance MCP server configuration tests with new mocks and improved logging
  - Added mocks for MCP server registry and manager in `index.spec.js` to facilitate testing of server configurations.
  - Updated debug logging in `initializeMCPs.spec.js` to simplify messages regarding server configurations, improving clarity in test outputs.
* refactor: Enhance concurrency management in request handling
  - Updated `checkAndIncrementPendingRequest` and `decrementPendingRequest` functions to utilize Redis for atomic request counting, improving concurrency control.
  - Added error handling for Redis operations to ensure requests can proceed even during Redis failures.
  - Streamlined cache key generation for both Redis and in-memory fallback, enhancing clarity and performance in managing pending requests.
  - Improved comments and documentation for better understanding of the concurrency logic and its implications.
* refactor: Improve atomicity in Redis operations for pending request management
  - Updated `checkAndIncrementPendingRequest` to utilize Redis pipelines for atomic INCR and EXPIRE operations, enhancing concurrency control and preventing edge cases.
  - Added error handling for pipeline execution failures to ensure robust request management.
  - Improved comments for clarity on the concurrency logic and its implications.
This commit is contained in:
parent
a2361aa891
commit
a7aa4dc91b
9 changed files with 272 additions and 91 deletions
|
|
@ -1,13 +1,17 @@
|
|||
const { logger } = require('@librechat/data-schemas');
|
||||
const { Constants } = require('librechat-data-provider');
|
||||
const { Constants, ViolationTypes } = require('librechat-data-provider');
|
||||
const {
|
||||
sendEvent,
|
||||
getViolationInfo,
|
||||
GenerationJobManager,
|
||||
decrementPendingRequest,
|
||||
sanitizeFileForTransmit,
|
||||
sanitizeMessageForTransmit,
|
||||
checkAndIncrementPendingRequest,
|
||||
} = require('@librechat/api');
|
||||
const { handleAbortError } = require('~/server/middleware');
|
||||
const { disposeClient, clientRegistry, requestDataMap } = require('~/server/cleanup');
|
||||
const { handleAbortError } = require('~/server/middleware');
|
||||
const { logViolation } = require('~/cache');
|
||||
const { saveMessage } = require('~/models');
|
||||
|
||||
function createCloseHandler(abortController) {
|
||||
|
|
@ -47,6 +51,13 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
|
|||
|
||||
const userId = req.user.id;
|
||||
|
||||
const { allowed, pendingRequests, limit } = await checkAndIncrementPendingRequest(userId);
|
||||
if (!allowed) {
|
||||
const violationInfo = getViolationInfo(pendingRequests, limit);
|
||||
await logViolation(req, res, ViolationTypes.CONCURRENT, violationInfo, violationInfo.score);
|
||||
return res.status(429).json(violationInfo);
|
||||
}
|
||||
|
||||
// Generate conversationId upfront if not provided - streamId === conversationId always
|
||||
// Treat "new" as a placeholder that needs a real UUID (frontend may send "new" for new convos)
|
||||
const conversationId =
|
||||
|
|
@ -137,6 +148,7 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
|
|||
|
||||
if (job.abortController.signal.aborted) {
|
||||
GenerationJobManager.completeJob(streamId, 'Request aborted during initialization');
|
||||
await decrementPendingRequest(userId);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -263,6 +275,7 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
|
|||
|
||||
GenerationJobManager.emitDone(streamId, finalEvent);
|
||||
GenerationJobManager.completeJob(streamId);
|
||||
await decrementPendingRequest(userId);
|
||||
|
||||
if (client.savedMessageIds && !client.savedMessageIds.has(messageId)) {
|
||||
await saveMessage(
|
||||
|
|
@ -282,6 +295,7 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
|
|||
};
|
||||
GenerationJobManager.emitDone(streamId, finalEvent);
|
||||
GenerationJobManager.completeJob(streamId, 'Request aborted');
|
||||
await decrementPendingRequest(userId);
|
||||
}
|
||||
|
||||
if (!client.skipSaveUserMessage && userMessage) {
|
||||
|
|
@ -322,6 +336,8 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
|
|||
GenerationJobManager.completeJob(streamId, error.message);
|
||||
}
|
||||
|
||||
await decrementPendingRequest(userId);
|
||||
|
||||
if (client) {
|
||||
disposeClient(client);
|
||||
}
|
||||
|
|
@ -332,11 +348,12 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
|
|||
};
|
||||
|
||||
// Start generation and handle any unhandled errors
|
||||
startGeneration().catch((err) => {
|
||||
startGeneration().catch(async (err) => {
|
||||
logger.error(
|
||||
`[ResumableAgentController] Unhandled error in background generation: ${err.message}`,
|
||||
);
|
||||
GenerationJobManager.completeJob(streamId, err.message);
|
||||
await decrementPendingRequest(userId);
|
||||
});
|
||||
} catch (error) {
|
||||
logger.error('[ResumableAgentController] Initialization error:', error);
|
||||
|
|
@ -347,6 +364,7 @@ const ResumableAgentController = async (req, res, next, initializeClient, addTit
|
|||
GenerationJobManager.emitError(streamId, error.message || 'Failed to start generation');
|
||||
}
|
||||
GenerationJobManager.completeJob(streamId, error.message);
|
||||
await decrementPendingRequest(userId);
|
||||
if (client) {
|
||||
disposeClient(client);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue