mirror of https://github.com/danny-avila/LibreChat.git, synced 2026-01-02 16:48:50 +01:00
🚦 refactor: Concurrent Request Limiter for Resumable Streams (#11167)
* feat: Implement concurrent request handling in ResumableAgentController
  - Introduced a concurrency management system with new `checkAndIncrementPendingRequest` and `decrementPendingRequest` functions to manage per-user request limits.
  - Replaced the previous `concurrentLimiter` middleware with an approach integrated directly into the `ResumableAgentController`.
  - Enhanced violation logging and request denial for users exceeding their concurrent request limits.
  - Removed the obsolete `concurrentLimiter` middleware file and updated related imports across the codebase.

* refactor: Simplify error handling in ResumableAgentController and enhance SSE error management
  - Removed the `denyRequest` middleware in favor of a direct response for concurrent request violations in the ResumableAgentController.
  - Improved error handling in the `useResumableSSE` hook to differentiate network errors from other error types, so more informative errors reach the error handler.

* test: Enhance MCP server configuration tests with new mocks and improved logging
  - Added mocks for the MCP server registry and manager in `index.spec.js` to facilitate testing of server configurations.
  - Simplified debug logging about server configurations in `initializeMCPs.spec.js` for clearer test output.

* refactor: Enhance concurrency management in request handling
  - Updated `checkAndIncrementPendingRequest` and `decrementPendingRequest` to count requests atomically in Redis, improving concurrency control.
  - Added error handling so requests can still proceed during Redis failures.
  - Streamlined cache key generation for both Redis and the in-memory fallback.
  - Improved comments and documentation of the concurrency logic.

* refactor: Improve atomicity in Redis operations for pending request management
  - Updated `checkAndIncrementPendingRequest` to issue INCR and EXPIRE through a Redis pipeline in a single round-trip, closing the window where a crash could leave a key without a TTL.
  - Added error handling for pipeline execution failures.
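The controller-side wiring is not part of the diff shown below. A minimal TypeScript sketch of how the new check/decrement pair is intended to bracket a generation job; the wrapper is hypothetical, not LibreChat's actual `ResumableAgentController` code, and the import path assumes the `~/middleware` barrel updated later in this diff:

import {
  checkAndIncrementPendingRequest,
  decrementPendingRequest,
  getViolationInfo,
} from '~/middleware/concurrency';

// Hypothetical wrapper: deny when over the limit, otherwise run the job
// and always release the slot afterward.
export async function runWithConcurrencyLimit(
  userId: string,
  job: () => Promise<void>,
  deny: (info: ReturnType<typeof getViolationInfo>) => void,
): Promise<void> {
  const { allowed, pendingRequests, limit } = await checkAndIncrementPendingRequest(userId);
  if (!allowed) {
    // The real controller logs the violation and responds to the client directly
    deny(getViolationInfo(pendingRequests, limit));
    return;
  }
  try {
    await job();
  } finally {
    // Release the slot whether the job completed, errored, or was aborted
    await decrementPendingRequest(userId);
  }
}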
parent: a2361aa891
commit: a7aa4dc91b
9 changed files with 272 additions and 91 deletions
packages/api/src/middleware/concurrency.ts (new file, 227 lines)
@@ -0,0 +1,227 @@
import { logger } from '@librechat/data-schemas';
import { CacheKeys, Time, ViolationTypes } from 'librechat-data-provider';
import { standardCache, cacheConfig, ioredisClient } from '~/cache';
import { isEnabled, math } from '~/utils';

const { USE_REDIS } = cacheConfig;

const LIMIT_CONCURRENT_MESSAGES = process.env.LIMIT_CONCURRENT_MESSAGES;
const CONCURRENT_MESSAGE_MAX = math(process.env.CONCURRENT_MESSAGE_MAX, 2);
const CONCURRENT_VIOLATION_SCORE = math(process.env.CONCURRENT_VIOLATION_SCORE, 1);

/** Lazily initialized cache for pending requests (used only for in-memory fallback) */
let pendingReqCache: ReturnType<typeof standardCache> | null = null;

/**
 * Get or create the pending requests cache for in-memory fallback.
 * Uses lazy initialization to avoid creating the cache before the app is ready.
 */
function getPendingReqCache(): ReturnType<typeof standardCache> | null {
  if (!isEnabled(LIMIT_CONCURRENT_MESSAGES)) {
    return null;
  }
  if (!pendingReqCache) {
    pendingReqCache = standardCache(CacheKeys.PENDING_REQ);
  }
  return pendingReqCache;
}

/**
 * Build the cache key for a user's pending requests.
 * Note: ioredisClient already has keyPrefix applied, so we only add `namespace:userId`.
 */
function buildKey(userId: string): string {
  const namespace = CacheKeys.PENDING_REQ;
  return `${namespace}:${userId}`;
}

/**
 * Build the cache key for the in-memory fallback (Keyv).
 */
function buildMemoryKey(userId: string): string {
  return `:${userId}`;
}

export interface PendingRequestResult {
  allowed: boolean;
  pendingRequests: number;
  limit: number;
}

export interface ViolationInfo {
  type: string;
  limit: number;
  pendingRequests: number;
  score: number;
}

/**
 * Check if a user can make a new concurrent request and increment the counter if allowed.
 * This is designed for resumable streams where the HTTP response lifecycle doesn't match
 * the actual request processing lifecycle.
 *
 * When Redis is available, uses atomic INCR to prevent race conditions.
 * Falls back to non-atomic get/set for the in-memory cache.
 *
 * @param userId - The user's ID
 * @returns Object with `allowed` (boolean), `pendingRequests` (current count), and `limit`
 */
export async function checkAndIncrementPendingRequest(
  userId: string,
): Promise<PendingRequestResult> {
  const limit = Math.max(CONCURRENT_MESSAGE_MAX, 1);

  if (!isEnabled(LIMIT_CONCURRENT_MESSAGES)) {
    return { allowed: true, pendingRequests: 0, limit };
  }

  if (!userId) {
    logger.warn('[concurrency] checkAndIncrementPendingRequest called without userId');
    return { allowed: true, pendingRequests: 0, limit };
  }

  // Use atomic Redis INCR when available to prevent race conditions
  if (USE_REDIS && ioredisClient) {
    const key = buildKey(userId);
    try {
      // The pipeline sends INCR and EXPIRE in one round-trip (INCR itself is atomic);
      // this prevents a crash between separate calls from leaving the key without a TTL
      const pipeline = ioredisClient.pipeline();
      pipeline.incr(key);
      pipeline.expire(key, 60);
      const results = await pipeline.exec();

      if (!results || results[0][0]) {
        throw new Error('Pipeline execution failed');
      }

      const newCount = results[0][1] as number;

      if (newCount > limit) {
        // Over limit - decrement back and reject
        await ioredisClient.decr(key);
        logger.debug(
          `[concurrency] User ${userId} exceeded concurrent limit: ${newCount}/${limit}`,
        );
        return { allowed: false, pendingRequests: newCount, limit };
      }

      logger.debug(
        `[concurrency] User ${userId} incremented pending requests: ${newCount}/${limit}`,
      );
      return { allowed: true, pendingRequests: newCount, limit };
    } catch (error) {
      logger.error('[concurrency] Redis atomic increment failed:', error);
      // On Redis error, allow the request to proceed (fail-open)
      return { allowed: true, pendingRequests: 0, limit };
    }
  }

  // Fallback: non-atomic in-memory cache (a race condition is possible but acceptable in-memory)
  const cache = getPendingReqCache();
  if (!cache) {
    return { allowed: true, pendingRequests: 0, limit };
  }

  const key = buildMemoryKey(userId);
  const pendingRequests = +((await cache.get(key)) ?? 0);

  if (pendingRequests >= limit) {
    logger.debug(
      `[concurrency] User ${userId} exceeded concurrent limit: ${pendingRequests}/${limit}`,
    );
    return { allowed: false, pendingRequests, limit };
  }

  await cache.set(key, pendingRequests + 1, Time.ONE_MINUTE);
  logger.debug(
    `[concurrency] User ${userId} incremented pending requests: ${pendingRequests + 1}/${limit}`,
  );

  return { allowed: true, pendingRequests: pendingRequests + 1, limit };
}

/**
 * Decrement the pending request counter for a user.
 * Should be called when a generation job completes, errors, or is aborted.
 *
 * This function handles errors internally and will never throw - it's a cleanup
 * operation that should not interrupt the main flow if cache operations fail.
 *
 * When Redis is available, uses atomic DECR to prevent race conditions.
 * Falls back to non-atomic get/set for the in-memory cache.
 *
 * @param userId - The user's ID
 */
export async function decrementPendingRequest(userId: string): Promise<void> {
  try {
    if (!isEnabled(LIMIT_CONCURRENT_MESSAGES)) {
      return;
    }

    if (!userId) {
      logger.warn('[concurrency] decrementPendingRequest called without userId');
      return;
    }

    // Use atomic Redis DECR when available
    if (USE_REDIS && ioredisClient) {
      const key = buildKey(userId);
      try {
        const newCount = await ioredisClient.decr(key);
        if (newCount < 0) {
          // Counter went negative - reset to 0 and delete
          await ioredisClient.del(key);
          logger.debug(`[concurrency] User ${userId} pending requests cleared (was negative)`);
        } else if (newCount === 0) {
          // Clean up zero-value keys
          await ioredisClient.del(key);
          logger.debug(`[concurrency] User ${userId} pending requests cleared`);
        } else {
          logger.debug(`[concurrency] User ${userId} decremented pending requests: ${newCount}`);
        }
      } catch (error) {
        logger.error('[concurrency] Redis atomic decrement failed:', error);
      }
      return;
    }

    // Fallback: non-atomic in-memory cache
    const cache = getPendingReqCache();
    if (!cache) {
      return;
    }

    const key = buildMemoryKey(userId);
    const currentReq = +((await cache.get(key)) ?? 0);

    if (currentReq >= 1) {
      await cache.set(key, currentReq - 1, Time.ONE_MINUTE);
      logger.debug(`[concurrency] User ${userId} decremented pending requests: ${currentReq - 1}`);
    } else {
      await cache.delete(key);
      logger.debug(`[concurrency] User ${userId} pending requests cleared (was ${currentReq})`);
    }
  } catch (error) {
    logger.error('[concurrency] Error decrementing pending request:', error);
  }
}

/**
 * Get violation info for logging purposes when a user exceeds the concurrent request limit.
 */
export function getViolationInfo(pendingRequests: number, limit: number): ViolationInfo {
  return {
    type: ViolationTypes.CONCURRENT,
    limit,
    pendingRequests,
    score: CONCURRENT_VIOLATION_SCORE,
  };
}

/**
 * Check if concurrent message limiting is enabled.
 */
export function isConcurrentLimitEnabled(): boolean {
  return isEnabled(LIMIT_CONCURRENT_MESSAGES);
}
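The `results[0][0]` / `results[0][1]` indexing above relies on ioredis's pipeline contract: `exec()` resolves to one `[error, result]` tuple per queued command. A standalone sketch, assuming a local Redis instance and the `ioredis` package (the key name is illustrative):

import Redis from 'ioredis';

async function demo(): Promise<void> {
  const redis = new Redis(); // assumes Redis on localhost:6379
  const pipeline = redis.pipeline();
  pipeline.incr('pending_req:user-1');
  pipeline.expire('pending_req:user-1', 60);
  // exec() resolves to [error, result] tuples in command order,
  // e.g. [[null, 1], [null, 1]] on the first call, so results[0][0]
  // is the INCR error (if any) and results[0][1] is the new count.
  const results = await pipeline.exec();
  console.log(results);
  await redis.quit();
}

demo().catch(console.error);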
@@ -2,3 +2,4 @@ export * from './access';
export * from './error';
export * from './balance';
export * from './json';
export * from './concurrency';
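The limiter is driven by the three environment variables read at the top of `concurrency.ts`. A sketch of the corresponding `.env` entries, using the defaults from the code above (the exact wiring is assumed to follow LibreChat's existing dotenv conventions):

# Enable the concurrent request limiter (off unless set to a truthy value)
LIMIT_CONCURRENT_MESSAGES=true
# Maximum simultaneous pending requests per user (default 2, floored at 1)
CONCURRENT_MESSAGE_MAX=2
# Violation score recorded when a user exceeds the limit (default 1)
CONCURRENT_VIOLATION_SCORE=1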