mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-03-15 12:16:33 +01:00
⛈️ fix: MCP Reconnection Storm Prevention with Circuit Breaker, Backoff, and Tool Stubs (#12162)
* fix: MCP reconnection stability - circuit breaker, throttling, and cooldown retry
* Comment and logging cleanup
* fix broken tests
This commit is contained in:
parent
cfbe812d63
commit
ad5c51f62b
9 changed files with 736 additions and 38 deletions
|
|
@@ -1,6 +1,12 @@
|
|||
interface FailedMeta {
|
||||
attempts: number;
|
||||
lastFailedAt: number;
|
||||
}
|
||||
|
||||
const COOLDOWN_SCHEDULE_MS = [5 * 60 * 1000, 10 * 60 * 1000, 20 * 60 * 1000, 30 * 60 * 1000];
|
||||
|
||||
export class OAuthReconnectionTracker {
|
||||
/** Map of userId -> Set of serverNames that have failed reconnection */
|
||||
private failed: Map<string, Set<string>> = new Map();
|
||||
private failedMeta: Map<string, Map<string, FailedMeta>> = new Map();
|
||||
/** Map of userId -> Set of serverNames that are actively reconnecting */
|
||||
private active: Map<string, Set<string>> = new Map();
|
||||
/** Map of userId:serverName -> timestamp when reconnection started */
|
||||
|
|
@@ -9,7 +15,17 @@ export class OAuthReconnectionTracker {
|
|||
private readonly RECONNECTION_TIMEOUT_MS = 3 * 60 * 1000; // 3 minutes
|
||||
|
||||
public isFailed(userId: string, serverName: string): boolean {
|
||||
return this.failed.get(userId)?.has(serverName) ?? false;
|
||||
const meta = this.failedMeta.get(userId)?.get(serverName);
|
||||
if (!meta) {
|
||||
return false;
|
||||
}
|
||||
const idx = Math.min(meta.attempts - 1, COOLDOWN_SCHEDULE_MS.length - 1);
|
||||
const cooldown = COOLDOWN_SCHEDULE_MS[idx];
|
||||
const elapsed = Date.now() - meta.lastFailedAt;
|
||||
if (elapsed >= cooldown) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Check if server is in the active set (original simple check) */
|
||||
|
|
@@ -48,11 +64,15 @@ export class OAuthReconnectionTracker {
|
|||
}
|
||||
|
||||
public setFailed(userId: string, serverName: string): void {
|
||||
if (!this.failed.has(userId)) {
|
||||
this.failed.set(userId, new Set());
|
||||
if (!this.failedMeta.has(userId)) {
|
||||
this.failedMeta.set(userId, new Map());
|
||||
}
|
||||
|
||||
this.failed.get(userId)?.add(serverName);
|
||||
const userMap = this.failedMeta.get(userId)!;
|
||||
const existing = userMap.get(serverName);
|
||||
userMap.set(serverName, {
|
||||
attempts: (existing?.attempts ?? 0) + 1,
|
||||
lastFailedAt: Date.now(),
|
||||
});
|
||||
}
|
||||
|
||||
public setActive(userId: string, serverName: string): void {
|
||||
|
|
@@ -68,10 +88,10 @@ export class OAuthReconnectionTracker {
|
|||
}
|
||||
|
||||
public removeFailed(userId: string, serverName: string): void {
|
||||
const userServers = this.failed.get(userId);
|
||||
userServers?.delete(serverName);
|
||||
if (userServers?.size === 0) {
|
||||
this.failed.delete(userId);
|
||||
const userMap = this.failedMeta.get(userId);
|
||||
userMap?.delete(serverName);
|
||||
if (userMap?.size === 0) {
|
||||
this.failedMeta.delete(userId);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@@ -94,7 +114,7 @@ export class OAuthReconnectionTracker {
|
|||
activeTimestamps: number;
|
||||
} {
|
||||
return {
|
||||
usersWithFailedServers: this.failed.size,
|
||||
usersWithFailedServers: this.failedMeta.size,
|
||||
usersWithActiveReconnections: this.active.size,
|
||||
activeTimestamps: this.activeTimestamps.size,
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue