2026-03-10 11:21:36 -07:00
|
|
|
/** Failure bookkeeping stored for a single user/server pair. */
interface FailedMeta {
  /** Number of failures recorded so far; incremented on every recorded failure. */
  attempts: number;
  /** `Date.now()` timestamp (ms) captured when the most recent failure was recorded. */
  lastFailedAt: number;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Cooldown durations in milliseconds, indexed by (failure count - 1).
 * Expressed here in minutes (5, 10, 20, 30) and converted to milliseconds.
 */
const COOLDOWN_SCHEDULE_MS = [5, 10, 20, 30].map((minutes) => minutes * 60 * 1000);
|
|
|
|
|
|
2025-09-17 22:49:36 +02:00
|
|
|
/**
 * In-memory tracker of OAuth reconnection state, keyed by user and server name.
 *
 * Two independent pieces of state are kept:
 * - failures: an attempt counter and last-failure timestamp per user/server,
 *   used to answer "is this server still inside its cooldown window?";
 * - active reconnections: the set of servers currently reconnecting per user,
 *   plus the time each reconnection started so stale entries can be expired.
 */
export class OAuthReconnectionTracker {
  /** Map of userId -> (serverName -> failure metadata) */
  private failedMeta: Map<string, Map<string, FailedMeta>> = new Map();
  /** Map of userId -> Set of serverNames that are actively reconnecting */
  private active: Map<string, Set<string>> = new Map();
  /** Map of userId:serverName -> timestamp when reconnection started */
  private activeTimestamps: Map<string, number> = new Map();
  /** Maximum time (ms) a server can be in reconnecting state before auto-cleanup */
  private readonly RECONNECTION_TIMEOUT_MS = 3 * 60 * 1000; // 3 minutes

  /**
   * Reports whether the server is currently inside a failure cooldown window.
   *
   * The cooldown is looked up in `COOLDOWN_SCHEDULE_MS` by attempt count; once
   * the attempt count exceeds the schedule length the last entry keeps being used.
   * The failure record is NOT removed when the cooldown elapses, so the attempt
   * counter keeps growing across failures until `removeFailed` is called.
   *
   * @returns `true` while the cooldown has not yet elapsed; `false` when there is
   *   no failure record or the cooldown is over.
   */
  public isFailed(userId: string, serverName: string): boolean {
    const meta = this.failedMeta.get(userId)?.get(serverName);
    if (!meta) {
      return false;
    }
    // Clamp into [0, length - 1] so an unexpected attempt count (<= 0) cannot
    // produce a negative index and an undefined cooldown.
    const lastIdx = COOLDOWN_SCHEDULE_MS.length - 1;
    const idx = Math.max(0, Math.min(meta.attempts - 1, lastIdx));
    // An empty schedule means there is no cooldown to wait for.
    const cooldown = COOLDOWN_SCHEDULE_MS[idx] ?? 0;
    const elapsed = Date.now() - meta.lastFailedAt;
    return elapsed < cooldown;
  }

  /** Check if server is in the active set (original simple check) */
  public isActive(userId: string, serverName: string): boolean {
    return this.active.get(userId)?.has(serverName) ?? false;
  }

  /**
   * Check if server is still reconnecting (considers timeout).
   *
   * @returns `false` if the server is not in the active set or its reconnection
   *   started more than `RECONNECTION_TIMEOUT_MS` ago; `true` otherwise
   *   (including when the server is active but has no recorded start time).
   */
  public isStillReconnecting(userId: string, serverName: string): boolean {
    if (!this.isActive(userId, serverName)) {
      return false;
    }
    return !this.hasTimedOut(userId, serverName);
  }

  /**
   * Clean up server if it has timed out - returns true if cleanup was performed.
   *
   * Cleanup removes the server from the active set and drops its start timestamp;
   * it does not touch the failure records.
   */
  public cleanupIfTimedOut(userId: string, serverName: string): boolean {
    if (!this.hasTimedOut(userId, serverName)) {
      return false;
    }
    this.removeActive(userId, serverName);
    return true;
  }

  /**
   * Records a failure for the server: increments its attempt counter (starting
   * at 1) and stamps the failure time with `Date.now()`.
   */
  public setFailed(userId: string, serverName: string): void {
    let userMap = this.failedMeta.get(userId);
    if (!userMap) {
      userMap = new Map();
      this.failedMeta.set(userId, userMap);
    }
    const attempts = (userMap.get(serverName)?.attempts ?? 0) + 1;
    userMap.set(serverName, { attempts, lastFailedAt: Date.now() });
  }

  /**
   * Marks the server as actively reconnecting and records the start time.
   * Calling this again for the same server overwrites the start time.
   */
  public setActive(userId: string, serverName: string): void {
    let userServers = this.active.get(userId);
    if (!userServers) {
      userServers = new Set();
      this.active.set(userId, userServers);
    }
    userServers.add(serverName);

    // Track when reconnection started
    this.activeTimestamps.set(this.timestampKey(userId, serverName), Date.now());
  }

  /**
   * Deletes the failure record for the server (resetting its attempt counter)
   * and drops the user's entry once it has no failure records left.
   * No-op when nothing is recorded.
   */
  public removeFailed(userId: string, serverName: string): void {
    const userMap = this.failedMeta.get(userId);
    if (!userMap) {
      return;
    }
    userMap.delete(serverName);
    if (userMap.size === 0) {
      this.failedMeta.delete(userId);
    }
  }

  /**
   * Removes the server from the active set (dropping the user's entry once it is
   * empty) and clears its start timestamp. No-op when nothing is recorded.
   */
  public removeActive(userId: string, serverName: string): void {
    const userServers = this.active.get(userId);
    if (userServers) {
      userServers.delete(serverName);
      if (userServers.size === 0) {
        this.active.delete(userId);
      }
    }

    // Clear timestamp tracking (done even if the user had no active set)
    this.activeTimestamps.delete(this.timestampKey(userId, serverName));
  }

  /**
   * Returns map sizes for diagnostics.
   *
   * `usersWithFailedServers` counts users holding at least one failure record,
   * including records whose cooldown has already elapsed, because records are
   * only deleted by `removeFailed`.
   */
  public getStats(): {
    usersWithFailedServers: number;
    usersWithActiveReconnections: number;
    activeTimestamps: number;
  } {
    return {
      usersWithFailedServers: this.failedMeta.size,
      usersWithActiveReconnections: this.active.size,
      activeTimestamps: this.activeTimestamps.size,
    };
  }

  /** Builds the `userId:serverName` key used by `activeTimestamps`. */
  private timestampKey(userId: string, serverName: string): string {
    return `${userId}:${serverName}`;
  }

  /**
   * True when a start timestamp exists and is older than `RECONNECTION_TIMEOUT_MS`.
   * Compares against `undefined` explicitly so a start time of 0 is still honored.
   */
  private hasTimedOut(userId: string, serverName: string): boolean {
    const startTime = this.activeTimestamps.get(this.timestampKey(userId, serverName));
    if (startTime === undefined) {
      return false;
    }
    return Date.now() - startTime > this.RECONNECTION_TIMEOUT_MS;
  }
}
|