refactor: MCP OAuth Polling with Gradual Backoff and Timeout Handling (#9752)
Some checks failed
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Has been cancelled
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Has been cancelled
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Has been cancelled
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Has been cancelled
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Has been cancelled
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Has been cancelled

* refactor: Implement gradual backoff polling for oauth connection status with timeout handling

* refactor: Enhance OAuth polling with gradual backoff and timeout handling; update reconnection tracking

* refactor: reconnection timeout behavior in OAuthReconnectionManager and OAuthReconnectionTracker

- Implement tests to verify reconnection timeout handling, including tracking of reconnection states and cleanup of timed-out entries.
- Enhance existing methods in OAuthReconnectionManager and OAuthReconnectionTracker to support timeout checks and cleanup logic.
- Ensure proper handling of multiple servers with different timeout periods and edge cases for active states.

* chore: remove comment

* refactor: Enforce strict 3-minute OAuth timeout with updated polling intervals and improved timeout handling

* refactor: Remove unused polling logic and prevent duplicate polling for servers in MCP server manager

* refactor: Update localization key for no memories message in MemoryViewer

* refactor: Improve MCP tool initialization by handling server failures

- Introduced a mechanism to track failed MCP servers, preventing retries for unavailable servers.
- Added logging for failed tool creation attempts to enhance debugging and monitoring.

* refactor: Update reconnection timeout to enforce a strict 3-minute limit

* ci: Update reconnection timeout tests to reflect a strict 3-minute limit

* ci: Update reconnection timeout tests to enforce a strict 3-minute limit

* chore: Remove unused MCP connection timeout message
This commit is contained in:
Danny Avila 2025-09-21 22:58:19 -04:00 committed by GitHub
parent f0599ad36c
commit 96870e0da0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 560 additions and 41 deletions

View file

@ -319,4 +319,148 @@ describe('OAuthReconnectionManager', () => {
expect(mockMCPManager.disconnectUserConnection).not.toHaveBeenCalled();
});
});
describe('reconnection timeout behavior', () => {
let reconnectionTracker: OAuthReconnectionTracker;
beforeEach(async () => {
jest.useFakeTimers();
reconnectionTracker = new OAuthReconnectionTracker();
reconnectionManager = await OAuthReconnectionManager.createInstance(
flowManager,
tokenMethods,
reconnectionTracker,
);
});
afterEach(() => {
jest.useRealTimers();
});
it('should handle timed out reconnections via isReconnecting check', () => {
const userId = 'user-123';
const serverName = 'test-server';
const now = Date.now();
jest.setSystemTime(now);
// Set server as reconnecting
reconnectionTracker.setActive(userId, serverName);
expect(reconnectionManager.isReconnecting(userId, serverName)).toBe(true);
// Advance time by 2 minutes 59 seconds - should still be reconnecting
jest.advanceTimersByTime(2 * 60 * 1000 + 59 * 1000);
expect(reconnectionManager.isReconnecting(userId, serverName)).toBe(true);
// Advance time by 2 more seconds (total 3 minutes 1 second) - should be auto-cleaned
jest.advanceTimersByTime(2000);
expect(reconnectionManager.isReconnecting(userId, serverName)).toBe(false);
});
it('should not attempt to reconnect servers that have timed out during reconnection', async () => {
const userId = 'user-123';
const oauthServers = new Set(['server1', 'server2']);
mockMCPManager.getOAuthServers.mockReturnValue(oauthServers);
const now = Date.now();
jest.setSystemTime(now);
// Set server1 as having been reconnecting for over 5 minutes
reconnectionTracker.setActive(userId, 'server1');
jest.advanceTimersByTime(6 * 60 * 1000); // 6 minutes
// server2: has valid token and not connected
tokenMethods.findToken.mockImplementation(async ({ identifier }) => {
if (identifier === 'mcp:server2') {
return {
userId,
identifier,
expiresAt: new Date(Date.now() + 3600000),
} as unknown as MCPOAuthTokens;
}
return null;
});
// Mock successful reconnection
const mockNewConnection = {
isConnected: jest.fn().mockResolvedValue(true),
disconnect: jest.fn(),
};
mockMCPManager.getUserConnection.mockResolvedValue(
mockNewConnection as unknown as MCPConnection,
);
await reconnectionManager.reconnectServers(userId);
// server1 should still be in active set, just not eligible for reconnection
expect(reconnectionTracker.isActive(userId, 'server1')).toBe(true);
expect(reconnectionTracker.isStillReconnecting(userId, 'server1')).toBe(false);
// Only server2 should be marked as reconnecting
expect(reconnectionTracker.isActive(userId, 'server2')).toBe(true);
// Wait for async reconnection using runAllTimersAsync
await jest.runAllTimersAsync();
// Verify only server2 was reconnected
expect(mockMCPManager.getUserConnection).toHaveBeenCalledTimes(1);
expect(mockMCPManager.getUserConnection).toHaveBeenCalledWith(
expect.objectContaining({
serverName: 'server2',
}),
);
});
it('should properly track reconnection time for multiple sequential reconnect attempts', async () => {
const userId = 'user-123';
const serverName = 'server1';
const oauthServers = new Set([serverName]);
mockMCPManager.getOAuthServers.mockReturnValue(oauthServers);
const now = Date.now();
jest.setSystemTime(now);
// Setup valid token
tokenMethods.findToken.mockResolvedValue({
userId,
identifier: `mcp:${serverName}`,
expiresAt: new Date(Date.now() + 3600000),
} as unknown as MCPOAuthTokens);
// First reconnect attempt - will fail
mockMCPManager.getUserConnection.mockRejectedValueOnce(new Error('Connection failed'));
mockMCPManager.getRawConfig.mockReturnValue({} as unknown as MCPOptions);
await reconnectionManager.reconnectServers(userId);
await jest.runAllTimersAsync();
// Server should be marked as failed
expect(reconnectionTracker.isFailed(userId, serverName)).toBe(true);
expect(reconnectionTracker.isActive(userId, serverName)).toBe(false);
// Clear failed state to allow another attempt
reconnectionManager.clearReconnection(userId, serverName);
// Advance time by 3 minutes
jest.advanceTimersByTime(3 * 60 * 1000);
// Second reconnect attempt - will succeed
const mockConnection = {
isConnected: jest.fn().mockResolvedValue(true),
};
mockMCPManager.getUserConnection.mockResolvedValue(
mockConnection as unknown as MCPConnection,
);
await reconnectionManager.reconnectServers(userId);
// Server should be marked as active with new timestamp
expect(reconnectionTracker.isActive(userId, serverName)).toBe(true);
await jest.runAllTimersAsync();
// After successful reconnection, should be cleared
expect(reconnectionTracker.isActive(userId, serverName)).toBe(false);
expect(reconnectionTracker.isFailed(userId, serverName)).toBe(false);
});
});
});

View file

@ -56,7 +56,9 @@ export class OAuthReconnectionManager {
}
public isReconnecting(userId: string, serverName: string): boolean {
return this.reconnectionsTracker.isActive(userId, serverName);
// Clean up if timed out, then return whether still reconnecting
this.reconnectionsTracker.cleanupIfTimedOut(userId, serverName);
return this.reconnectionsTracker.isStillReconnecting(userId, serverName);
}
public async reconnectServers(userId: string) {
@ -149,6 +151,10 @@ export class OAuthReconnectionManager {
return false;
}
if (this.reconnectionsTracker.isActive(userId, serverName)) {
return false;
}
// if the server is already connected, don't attempt to reconnect
const existingConnections = this.mcpManager.getUserConnections(userId);
if (existingConnections?.has(serverName)) {

View file

@ -178,4 +178,278 @@ describe('OAuthReconnectTracker', () => {
expect(tracker.isFailed(userId, serverName)).toBe(false);
});
});
describe('timeout behavior', () => {
beforeEach(() => {
jest.useFakeTimers();
});
afterEach(() => {
jest.useRealTimers();
});
it('should track timestamp when setting active state', () => {
const now = Date.now();
jest.setSystemTime(now);
tracker.setActive(userId, serverName);
expect(tracker.isActive(userId, serverName)).toBe(true);
// Verify timestamp was recorded (implementation detail tested via timeout behavior)
jest.advanceTimersByTime(1000); // 1 second
expect(tracker.isActive(userId, serverName)).toBe(true);
});
it('should handle timeout checking with isStillReconnecting', () => {
const now = Date.now();
jest.setSystemTime(now);
tracker.setActive(userId, serverName);
expect(tracker.isStillReconnecting(userId, serverName)).toBe(true);
// Advance time by 2 minutes 59 seconds - should still be reconnecting
jest.advanceTimersByTime(2 * 60 * 1000 + 59 * 1000);
expect(tracker.isStillReconnecting(userId, serverName)).toBe(true);
// Advance time by 2 more seconds (total 3 minutes 1 second) - should not be still reconnecting
jest.advanceTimersByTime(2000);
expect(tracker.isStillReconnecting(userId, serverName)).toBe(false);
// But isActive should still return true (simple check)
expect(tracker.isActive(userId, serverName)).toBe(true);
});
it('should handle multiple servers with different timeout periods', () => {
const now = Date.now();
jest.setSystemTime(now);
// Set server1 as active
tracker.setActive(userId, serverName);
expect(tracker.isActive(userId, serverName)).toBe(true);
// Advance 3 minutes
jest.advanceTimersByTime(3 * 60 * 1000);
// Set server2 as active
tracker.setActive(userId, anotherServer);
expect(tracker.isActive(userId, anotherServer)).toBe(true);
expect(tracker.isActive(userId, serverName)).toBe(true);
// Advance 2 more minutes + 1ms (server1 at 5 min + 1ms, server2 at 2 min + 1ms)
jest.advanceTimersByTime(2 * 60 * 1000 + 1);
expect(tracker.isStillReconnecting(userId, serverName)).toBe(false); // server1 timed out
expect(tracker.isStillReconnecting(userId, anotherServer)).toBe(true); // server2 still active
// Advance 3 more minutes (server2 at 5 min + 1ms)
jest.advanceTimersByTime(3 * 60 * 1000);
expect(tracker.isStillReconnecting(userId, anotherServer)).toBe(false); // server2 timed out
});
it('should clear timestamp when removing active state', () => {
const now = Date.now();
jest.setSystemTime(now);
tracker.setActive(userId, serverName);
expect(tracker.isActive(userId, serverName)).toBe(true);
tracker.removeActive(userId, serverName);
expect(tracker.isActive(userId, serverName)).toBe(false);
// Set active again and verify new timestamp is used
jest.advanceTimersByTime(3 * 60 * 1000);
tracker.setActive(userId, serverName);
expect(tracker.isActive(userId, serverName)).toBe(true);
// Advance 4 more minutes from new timestamp - should still be active
jest.advanceTimersByTime(4 * 60 * 1000);
expect(tracker.isActive(userId, serverName)).toBe(true);
});
it('should properly cleanup after timeout occurs', () => {
const now = Date.now();
jest.setSystemTime(now);
tracker.setActive(userId, serverName);
tracker.setActive(userId, anotherServer);
expect(tracker.isActive(userId, serverName)).toBe(true);
expect(tracker.isActive(userId, anotherServer)).toBe(true);
// Advance past timeout
jest.advanceTimersByTime(6 * 60 * 1000);
// Both should still be in active set but not "still reconnecting"
expect(tracker.isActive(userId, serverName)).toBe(true);
expect(tracker.isActive(userId, anotherServer)).toBe(true);
expect(tracker.isStillReconnecting(userId, serverName)).toBe(false);
expect(tracker.isStillReconnecting(userId, anotherServer)).toBe(false);
// Cleanup both
expect(tracker.cleanupIfTimedOut(userId, serverName)).toBe(true);
expect(tracker.cleanupIfTimedOut(userId, anotherServer)).toBe(true);
// Now they should be removed from active set
expect(tracker.isActive(userId, serverName)).toBe(false);
expect(tracker.isActive(userId, anotherServer)).toBe(false);
});
it('should handle timeout check for non-existent entries gracefully', () => {
const now = Date.now();
jest.setSystemTime(now);
// Check non-existent entry
expect(tracker.isActive('non-existent', 'non-existent')).toBe(false);
expect(tracker.isStillReconnecting('non-existent', 'non-existent')).toBe(false);
// Set and then manually remove
tracker.setActive(userId, serverName);
tracker.removeActive(userId, serverName);
// Advance time and check - should not throw
jest.advanceTimersByTime(6 * 60 * 1000);
expect(tracker.isActive(userId, serverName)).toBe(false);
expect(tracker.isStillReconnecting(userId, serverName)).toBe(false);
});
});
describe('isStillReconnecting', () => {
it('should return true for active entries within timeout', () => {
jest.useFakeTimers();
const now = Date.now();
jest.setSystemTime(now);
tracker.setActive(userId, serverName);
expect(tracker.isStillReconnecting(userId, serverName)).toBe(true);
// Still within timeout
jest.advanceTimersByTime(3 * 60 * 1000);
expect(tracker.isStillReconnecting(userId, serverName)).toBe(true);
jest.useRealTimers();
});
it('should return false for timed out entries', () => {
jest.useFakeTimers();
const now = Date.now();
jest.setSystemTime(now);
tracker.setActive(userId, serverName);
// Advance past timeout
jest.advanceTimersByTime(6 * 60 * 1000);
// Should not be still reconnecting
expect(tracker.isStillReconnecting(userId, serverName)).toBe(false);
// But isActive should still return true (simple check)
expect(tracker.isActive(userId, serverName)).toBe(true);
jest.useRealTimers();
});
it('should return false for non-existent entries', () => {
expect(tracker.isStillReconnecting('non-existent', 'non-existent')).toBe(false);
expect(tracker.isStillReconnecting(userId, serverName)).toBe(false);
});
});
describe('cleanupIfTimedOut', () => {
beforeEach(() => {
jest.useFakeTimers();
});
afterEach(() => {
jest.useRealTimers();
});
it('should cleanup timed out entries and return true', () => {
const now = Date.now();
jest.setSystemTime(now);
tracker.setActive(userId, serverName);
expect(tracker.isActive(userId, serverName)).toBe(true);
// Advance past timeout
jest.advanceTimersByTime(6 * 60 * 1000);
// Cleanup should return true and remove the entry
const wasCleanedUp = tracker.cleanupIfTimedOut(userId, serverName);
expect(wasCleanedUp).toBe(true);
expect(tracker.isActive(userId, serverName)).toBe(false);
});
it('should not cleanup active entries and return false', () => {
const now = Date.now();
jest.setSystemTime(now);
tracker.setActive(userId, serverName);
// Within timeout period
jest.advanceTimersByTime(3 * 60 * 1000);
const wasCleanedUp = tracker.cleanupIfTimedOut(userId, serverName);
expect(wasCleanedUp).toBe(false);
expect(tracker.isActive(userId, serverName)).toBe(true);
});
it('should return false for non-existent entries', () => {
const wasCleanedUp = tracker.cleanupIfTimedOut('non-existent', 'non-existent');
expect(wasCleanedUp).toBe(false);
});
});
describe('timestamp tracking edge cases', () => {
beforeEach(() => {
jest.useFakeTimers();
});
afterEach(() => {
jest.useRealTimers();
});
it('should update timestamp when setting active on already active server', () => {
const now = Date.now();
jest.setSystemTime(now);
tracker.setActive(userId, serverName);
expect(tracker.isActive(userId, serverName)).toBe(true);
// Advance 3 minutes
jest.advanceTimersByTime(3 * 60 * 1000);
expect(tracker.isActive(userId, serverName)).toBe(true);
// Set active again - should reset timestamp
tracker.setActive(userId, serverName);
// Advance 4 more minutes from reset (total 7 minutes from start)
jest.advanceTimersByTime(4 * 60 * 1000);
// Should still be active since timestamp was reset at 3 minutes
expect(tracker.isActive(userId, serverName)).toBe(true);
// Advance 2 more minutes (6 minutes from reset)
jest.advanceTimersByTime(2 * 60 * 1000);
// Should not be still reconnecting (timed out)
expect(tracker.isStillReconnecting(userId, serverName)).toBe(false);
});
it('should handle same server for different users independently', () => {
const anotherUserId = 'user456';
const now = Date.now();
jest.setSystemTime(now);
tracker.setActive(userId, serverName);
// Advance 3 minutes
jest.advanceTimersByTime(3 * 60 * 1000);
tracker.setActive(anotherUserId, serverName);
// Advance 3 more minutes
jest.advanceTimersByTime(3 * 60 * 1000);
// First user's connection should be timed out
expect(tracker.isStillReconnecting(userId, serverName)).toBe(false);
// Second user's connection should still be reconnecting
expect(tracker.isStillReconnecting(anotherUserId, serverName)).toBe(true);
});
});
});

View file

@ -1,17 +1,52 @@
export class OAuthReconnectionTracker {
// Map of userId -> Set of serverNames that have failed reconnection
/** Map of userId -> Set of serverNames that have failed reconnection */
private failed: Map<string, Set<string>> = new Map();
// Map of userId -> Set of serverNames that are actively reconnecting
/** Map of userId -> Set of serverNames that are actively reconnecting */
private active: Map<string, Set<string>> = new Map();
/** Map of userId:serverName -> timestamp when reconnection started */
private activeTimestamps: Map<string, number> = new Map();
/** Maximum time (ms) a server can be in reconnecting state before auto-cleanup */
private readonly RECONNECTION_TIMEOUT_MS = 3 * 60 * 1000; // 3 minutes
public isFailed(userId: string, serverName: string): boolean {
return this.failed.get(userId)?.has(serverName) ?? false;
}
/** Check if server is in the active set (original simple check) */
public isActive(userId: string, serverName: string): boolean {
return this.active.get(userId)?.has(serverName) ?? false;
}
/** Check if server is still reconnecting (considers timeout) */
public isStillReconnecting(userId: string, serverName: string): boolean {
if (!this.isActive(userId, serverName)) {
return false;
}
const key = `${userId}:${serverName}`;
const startTime = this.activeTimestamps.get(key);
// If there's a timestamp and it has timed out, it's not still reconnecting
if (startTime && Date.now() - startTime > this.RECONNECTION_TIMEOUT_MS) {
return false;
}
return true;
}
/** Clean up server if it has timed out - returns true if cleanup was performed */
public cleanupIfTimedOut(userId: string, serverName: string): boolean {
const key = `${userId}:${serverName}`;
const startTime = this.activeTimestamps.get(key);
if (startTime && Date.now() - startTime > this.RECONNECTION_TIMEOUT_MS) {
this.removeActive(userId, serverName);
return true;
}
return false;
}
public setFailed(userId: string, serverName: string): void {
if (!this.failed.has(userId)) {
this.failed.set(userId, new Set());
@ -26,6 +61,10 @@ export class OAuthReconnectionTracker {
}
this.active.get(userId)?.add(serverName);
/** Track when reconnection started */
const key = `${userId}:${serverName}`;
this.activeTimestamps.set(key, Date.now());
}
public removeFailed(userId: string, serverName: string): void {
@ -42,5 +81,9 @@ export class OAuthReconnectionTracker {
if (userServers?.size === 0) {
this.active.delete(userId);
}
/** Clear timestamp tracking */
const key = `${userId}:${serverName}`;
this.activeTimestamps.delete(key);
}
}