♻️ refactor: On-demand MCP connections: remove proactive reconnect, default to available (#11839)

* feat: Implement reconnection staggering and backoff jitter for MCP connections

- Enhanced the reconnection logic in OAuthReconnectionManager to stagger reconnection attempts for multiple servers, reducing the risk of connection storms.
- Introduced a backoff delay with random jitter in MCPConnection to improve reconnection behavior during network issues.
- Updated the ConnectionsRepository to handle multiple server connections concurrently with a defined concurrency limit.

Added tests to ensure the new reconnection strategy works as intended.

* refactor: Update MCP server query configuration for improved data freshness

- Reduced stale time from 5 minutes to 30 seconds to ensure quicker updates on server initialization.
- Enabled refetching on window focus and mount to enhance data accuracy during user interactions.

* ♻️  refactor: On-demand MCP connections; remove proactive reconnection, default to available

  - Remove reconnectServers() from refresh controller (connection storm root cause)
  - Stop gating server selection on connection status; add to selection immediately
  - Render agent panel tools from DB cache, not live connection status
  - Proceed to cached tools on init failure (only gate on OAuth)
  - Remove unused batchToggleServers()
  - Reduce useMCPServersQuery staleTime from 5min to 30s, enable refetchOnMount/WindowFocus

* refactor: Optimize MCP tool initialization and server connection logic

- Adjusted tool initialization to only occur if no cached tools are available, improving efficiency.
- Updated comments for clarity on server connection and tool fetching processes.
- Removed unnecessary connection status checks during server selection to streamline the user experience.
This commit is contained in:
Danny Avila 2026-02-17 22:33:57 -05:00 committed by GitHub
parent dbf8cd40d3
commit 3bf715e05e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 101 additions and 63 deletions

View file

@ -336,6 +336,69 @@ describe('OAuthReconnectionManager', () => {
});
});
describe('reconnection staggering', () => {
let reconnectionTracker: OAuthReconnectionTracker;
beforeEach(async () => {
jest.useFakeTimers();
reconnectionTracker = new OAuthReconnectionTracker();
reconnectionManager = await OAuthReconnectionManager.createInstance(
flowManager,
tokenMethods,
reconnectionTracker,
);
});
afterEach(() => {
jest.useRealTimers();
});
it('should stagger reconnection attempts for multiple servers', async () => {
const userId = 'user-123';
const oauthServers = new Set(['server1', 'server2', 'server3']);
(mockRegistryInstance.getOAuthServers as jest.Mock).mockResolvedValue(oauthServers);
// All servers have valid tokens and are not connected
tokenMethods.findToken.mockImplementation(async ({ identifier }) => {
return {
userId,
identifier,
expiresAt: new Date(Date.now() + 3600000),
} as unknown as MCPOAuthTokens;
});
const mockNewConnection = {
isConnected: jest.fn().mockResolvedValue(true),
disconnect: jest.fn(),
};
mockMCPManager.getUserConnection.mockResolvedValue(
mockNewConnection as unknown as MCPConnection,
);
(mockRegistryInstance.getServerConfig as jest.Mock).mockResolvedValue(
{} as unknown as MCPOptions,
);
await reconnectionManager.reconnectServers(userId);
// Only the first server should have been attempted immediately
expect(mockMCPManager.getUserConnection).toHaveBeenCalledTimes(1);
expect(mockMCPManager.getUserConnection).toHaveBeenCalledWith(
expect.objectContaining({ serverName: 'server1' }),
);
// After advancing all timers, all servers should have been attempted
await jest.runAllTimersAsync();
expect(mockMCPManager.getUserConnection).toHaveBeenCalledTimes(3);
expect(mockMCPManager.getUserConnection).toHaveBeenCalledWith(
expect.objectContaining({ serverName: 'server2' }),
);
expect(mockMCPManager.getUserConnection).toHaveBeenCalledWith(
expect.objectContaining({ serverName: 'server3' }),
);
});
});
describe('reconnection timeout behavior', () => {
let reconnectionTracker: OAuthReconnectionTracker;

View file

@ -7,6 +7,7 @@ import { MCPManager } from '~/mcp/MCPManager';
import { MCPServersRegistry } from '~/mcp/registry/MCPServersRegistry';
const DEFAULT_CONNECTION_TIMEOUT_MS = 10_000; // ms
const RECONNECT_STAGGER_MS = 500; // ms between each server reconnection
export class OAuthReconnectionManager {
private static instance: OAuthReconnectionManager | null = null;
@ -84,9 +85,14 @@ export class OAuthReconnectionManager {
this.reconnectionsTracker.setActive(userId, serverName);
}
// 3. attempt to reconnect the servers
for (const serverName of serversToReconnect) {
void this.tryReconnect(userId, serverName);
// 3. attempt to reconnect the servers with staggered delays to avoid connection storms
for (let i = 0; i < serversToReconnect.length; i++) {
const serverName = serversToReconnect[i];
if (i === 0) {
void this.tryReconnect(userId, serverName);
} else {
setTimeout(() => void this.tryReconnect(userId, serverName), i * RECONNECT_STAGGER_MS);
}
}
}