refactor: MCP OAuth Polling with Gradual Backoff and Timeout Handling (#9752)
Some checks failed
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Has been cancelled
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Has been cancelled
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Has been cancelled
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Has been cancelled
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Has been cancelled
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Has been cancelled

* refactor: Implement gradual backoff polling for oauth connection status with timeout handling

* refactor: Enhance OAuth polling with gradual backoff and timeout handling; update reconnection tracking

* refactor: Rework reconnection timeout behavior in OAuthReconnectionManager and OAuthReconnectionTracker

- Implement tests to verify reconnection timeout handling, including tracking of reconnection states and cleanup of timed-out entries.
- Enhance existing methods in OAuthReconnectionManager and OAuthReconnectionTracker to support timeout checks and cleanup logic.
- Ensure proper handling of multiple servers with different timeout periods and edge cases for active states.

* chore: remove comment

* refactor: Enforce strict 3-minute OAuth timeout with updated polling intervals and improved timeout handling

* refactor: Remove unused polling logic and prevent duplicate polling for servers in MCP server manager

* refactor: Update localization key for no memories message in MemoryViewer

* refactor: Improve MCP tool initialization by handling server failures

- Introduced a mechanism to track failed MCP servers, preventing retries for unavailable servers.
- Added logging for failed tool creation attempts to enhance debugging and monitoring.

* refactor: Update reconnection timeout to enforce a strict 3-minute limit

* ci: Update reconnection timeout tests to reflect a strict 3-minute limit

* ci: Update reconnection timeout tests to enforce a strict 3-minute limit

* chore: Remove unused MCP connection timeout message
This commit is contained in:
Danny Avila 2025-09-21 22:58:19 -04:00 committed by GitHub
parent f0599ad36c
commit 96870e0da0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 560 additions and 41 deletions

View file

@@ -129,7 +129,7 @@ export function useMCPServerManager({ conversationId }: { conversationId?: strin
(serverName: string) => {
const state = serverStates[serverName];
if (state?.pollInterval) {
clearInterval(state.pollInterval);
clearTimeout(state.pollInterval);
}
updateServerState(serverName, {
isInitializing: false,
@@ -144,8 +144,53 @@ export function useMCPServerManager({ conversationId }: { conversationId?: strin
const startServerPolling = useCallback(
(serverName: string) => {
const pollInterval = setInterval(async () => {
// Prevent duplicate polling for the same server
const existingState = serverStates[serverName];
if (existingState?.pollInterval) {
console.debug(`[MCP Manager] Polling already active for ${serverName}, skipping duplicate`);
return;
}
let pollAttempts = 0;
let timeoutId: NodeJS.Timeout | null = null;
/** OAuth typically completes in 5 seconds to 3 minutes
* We enforce a strict 3-minute timeout with gradual backoff
*/
const getPollInterval = (attempt: number): number => {
if (attempt < 12) return 5000; // First minute: every 5s (12 polls)
if (attempt < 22) return 6000; // Second minute: every 6s (10 polls)
return 7500; // Final minute: every 7.5s (8 polls)
};
const maxAttempts = 30; // Exactly 3 minutes (180 seconds) total
const OAUTH_TIMEOUT_MS = 180000; // 3 minutes in milliseconds
const pollOnce = async () => {
try {
pollAttempts++;
const state = serverStates[serverName];
/** Stop polling after 3 minutes or max attempts */
const elapsedTime = state?.oauthStartTime
? Date.now() - state.oauthStartTime
: pollAttempts * 5000; // Rough estimate if no start time
if (pollAttempts > maxAttempts || elapsedTime > OAUTH_TIMEOUT_MS) {
console.warn(
`[MCP Manager] OAuth timeout for ${serverName} after ${(elapsedTime / 1000).toFixed(0)}s (attempt ${pollAttempts})`,
);
showToast({
message: localize('com_ui_mcp_oauth_timeout', { 0: serverName }),
status: 'error',
});
if (timeoutId) {
clearTimeout(timeoutId);
}
cleanupServerState(serverName);
return;
}
await queryClient.refetchQueries([QueryKeys.mcpConnectionStatus]);
const freshConnectionData = queryClient.getQueryData([
@@ -153,11 +198,12 @@ export function useMCPServerManager({ conversationId }: { conversationId?: strin
]) as any;
const freshConnectionStatus = freshConnectionData?.connectionStatus || {};
const state = serverStates[serverName];
const serverStatus = freshConnectionStatus[serverName];
if (serverStatus?.connectionState === 'connected') {
clearInterval(pollInterval);
if (timeoutId) {
clearTimeout(timeoutId);
}
showToast({
message: localize('com_ui_mcp_authenticated_success', { 0: serverName }),
@@ -179,12 +225,15 @@ export function useMCPServerManager({ conversationId }: { conversationId?: strin
return;
}
if (state?.oauthStartTime && Date.now() - state.oauthStartTime > 180000) {
// Check for OAuth timeout (should align with maxAttempts)
if (state?.oauthStartTime && Date.now() - state.oauthStartTime > OAUTH_TIMEOUT_MS) {
showToast({
message: localize('com_ui_mcp_oauth_timeout', { 0: serverName }),
status: 'error',
});
clearInterval(pollInterval);
if (timeoutId) {
clearTimeout(timeoutId);
}
cleanupServerState(serverName);
return;
}
@@ -194,19 +243,38 @@ export function useMCPServerManager({ conversationId }: { conversationId?: strin
message: localize('com_ui_mcp_init_failed'),
status: 'error',
});
clearInterval(pollInterval);
if (timeoutId) {
clearTimeout(timeoutId);
}
cleanupServerState(serverName);
return;
}
// Schedule next poll with smart intervals based on OAuth timing
const nextInterval = getPollInterval(pollAttempts);
// Log progress periodically
if (pollAttempts % 5 === 0 || pollAttempts <= 2) {
console.debug(
`[MCP Manager] Polling ${serverName} attempt ${pollAttempts}/${maxAttempts}, next in ${nextInterval / 1000}s`,
);
}
timeoutId = setTimeout(pollOnce, nextInterval);
updateServerState(serverName, { pollInterval: timeoutId });
} catch (error) {
console.error(`[MCP Manager] Error polling server ${serverName}:`, error);
clearInterval(pollInterval);
if (timeoutId) {
clearTimeout(timeoutId);
}
cleanupServerState(serverName);
return;
}
}, 3500);
};
updateServerState(serverName, { pollInterval });
// Start the first poll
timeoutId = setTimeout(pollOnce, getPollInterval(0));
updateServerState(serverName, { pollInterval: timeoutId });
},
[
queryClient,