🔄 refactor: MCP Server Init and Stale Cache Handling (#10984)

* 🔧 refactor: Update MCP connection handling to improve performance and testing

* refactor: Replace getAll() with getLoaded() in MCP.js to prevent unnecessary connection creation for user-context servers.
* test: Adjust MCP.spec.js to mock getLoaded() instead of getAll() for consistency with the new implementation.
* feat: Enhance MCPServersInitializer to reset initialization flag for better handling of process restarts and stale data.
* test: Add integration tests to verify re-initialization behavior and ensure stale data is cleared when necessary.

* 🔧 refactor: Enhance cached endpoints config handling for GPT plugins

* refactor: Update MCPServersInitializer tests to use new server management methods

* refactor: Replace direct Redis server manipulation with registry.addServer and registry.getServerConfig for better abstraction and consistency.
* test: Adjust integration tests to verify server initialization and stale data handling using the updated methods.

* 🔧 refactor: Increase retry limits and delay for MCP server creation

* Updated MAX_CREATE_RETRIES from 3 to 5 to allow for more attempts during server creation.
* Increased RETRY_BASE_DELAY_MS from 10 to 25 milliseconds to provide a longer wait time between retries, improving stability in server initialization.

* refactor: Update MCPServersInitializer tests to utilize new registry methods

* refactor: Replace direct access to sharedAppServers with registry.getServerConfig for improved abstraction.
* test: Adjust tests to verify server initialization and stale data handling using the updated registry methods, ensuring consistency and clarity in the test structure.
This commit is contained in:
Danny Avila 2025-12-15 16:46:56 -05:00 committed by GitHub
parent 02fc4647e1
commit dcd9273700
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 198 additions and 12 deletions

View file

@ -19,7 +19,11 @@ async function getEndpointsConfig(req) {
const cache = getLogStores(CacheKeys.CONFIG_STORE);
const cachedEndpointsConfig = await cache.get(CacheKeys.ENDPOINT_CONFIG);
if (cachedEndpointsConfig) {
return cachedEndpointsConfig;
if (cachedEndpointsConfig.gptPlugins) {
await cache.delete(CacheKeys.ENDPOINT_CONFIG);
} else {
return cachedEndpointsConfig;
}
}
const appConfig = req.config ?? (await getAppConfig({ role: req.user?.role }));

View file

@ -448,7 +448,10 @@ async function getMCPSetupData(userId) {
/** @type {Map<string, import('@librechat/api').MCPConnection>} */
let appConnections = new Map();
try {
appConnections = (await mcpManager.appConnections?.getAll()) || new Map();
// Use getLoaded() instead of getAll() to avoid forcing connection creation
// getAll() creates connections for all servers, which is problematic for servers
// that require user context (e.g., those with {{LIBRECHAT_USER_ID}} placeholders)
appConnections = (await mcpManager.appConnections?.getLoaded()) || new Map();
} catch (error) {
logger.error(`[MCP][User: ${userId}] Error getting app connections:`, error);
}

View file

@ -128,7 +128,7 @@ describe('tests for the new helper functions used by the MCP connection status e
beforeEach(() => {
mockGetMCPManager.mockReturnValue({
appConnections: { getAll: jest.fn(() => new Map()) },
appConnections: { getLoaded: jest.fn(() => new Map()) },
getUserConnections: jest.fn(() => new Map()),
});
mockRegistryInstance.getOAuthServers.mockResolvedValue(new Set());
@ -143,7 +143,7 @@ describe('tests for the new helper functions used by the MCP connection status e
const mockOAuthServers = new Set(['server2']);
const mockMCPManager = {
appConnections: { getAll: jest.fn(() => Promise.resolve(mockAppConnections)) },
appConnections: { getLoaded: jest.fn(() => Promise.resolve(mockAppConnections)) },
getUserConnections: jest.fn(() => mockUserConnections),
};
mockGetMCPManager.mockReturnValue(mockMCPManager);
@ -153,7 +153,7 @@ describe('tests for the new helper functions used by the MCP connection status e
expect(mockRegistryInstance.getAllServerConfigs).toHaveBeenCalledWith(mockUserId);
expect(mockGetMCPManager).toHaveBeenCalledWith(mockUserId);
expect(mockMCPManager.appConnections.getAll).toHaveBeenCalled();
expect(mockMCPManager.appConnections.getLoaded).toHaveBeenCalled();
expect(mockMCPManager.getUserConnections).toHaveBeenCalledWith(mockUserId);
expect(mockRegistryInstance.getOAuthServers).toHaveBeenCalledWith(mockUserId);
@ -174,7 +174,7 @@ describe('tests for the new helper functions used by the MCP connection status e
mockRegistryInstance.getAllServerConfigs.mockResolvedValue(mockConfig);
const mockMCPManager = {
appConnections: { getAll: jest.fn(() => Promise.resolve(null)) },
appConnections: { getLoaded: jest.fn(() => Promise.resolve(null)) },
getUserConnections: jest.fn(() => null),
};
mockGetMCPManager.mockReturnValue(mockMCPManager);

View file

@ -30,11 +30,24 @@ export class MCPServersInitializer {
* - Followers wait and poll `statusCache` until the leader finishes, ensuring only one node
* performs the expensive initialization operations.
*/
private static hasInitializedThisProcess = false;
/** Reset the process-level initialization flag. Only used for testing. */
public static resetProcessFlag(): void {
MCPServersInitializer.hasInitializedThisProcess = false;
}
public static async initialize(rawConfigs: t.MCPServers): Promise<void> {
if (await statusCache.isInitialized()) return;
// On first call in this process, always reset and re-initialize
// This ensures we don't use stale Redis data from previous runs
const isFirstCallThisProcess = !MCPServersInitializer.hasInitializedThisProcess;
// Set flag immediately so recursive calls (from followers) use Redis cache for coordination
MCPServersInitializer.hasInitializedThisProcess = true;
if (!isFirstCallThisProcess && (await statusCache.isInitialized())) return;
if (await isLeader()) {
// Leader performs initialization
// Leader performs initialization - always reset on first call
await statusCache.reset();
await MCPServersRegistry.getInstance().reset();
const serverNames = Object.keys(rawConfigs);

View file

@ -201,6 +201,11 @@ describe('MCPServersInitializer Redis Integration Tests', () => {
// Mock MCPConnectionFactory
jest.spyOn(MCPConnectionFactory, 'create').mockResolvedValue(mockConnection);
// Reset caches and process flag before each test
await registryStatusCache.reset();
await registry.reset();
MCPServersInitializer.resetProcessFlag();
});
afterEach(async () => {
@ -261,7 +266,7 @@ describe('MCPServersInitializer Redis Integration Tests', () => {
await MCPServersInitializer.initialize(testConfigs);
// Verify inspect was not called again
expect(MCPServerInspector.inspect).not.toHaveBeenCalled();
expect((MCPServerInspector.inspect as jest.Mock).mock.calls.length).toBe(0);
});
it('should initialize all servers to cache repository', async () => {
@ -309,4 +314,118 @@ describe('MCPServersInitializer Redis Integration Tests', () => {
expect(await registryStatusCache.isInitialized()).toBe(true);
});
});
describe('horizontal scaling / app restart behavior', () => {
it('should re-initialize on first call even if Redis says initialized (simulating app restart)', async () => {
// First: run full initialization
await MCPServersInitializer.initialize(testConfigs);
expect(await registryStatusCache.isInitialized()).toBe(true);
// Add a stale server directly to Redis to simulate stale data
await registry.addServer(
'stale_server',
{
type: 'stdio',
command: 'node',
args: ['stale.js'],
},
'CACHE',
);
expect(await registry.getServerConfig('stale_server')).toBeDefined();
// Simulate app restart by resetting the process flag (but NOT Redis)
MCPServersInitializer.resetProcessFlag();
// Clear mocks to track new calls
jest.clearAllMocks();
// Re-initialize - should still run initialization because process flag was reset
await MCPServersInitializer.initialize(testConfigs);
// Stale server should be gone because registry.reset() was called
expect(await registry.getServerConfig('stale_server')).toBeUndefined();
// Real servers should be present
expect(await registry.getServerConfig('file_tools_server')).toBeDefined();
expect(await registry.getServerConfig('disabled_server')).toBeDefined();
// Inspector should have been called (proving re-initialization happened)
expect((MCPServerInspector.inspect as jest.Mock).mock.calls.length).toBeGreaterThan(0);
});
it('should skip re-initialization on subsequent calls within same process', async () => {
// First initialization
await MCPServersInitializer.initialize(testConfigs);
expect(await registryStatusCache.isInitialized()).toBe(true);
// Clear mocks
jest.clearAllMocks();
// Second call in same process should skip
await MCPServersInitializer.initialize(testConfigs);
// Inspector should NOT have been called
expect((MCPServerInspector.inspect as jest.Mock).mock.calls.length).toBe(0);
});
it('should clear stale data from Redis when a new instance becomes leader', async () => {
// Initial setup with testConfigs
await MCPServersInitializer.initialize(testConfigs);
// Add stale data that shouldn't exist after next initialization
await registry.addServer(
'should_be_removed',
{
type: 'stdio',
command: 'node',
args: ['old.js'],
},
'CACHE',
);
// Verify stale data exists
expect(await registry.getServerConfig('should_be_removed')).toBeDefined();
// Simulate new process starting (reset process flag)
MCPServersInitializer.resetProcessFlag();
// Initialize with different configs (fewer servers)
const reducedConfigs: t.MCPServers = {
file_tools_server: testConfigs.file_tools_server,
};
await MCPServersInitializer.initialize(reducedConfigs);
// Stale server from previous config should be gone
expect(await registry.getServerConfig('should_be_removed')).toBeUndefined();
// Server not in new configs should be gone
expect(await registry.getServerConfig('disabled_server')).toBeUndefined();
// Only server in new configs should exist
expect(await registry.getServerConfig('file_tools_server')).toBeDefined();
});
it('should work correctly when multiple instances share Redis (leader handles init)', async () => {
// First instance initializes (we are the leader)
await MCPServersInitializer.initialize(testConfigs);
// Verify initialized state is in Redis
expect(await registryStatusCache.isInitialized()).toBe(true);
// Verify servers are in Redis
const fileToolsServer = await registry.getServerConfig('file_tools_server');
expect(fileToolsServer).toBeDefined();
expect(fileToolsServer?.tools).toBe('file_read, file_write');
// Simulate second instance starting (reset process flag but keep Redis data)
MCPServersInitializer.resetProcessFlag();
jest.clearAllMocks();
// Second instance initializes - should still process because isFirstCallThisProcess
await MCPServersInitializer.initialize(testConfigs);
// Redis should still have correct data
expect(await registryStatusCache.isInitialized()).toBe(true);
expect(await registry.getServerConfig('file_tools_server')).toBeDefined();
});
});
});

View file

@ -179,9 +179,10 @@ describe('MCPServersInitializer', () => {
} as unknown as t.ParsedServerConfig;
});
// Reset caches before each test
// Reset caches and process flag before each test
await registryStatusCache.reset();
await registry.reset();
MCPServersInitializer.resetProcessFlag();
jest.clearAllMocks();
});
@ -309,5 +310,51 @@ describe('MCPServersInitializer', () => {
expect(await registryStatusCache.isInitialized()).toBe(true);
});
it('should re-initialize on first call even if Redis cache says initialized (simulating app restart)', async () => {
// First initialization - populates caches
await MCPServersInitializer.initialize(testConfigs);
expect(await registryStatusCache.isInitialized()).toBe(true);
expect(await registry.getServerConfig('file_tools_server')).toBeDefined();
// Simulate stale data: add an extra server that shouldn't be there
await registry.addServer('stale_server', testConfigs.file_tools_server, 'CACHE');
expect(await registry.getServerConfig('stale_server')).toBeDefined();
jest.clearAllMocks();
// Simulate app restart by resetting the process flag
// In real scenario, this happens automatically when process restarts
MCPServersInitializer.resetProcessFlag();
// Re-initialize - should reset caches even though Redis says initialized
await MCPServersInitializer.initialize(testConfigs);
// Verify stale server was removed (cache was reset)
expect(await registry.getServerConfig('stale_server')).toBeUndefined();
// Verify new servers are present
expect(await registry.getServerConfig('file_tools_server')).toBeDefined();
expect(await registry.getServerConfig('oauth_server')).toBeDefined();
// Verify inspector was called again (re-initialization happened)
expect(mockInspect).toHaveBeenCalled();
});
it('should not re-initialize on subsequent calls within same process', async () => {
// First initialization (5 servers in testConfigs)
await MCPServersInitializer.initialize(testConfigs);
expect(mockInspect).toHaveBeenCalledTimes(5);
jest.clearAllMocks();
// Second call - should skip because process flag is set and Redis says initialized
await MCPServersInitializer.initialize(testConfigs);
expect(mockInspect).not.toHaveBeenCalled();
// Third call - still skips
await MCPServersInitializer.initialize(testConfigs);
expect(mockInspect).not.toHaveBeenCalled();
});
});
});

View file

@ -5,8 +5,8 @@ import logger from '~/config/winston';
import { nanoid } from 'nanoid';
const NORMALIZED_LIMIT_DEFAULT = 20;
const MAX_CREATE_RETRIES = 3;
const RETRY_BASE_DELAY_MS = 10;
const MAX_CREATE_RETRIES = 5;
const RETRY_BASE_DELAY_MS = 25;
/**
* Helper to check if an error is a MongoDB duplicate key error.