🤖 refactor: Improve Agents Memory Usage, Bump Keyv, Grok 3 (#6850)

* chore: remove unused redis file * chore: bump keyv dependencies, and update related imports * refactor: Implement IoRedis client for rate limiting across middleware, as node-redis via keyv not compatible * fix: Set max listeners to expected amount * WIP: memory improvements * refactor: Simplify getAbortData assignment in createAbortController * refactor: Update getAbortData to use WeakRef for content management * WIP: memory improvements in agent chat requests * refactor: Enhance memory management with finalization registry and cleanup functions * refactor: Simplify domainParser calls by removing unnecessary request parameter * refactor: Update parameter types for action tools and agent loading functions to use minimal configs * refactor: Simplify domainParser tests by removing unnecessary request parameter * refactor: Simplify domainParser call by removing unnecessary request parameter * refactor: Enhance client disposal by nullifying additional properties to improve memory management * refactor: Improve title generation by adding abort controller and timeout handling, consolidate request cleanup * refactor: Update checkIdleConnections to skip current user when checking for idle connections if passed * refactor: Update createMCPTool to derive userId from config and handle abort signals * refactor: Introduce createTokenCounter function and update tokenCounter usage; enhance disposeClient to reset Graph values * refactor: Update getMCPManager to accept userId parameter for improved idle connection handling * refactor: Extract logToolError function for improved error handling in AgentClient * refactor: Update disposeClient to clear handlerRegistry and graphRunnable references in client.run * refactor: Extract createHandleNewToken function to streamline token handling in initializeClient * chore: bump @librechat/agents * refactor: Improve timeout handling in addTitle function for better error management * refactor: Introduce createFetch instead of using class method 
* refactor: Enhance client disposal and request data handling in AskController and EditController * refactor: Update import statements for AnthropicClient and OpenAIClient to use specific paths * refactor: Use WeakRef for response handling in SplitStreamHandler to prevent memory leaks * refactor: Simplify client disposal and rename getReqData to processReqData in AskController and EditController * refactor: Improve logging structure and parameter handling in OpenAIClient * refactor: Remove unused GraphEvents and improve stream event handling in AnthropicClient and OpenAIClient * refactor: Simplify client initialization in AskController and EditController * refactor: Remove unused mock functions and implement in-memory store for KeyvMongo * chore: Update dependencies in package-lock.json to latest versions * refactor: Await token usage recording in OpenAIClient to ensure proper async handling * refactor: Remove handleAbort route from multiple endpoints and enhance client disposal logic * refactor: Enhance abort controller logic by managing abortKey more effectively * refactor: Add newConversation handling in useEventHandlers for improved conversation management * fix: dropparams * refactor: Use optional chaining for safer access to request properties in BaseClient * refactor: Move client disposal and request data processing logic to cleanup module for better organization * refactor: Remove aborted request check from addTitle function for cleaner logic * feat: Add Grok 3 model pricing and update tests for new models * chore: Remove trace warnings and inspect flags from backend start script used for debugging * refactor: Replace user identifier handling with userId for consistency across controllers, use UserId in clientRegistry * refactor: Enhance client disposal logic to prevent memory leaks by clearing additional references * chore: Update @librechat/agents to version 2.4.14 in package.json and package-lock.json
2026-03-08 00:52:37 +01:00 · 2025-04-12 18:46:36 -04:00 · 2025-04-12 18:46:36 -04:00 · 37964975c1
commit 37964975c1
parent 1e6b1b9554
68 changed files with 1796 additions and 623 deletions
--- a/api/server/services/ActionService.js
+++ b/api/server/services/ActionService.js
@ -50,7 +50,7 @@ const validateAndUpdateTool = async ({ req, tool, assistant_id }) => {
      return null;
    }

-    const parsedDomain = await domainParser(req, domain, true);
+    const parsedDomain = await domainParser(domain, true);

    if (!parsedDomain) {
      return null;
@ -66,12 +66,11 @@ const validateAndUpdateTool = async ({ req, tool, assistant_id }) => {
 *
 * Necessary due to `[a-zA-Z0-9_-]*` Regex Validation, limited to a 64-character maximum.
 *
- * @param {Express.Request} req - The Express Request object.
 * @param {string} domain - The domain name to encode/decode.
 * @param {boolean} inverse - False to decode from base64, true to encode to base64.
 * @returns {Promise<string>} Encoded or decoded domain string.
 */
-async function domainParser(req, domain, inverse = false) {
+async function domainParser(domain, inverse = false) {
  if (!domain) {
    return;
  }
@ -122,7 +121,7 @@ async function loadActionSets(searchParams) {
 * Creates a general tool for an entire action set.
 *
 * @param {Object} params - The parameters for loading action sets.
- * @param {ServerRequest} params.req
+ * @param {string} params.userId
 * @param {ServerResponse} params.res
 * @param {Action} params.action - The action set. Necessary for decrypting authentication values.
 * @param {ActionRequest} params.requestBuilder - The ActionRequest builder class to execute the API call.
@ -133,7 +132,7 @@ async function loadActionSets(searchParams) {
 * @returns { Promise<typeof tool | { _call: (toolInput: Object | string) => unknown}> } An object with `_call` method to execute the tool input.
 */
 async function createActionTool({
-  req,
+  userId,
  res,
  action,
  requestBuilder,
@ -154,7 +153,7 @@ async function createActionTool({
        try {
          if (metadata.auth.type === AuthTypeEnum.OAuth && metadata.auth.authorization_url) {
            const action_id = action.action_id;
-            const identifier = `${req.user.id}:${action.action_id}`;
+            const identifier = `${userId}:${action.action_id}`;
            const requestLogin = async () => {
              const { args: _args, stepId, ...toolCall } = config.toolCall ?? {};
              if (!stepId) {
@ -162,7 +161,7 @@ async function createActionTool({
              }
              const statePayload = {
                nonce: nanoid(),
-                user: req.user.id,
+                user: userId,
                action_id,
              };

@ -206,7 +205,7 @@ async function createActionTool({
                  'oauth',
                  {
                    state: stateToken,
-                    userId: req.user.id,
+                    userId: userId,
                    client_url: metadata.auth.client_url,
                    redirect_uri: `${process.env.DOMAIN_CLIENT}/api/actions/${action_id}/oauth/callback`,
                    /** Encrypted values */
@ -232,10 +231,10 @@ async function createActionTool({
            };

            const tokenPromises = [];
-            tokenPromises.push(findToken({ userId: req.user.id, type: 'oauth', identifier }));
+            tokenPromises.push(findToken({ userId, type: 'oauth', identifier }));
            tokenPromises.push(
              findToken({
-                userId: req.user.id,
+                userId,
                type: 'oauth_refresh',
                identifier: `${identifier}:refresh`,
              }),
@ -258,9 +257,9 @@ async function createActionTool({
                const refresh_token = await decryptV2(refreshTokenData.token);
                const refreshTokens = async () =>
                  await refreshAccessToken({
+                    userId,
                    identifier,
                    refresh_token,
-                    userId: req.user.id,
                    client_url: metadata.auth.client_url,
                    encrypted_oauth_client_id: encrypted.oauth_client_id,
                    encrypted_oauth_client_secret: encrypted.oauth_client_secret,
--- a/api/server/services/ActionService.spec.js
+++ b/api/server/services/ActionService.spec.js
@ -78,20 +78,20 @@ describe('domainParser', () => {
  // Non-azure request
  it('does not return domain as is if not azure', async () => {
    const domain = `example.com${actionDomainSeparator}test${actionDomainSeparator}`;
-    const result1 = await domainParser(reqNoAzure, domain, false);
-    const result2 = await domainParser(reqNoAzure, domain, true);
+    const result1 = await domainParser(domain, false);
+    const result2 = await domainParser(domain, true);
    expect(result1).not.toEqual(domain);
    expect(result2).not.toEqual(domain);
  });

  // Test for Empty or Null Inputs
  it('returns undefined for null domain input', async () => {
-    const result = await domainParser(req, null, true);
+    const result = await domainParser(null, true);
    expect(result).toBeUndefined();
  });

  it('returns undefined for empty domain input', async () => {
-    const result = await domainParser(req, '', true);
+    const result = await domainParser('', true);
    expect(result).toBeUndefined();
  });

@ -102,7 +102,7 @@ describe('domainParser', () => {
      .toString('base64')
      .substring(0, Constants.ENCODED_DOMAIN_LENGTH);

-    await domainParser(req, domain, true);
+    await domainParser(domain, true);

    const cachedValue = await globalCache[encodedDomain];
    expect(cachedValue).toEqual(Buffer.from(domain).toString('base64'));
@ -112,14 +112,14 @@ describe('domainParser', () => {
  it('encodes domain exactly at threshold without modification', async () => {
    const domain = 'a'.repeat(Constants.ENCODED_DOMAIN_LENGTH - TLD.length) + TLD;
    const expected = domain.replace(/\./g, actionDomainSeparator);
-    const result = await domainParser(req, domain, true);
+    const result = await domainParser(domain, true);
    expect(result).toEqual(expected);
  });

  it('encodes domain just below threshold without modification', async () => {
    const domain = 'a'.repeat(Constants.ENCODED_DOMAIN_LENGTH - 1 - TLD.length) + TLD;
    const expected = domain.replace(/\./g, actionDomainSeparator);
-    const result = await domainParser(req, domain, true);
+    const result = await domainParser(domain, true);
    expect(result).toEqual(expected);
  });

@ -129,7 +129,7 @@ describe('domainParser', () => {
    const encodedDomain = Buffer.from(unicodeDomain)
      .toString('base64')
      .substring(0, Constants.ENCODED_DOMAIN_LENGTH);
-    const result = await domainParser(req, unicodeDomain, true);
+    const result = await domainParser(unicodeDomain, true);
    expect(result).toEqual(encodedDomain);
  });

@ -139,7 +139,6 @@ describe('domainParser', () => {
    globalCache[encodedDomain.substring(0, Constants.ENCODED_DOMAIN_LENGTH)] = encodedDomain; // Simulate caching

    const result = await domainParser(
-      req,
      encodedDomain.substring(0, Constants.ENCODED_DOMAIN_LENGTH),
      false,
    );
@ -150,27 +149,27 @@ describe('domainParser', () => {
  it('returns domain with replaced separators if no cached domain exists', async () => {
    const domain = 'example.com';
    const withSeparator = domain.replace(/\./g, actionDomainSeparator);
-    const result = await domainParser(req, withSeparator, false);
+    const result = await domainParser(withSeparator, false);
    expect(result).toEqual(domain);
  });

  it('returns domain with replaced separators when inverse is false and under encoding length', async () => {
    const domain = 'examp.com';
    const withSeparator = domain.replace(/\./g, actionDomainSeparator);
-    const result = await domainParser(req, withSeparator, false);
+    const result = await domainParser(withSeparator, false);
    expect(result).toEqual(domain);
  });

  it('replaces periods with actionDomainSeparator when inverse is true and under encoding length', async () => {
    const domain = 'examp.com';
    const expected = domain.replace(/\./g, actionDomainSeparator);
-    const result = await domainParser(req, domain, true);
+    const result = await domainParser(domain, true);
    expect(result).toEqual(expected);
  });

  it('encodes domain when length is above threshold and inverse is true', async () => {
    const domain = 'a'.repeat(Constants.ENCODED_DOMAIN_LENGTH + 1).concat('.com');
-    const result = await domainParser(req, domain, true);
+    const result = await domainParser(domain, true);
    expect(result).not.toEqual(domain);
    expect(result.length).toBeLessThanOrEqual(Constants.ENCODED_DOMAIN_LENGTH);
  });
@ -180,20 +179,20 @@ describe('domainParser', () => {
    const encodedDomain = Buffer.from(
      originalDomain.replace(/\./g, actionDomainSeparator),
    ).toString('base64');
-    const result = await domainParser(req, encodedDomain, false);
+    const result = await domainParser(encodedDomain, false);
    expect(result).toEqual(encodedDomain);
  });

  it('decodes encoded value if cached and encoded value is provided, and inverse is false', async () => {
    const originalDomain = 'example.com';
-    const encodedDomain = await domainParser(req, originalDomain, true);
-    const result = await domainParser(req, encodedDomain, false);
+    const encodedDomain = await domainParser(originalDomain, true);
+    const result = await domainParser(encodedDomain, false);
    expect(result).toEqual(originalDomain);
  });

  it('handles invalid base64 encoded values gracefully', async () => {
    const invalidBase64Domain = 'not_base64_encoded';
-    const result = await domainParser(req, invalidBase64Domain, false);
+    const result = await domainParser(invalidBase64Domain, false);
    expect(result).toEqual(invalidBase64Domain);
  });
 });
--- a/api/server/services/Endpoints/agents/initialize.js
+++ b/api/server/services/Endpoints/agents/initialize.js
@ -159,14 +159,20 @@ const initializeAgentOptions = async ({
    currentFiles,
    agent.tool_resources,
  );
+
+  const provider = agent.provider;
  const { tools, toolContextMap } = await loadAgentTools({
    req,
    res,
-    agent,
+    agent: {
+      id: agent.id,
+      tools: agent.tools,
+      provider,
+      model: agent.model,
+    },
    tool_resources,
  });

-  const provider = agent.provider;
  agent.endpoint = provider;
  let getOptions = providerConfigMap[provider];
  if (!getOptions && providerConfigMap[provider.toLowerCase()] != null) {
--- a/api/server/services/Endpoints/agents/title.js
+++ b/api/server/services/Endpoints/agents/title.js
@ -2,7 +2,11 @@ const { CacheKeys } = require('librechat-data-provider');
 const getLogStores = require('~/cache/getLogStores');
 const { isEnabled } = require('~/server/utils');
 const { saveConvo } = require('~/models');
+const { logger } = require('~/config');

+/**
+ * Add title to conversation in a way that avoids memory retention
+ */
 const addTitle = async (req, { text, response, client }) => {
  const { TITLE_CONVO = true } = process.env ?? {};
  if (!isEnabled(TITLE_CONVO)) {
@ -13,37 +17,55 @@ const addTitle = async (req, { text, response, client }) => {
    return;
  }

-  // If the request was aborted, don't generate the title.
-  if (client.abortController.signal.aborted) {
-    return;
-  }
-
  const titleCache = getLogStores(CacheKeys.GEN_TITLE);
  const key = `${req.user.id}-${response.conversationId}`;
-  const responseText =
-    response?.content && Array.isArray(response?.content)
-      ? response.content.reduce((acc, block) => {
-        if (block?.type === 'text') {
-          return acc + block.text;
-        }
-        return acc;
-      }, '')
-      : (response?.content ?? response?.text ?? '');
+  /** @type {NodeJS.Timeout} */
+  let timeoutId;
+  try {
+    const timeoutPromise = new Promise((_, reject) => {
+      timeoutId = setTimeout(() => reject(new Error('Title generation timeout')), 25000);
+    }).catch((error) => {
+      logger.error('Title error:', error);
+    });

-  const title = await client.titleConvo({
-    text,
-    responseText,
-    conversationId: response.conversationId,
-  });
-  await titleCache.set(key, title, 120000);
-  await saveConvo(
-    req,
-    {
-      conversationId: response.conversationId,
-      title,
-    },
-    { context: 'api/server/services/Endpoints/agents/title.js' },
-  );
+    let titlePromise;
+    let abortController = new AbortController();
+    if (client && typeof client.titleConvo === 'function') {
+      titlePromise = Promise.race([
+        client
+          .titleConvo({
+            text,
+            abortController,
+          })
+          .catch((error) => {
+            logger.error('Client title error:', error);
+          }),
+        timeoutPromise,
+      ]);
+    } else {
+      return;
+    }
+
+    const title = await titlePromise;
+    if (!abortController.signal.aborted) {
+      abortController.abort();
+    }
+    if (timeoutId) {
+      clearTimeout(timeoutId);
+    }
+
+    await titleCache.set(key, title, 120000);
+    await saveConvo(
+      req,
+      {
+        conversationId: response.conversationId,
+        title,
+      },
+      { context: 'api/server/services/Endpoints/agents/title.js' },
+    );
+  } catch (error) {
+    logger.error('Error generating title:', error);
+  }
 };

 module.exports = addTitle;
--- a/api/server/services/Endpoints/anthropic/initialize.js
+++ b/api/server/services/Endpoints/anthropic/initialize.js
@ -1,7 +1,7 @@
 const { EModelEndpoint } = require('librechat-data-provider');
 const { getUserKey, checkUserKeyExpiry } = require('~/server/services/UserService');
 const { getLLMConfig } = require('~/server/services/Endpoints/anthropic/llm');
-const { AnthropicClient } = require('~/app');
+const AnthropicClient = require('~/app/clients/AnthropicClient');

 const initializeClient = async ({ req, res, endpointOption, overrideModel, optionsOnly }) => {
  const { ANTHROPIC_API_KEY, ANTHROPIC_REVERSE_PROXY, PROXY } = process.env;
--- a/api/server/services/Endpoints/anthropic/title.js
+++ b/api/server/services/Endpoints/anthropic/title.js
@ -13,11 +13,6 @@ const addTitle = async (req, { text, response, client }) => {
    return;
  }

-  // If the request was aborted, don't generate the title.
-  if (client.abortController.signal.aborted) {
-    return;
-  }
-
  const titleCache = getLogStores(CacheKeys.GEN_TITLE);
  const key = `${req.user.id}-${response.conversationId}`;

--- a/api/server/services/Endpoints/custom/initialize.js
+++ b/api/server/services/Endpoints/custom/initialize.js
@ -11,8 +11,8 @@ const { getLLMConfig } = require('~/server/services/Endpoints/openAI/llm');
 const { getCustomEndpointConfig } = require('~/server/services/Config');
 const { fetchModels } = require('~/server/services/ModelService');
 const { isUserProvided, sleep } = require('~/server/utils');
+const OpenAIClient = require('~/app/clients/OpenAIClient');
 const getLogStores = require('~/cache/getLogStores');
-const { OpenAIClient } = require('~/app');

 const { PROXY } = process.env;

--- a/api/server/services/Endpoints/openAI/initialize.js
+++ b/api/server/services/Endpoints/openAI/initialize.js
@ -7,8 +7,14 @@ const {
 const { getUserKeyValues, checkUserKeyExpiry } = require('~/server/services/UserService');
 const { getLLMConfig } = require('~/server/services/Endpoints/openAI/llm');
 const { isEnabled, isUserProvided, sleep } = require('~/server/utils');
+const OpenAIClient = require('~/app/clients/OpenAIClient');
 const { getAzureCredentials } = require('~/utils');
-const { OpenAIClient } = require('~/app');
+
+function createHandleNewToken(streamRate) {
+  async () => {
+    await sleep(streamRate);
+  };
+}

 const initializeClient = async ({
  req,
@ -140,14 +146,13 @@ const initializeClient = async ({
    clientOptions = Object.assign({ modelOptions }, clientOptions);
    clientOptions.modelOptions.user = req.user.id;
    const options = getLLMConfig(apiKey, clientOptions);
-    if (!clientOptions.streamRate) {
+    const streamRate = clientOptions.streamRate;
+    if (!streamRate) {
      return options;
    }
    options.llmConfig.callbacks = [
      {
-        handleLLMNewToken: async () => {
-          await sleep(clientOptions.streamRate);
-        },
+        handleLLMNewToken: createHandleNewToken(streamRate),
      },
    ];
    return options;
--- a/api/server/services/Endpoints/openAI/title.js
+++ b/api/server/services/Endpoints/openAI/title.js
@ -13,11 +13,6 @@ const addTitle = async (req, { text, response, client }) => {
    return;
  }

-  // If the request was aborted and is not azure, don't generate the title.
-  if (!client.azure && client.abortController.signal.aborted) {
-    return;
-  }
-
  const titleCache = getLogStores(CacheKeys.GEN_TITLE);
  const key = `${req.user.id}-${response.conversationId}`;

--- a/api/server/services/MCP.js
+++ b/api/server/services/MCP.js
@ -37,9 +37,8 @@ async function createMCPTool({ req, toolKey, provider }) {
  }

  const [toolName, serverName] = toolKey.split(Constants.mcp_delimiter);
-  const userId = req.user?.id;

-  if (!userId) {
+  if (!req.user?.id) {
    logger.error(
      `[MCP][${serverName}][${toolName}] User ID not found on request. Cannot create tool.`,
    );
@ -49,15 +48,16 @@ async function createMCPTool({ req, toolKey, provider }) {
  /** @type {(toolArguments: Object | string, config?: GraphRunnableConfig) => Promise<unknown>} */
  const _call = async (toolArguments, config) => {
    try {
-      const mcpManager = getMCPManager();
+      const derivedSignal = config?.signal ? AbortSignal.any([config.signal]) : undefined;
+      const mcpManager = getMCPManager(config?.userId);
      const result = await mcpManager.callTool({
        serverName,
        toolName,
        provider,
        toolArguments,
        options: {
-          userId,
-          signal: config?.signal,
+          userId: config?.configurable?.user_id,
+          signal: derivedSignal,
        },
      });

@ -70,7 +70,7 @@ async function createMCPTool({ req, toolKey, provider }) {
      return result;
    } catch (error) {
      logger.error(
-        `[MCP][User: ${userId}][${serverName}] Error calling "${toolName}" MCP tool:`,
+        `[MCP][User: ${config?.userId}][${serverName}] Error calling "${toolName}" MCP tool:`,
        error,
      );
      throw new Error(
--- a/api/server/services/ToolService.js
+++ b/api/server/services/ToolService.js
@ -334,7 +334,7 @@ async function processRequiredActions(client, requiredActions) {
        const domainMap = new Map();

        for (const action of actionSets) {
-          const domain = await domainParser(client.req, action.metadata.domain, true);
+          const domain = await domainParser(action.metadata.domain, true);
          domainMap.set(domain, action);

          // Check if domain is allowed
@ -404,7 +404,7 @@ async function processRequiredActions(client, requiredActions) {

      // We've already decrypted the metadata, so we can pass it directly
      tool = await createActionTool({
-        req: client.req,
+        userId: client.req.user.id,
        res: client.res,
        action,
        requestBuilder,
@ -458,7 +458,7 @@ async function processRequiredActions(client, requiredActions) {
 * @param {Object} params - Run params containing user and request information.
 * @param {ServerRequest} params.req - The request object.
  * @param {ServerResponse} params.res - The response object.
- * @param {Agent} params.agent - The agent to load tools for.
+ * @param {Pick<Agent, 'id' | 'provider' | 'model' | 'tools'>} params.agent - The agent to load tools for.
 * @param {string | undefined} [params.openAIApiKey] - The OpenAI API key.
 * @returns {Promise<{ tools?: StructuredTool[] }>} The agent tools.
 */
@ -570,7 +570,7 @@ async function loadAgentTools({ req, res, agent, tool_resources, openAIApiKey })
  const domainMap = new Map();

  for (const action of actionSets) {
-    const domain = await domainParser(req, action.metadata.domain, true);
+    const domain = await domainParser(action.metadata.domain, true);
    domainMap.set(domain, action);

    // Check if domain is allowed (do this once per action set)
@ -639,7 +639,7 @@ async function loadAgentTools({ req, res, agent, tool_resources, openAIApiKey })

    if (requestBuilder) {
      const tool = await createActionTool({
-        req,
+        userId: req.user.id,
        res,
        action,
        requestBuilder,