🤖 refactor: Improve Agents Memory Usage, Bump Keyv, Grok 3 (#6850)

* chore: remove unused redis file
* chore: bump keyv dependencies, and update related imports
* refactor: Implement IoRedis client for rate limiting across middleware, as node-redis via keyv not compatible
* fix: Set max listeners to expected amount
* WIP: memory improvements
* refactor: Simplify getAbortData assignment in createAbortController
* refactor: Update getAbortData to use WeakRef for content management
* WIP: memory improvements in agent chat requests
* refactor: Enhance memory management with finalization registry and cleanup functions
* refactor: Simplify domainParser calls by removing unnecessary request parameter
* refactor: Update parameter types for action tools and agent loading functions to use minimal configs
* refactor: Simplify domainParser tests by removing unnecessary request parameter
* refactor: Simplify domainParser call by removing unnecessary request parameter
* refactor: Enhance client disposal by nullifying additional properties to improve memory management
* refactor: Improve title generation by adding abort controller and timeout handling, consolidate request cleanup
* refactor: Update checkIdleConnections to skip current user when checking for idle connections if passed
* refactor: Update createMCPTool to derive userId from config and handle abort signals
* refactor: Introduce createTokenCounter function and update tokenCounter usage; enhance disposeClient to reset Graph values
* refactor: Update getMCPManager to accept userId parameter for improved idle connection handling
* refactor: Extract logToolError function for improved error handling in AgentClient
* refactor: Update disposeClient to clear handlerRegistry and graphRunnable references in client.run
* refactor: Extract createHandleNewToken function to streamline token handling in initializeClient
* chore: bump @librechat/agents
* refactor: Improve timeout handling in addTitle function for better error management
* refactor: Introduce createFetch instead of using class method
* refactor: Enhance client disposal and request data handling in AskController and EditController
* refactor: Update import statements for AnthropicClient and OpenAIClient to use specific paths
* refactor: Use WeakRef for response handling in SplitStreamHandler to prevent memory leaks
* refactor: Simplify client disposal and rename getReqData to processReqData in AskController and EditController
* refactor: Improve logging structure and parameter handling in OpenAIClient
* refactor: Remove unused GraphEvents and improve stream event handling in AnthropicClient and OpenAIClient
* refactor: Simplify client initialization in AskController and EditController
* refactor: Remove unused mock functions and implement in-memory store for KeyvMongo
* chore: Update dependencies in package-lock.json to latest versions
* refactor: Await token usage recording in OpenAIClient to ensure proper async handling
* refactor: Remove handleAbort route from multiple endpoints and enhance client disposal logic
* refactor: Enhance abort controller logic by managing abortKey more effectively
* refactor: Add newConversation handling in useEventHandlers for improved conversation management
* fix: dropparams
* refactor: Use optional chaining for safer access to request properties in BaseClient
* refactor: Move client disposal and request data processing logic to cleanup module for better organization
* refactor: Remove aborted request check from addTitle function for cleaner logic
* feat: Add Grok 3 model pricing and update tests for new models
* chore: Remove trace warnings and inspect flags from backend start script used for debugging
* refactor: Replace user identifier handling with userId for consistency across controllers, use UserId in clientRegistry
* refactor: Enhance client disposal logic to prevent memory leaks by clearing additional references
* chore: Update @librechat/agents to version 2.4.14 in package.json and package-lock.json
2026-01-31 06:45:17 +01:00 · 2025-04-12 18:46:36 -04:00 · 2025-04-12 18:46:36 -04:00 · 37964975c1
commit 37964975c1
parent 1e6b1b9554
68 changed files with 1796 additions and 623 deletions
--- a/api/server/controllers/agents/client.js
+++ b/api/server/controllers/agents/client.js
@ -63,6 +63,21 @@ const noSystemModelRegex = [/\bo1\b/gi];
 // const { getFormattedMemories } = require('~/models/Memory');
 // const { getCurrentDateTime } = require('~/utils');

/**
 * [review annotation — not part of the committed diff]
 * Builds a per-message token counter for a fixed encoding.
 *
 * The returned function closes over `encoding` only: each call wraps
 * `Tokenizer.getTokenCount(text, encoding)` and hands that wrapper to
 * `getTokenCountForMessage`. This replaces the inline `tokenCounter` removed
 * further down in this diff, which counted through `this.getTokenCount` and
 * was therefore tied to the client instance.
 * NOTE(review): presumably done so the counter does not keep the client
 * alive — consistent with the commit's memory-usage goal, but confirm.
 *
 * @param {string} encoding - Passed through unchanged to
 *   `Tokenizer.getTokenCount`; callers in this diff obtain it from
 *   `this.getEncoding()`. Assumed to be a string — TODO confirm.
 * @returns {(message: BaseMessage) => number} Counter for a single message
 *   (type taken from the annotation on the removed inline `tokenCounter`).
 */
+function createTokenCounter(encoding) {
+  return (message) => {
+    const countTokens = (text) => Tokenizer.getTokenCount(text, encoding);
+    return getTokenCountForMessage(message, countTokens);
+  };
+}
+
/**
 * [review annotation — not part of the committed diff]
 * Logs a tool error through `logger.error` with a fixed message prefix.
 *
 * Used as the `Callback.TOOL_ERROR` handler passed to `run.processStream`
 * in `chatCompletion`, replacing the inline arrow function removed further
 * down in this diff.
 *
 * @param {*} graph - Not used by this function; kept so that `error` and
 *   `toolId` stay in the positions the removed inline handler had.
 * @param {*} error - Logged as-is (presumably an `Error` — TODO confirm).
 * @param {*} toolId - Logged as-is (presumably a string id — TODO confirm).
 * @returns {void}
 */
+function logToolError(graph, error, toolId) {
+  logger.error(
+    '[api/server/controllers/agents/client.js #chatCompletion] Tool Error',
+    error,
+    toolId,
+  );
+}
+
 class AgentClient extends BaseClient {
  constructor(options = {}) {
    super(null, options);
@ -535,6 +550,10 @@ class AgentClient extends BaseClient {
  }

  async chatCompletion({ payload, abortController = null }) {
+    /** @type {Partial<RunnableConfig> & { version: 'v1' | 'v2'; run_id?: string; streamMode: string }} */
+    let config;
+    /** @type {ReturnType<createRun>} */
+    let run;
    try {
      if (!abortController) {
        abortController = new AbortController();
@ -632,11 +651,11 @@ class AgentClient extends BaseClient {
      /** @type {TCustomConfig['endpoints']['agents']} */
      const agentsEConfig = this.options.req.app.locals[EModelEndpoint.agents];

-      /** @type {Partial<RunnableConfig> & { version: 'v1' | 'v2'; run_id?: string; streamMode: string }} */
-      const config = {
+      config = {
        configurable: {
          thread_id: this.conversationId,
          last_agent_index: this.agentConfigs?.size ?? 0,
+          user_id: this.user ?? this.options.req.user?.id,
          hide_sequential_outputs: this.options.agent.hide_sequential_outputs,
        },
        recursionLimit: agentsEConfig?.recursionLimit,
@ -655,15 +674,6 @@ class AgentClient extends BaseClient {
        initialMessages = formatContentStrings(initialMessages);
      }

-      /** @type {ReturnType<createRun>} */
-      let run;
-      const countTokens = ((text) => this.getTokenCount(text)).bind(this);
-
-      /** @type {(message: BaseMessage) => number} */
-      const tokenCounter = (message) => {
-        return getTokenCountForMessage(message, countTokens);
-      };
-
      /**
       *
       * @param {Agent} agent
@ -767,19 +777,14 @@ class AgentClient extends BaseClient {
          run.Graph.contentData = contentData;
        }

+        const encoding = this.getEncoding();
        await run.processStream({ messages }, config, {
          keepContent: i !== 0,
-          tokenCounter,
+          tokenCounter: createTokenCounter(encoding),
          indexTokenCountMap: currentIndexCountMap,
          maxContextTokens: agent.maxContextTokens,
          callbacks: {
-            [Callback.TOOL_ERROR]: (graph, error, toolId) => {
-              logger.error(
-                '[api/server/controllers/agents/client.js #chatCompletion] Tool Error',
-                error,
-                toolId,
-              );
-            },
+            [Callback.TOOL_ERROR]: logToolError,
          },
        });
      };
@ -809,6 +814,8 @@ class AgentClient extends BaseClient {
            break;
          }
        }
+        const encoding = this.getEncoding();
+        const tokenCounter = createTokenCounter(encoding);
        for (const [agentId, agent] of this.agentConfigs) {
          if (abortController.signal.aborted === true) {
            break;
@ -917,7 +924,7 @@ class AgentClient extends BaseClient {
   * @param {string} params.text
   * @param {string} params.conversationId
   */
-  async titleConvo({ text }) {
+  async titleConvo({ text, abortController }) {
    if (!this.run) {
      throw new Error('Run not initialized');
    }
@ -950,6 +957,7 @@ class AgentClient extends BaseClient {
        contentParts: this.contentParts,
        clientOptions,
        chainOptions: {
+          signal: abortController.signal,
          callbacks: [
            {
              handleLLMEnd,
@ -975,7 +983,7 @@ class AgentClient extends BaseClient {
        };
      });

-      this.recordCollectedUsage({
+      await this.recordCollectedUsage({
        model: clientOptions.model,
        context: 'title',
        collectedUsage,