🤖 refactor: Improve Agents Memory Usage, Bump Keyv, Grok 3 (#6850)

* chore: remove unused redis file

* chore: bump keyv dependencies, and update related imports

* refactor: Implement IoRedis client for rate limiting across middleware, as node-redis via keyv not compatible

* fix: Set max listeners to expected amount

* WIP: memory improvements

* refactor: Simplify getAbortData assignment in createAbortController

* refactor: Update getAbortData to use WeakRef for content management

* WIP: memory improvements in agent chat requests

* refactor: Enhance memory management with finalization registry and cleanup functions

* refactor: Simplify domainParser calls by removing unnecessary request parameter

* refactor: Update parameter types for action tools and agent loading functions to use minimal configs

* refactor: Simplify domainParser tests by removing unnecessary request parameter

* refactor: Simplify domainParser call by removing unnecessary request parameter

* refactor: Enhance client disposal by nullifying additional properties to improve memory management

* refactor: Improve title generation by adding abort controller and timeout handling, consolidate request cleanup

* refactor: Update checkIdleConnections to skip current user when checking for idle connections if passed

* refactor: Update createMCPTool to derive userId from config and handle abort signals

* refactor: Introduce createTokenCounter function and update tokenCounter usage; enhance disposeClient to reset Graph values

* refactor: Update getMCPManager to accept userId parameter for improved idle connection handling

* refactor: Extract logToolError function for improved error handling in AgentClient

* refactor: Update disposeClient to clear handlerRegistry and graphRunnable references in client.run

* refactor: Extract createHandleNewToken function to streamline token handling in initializeClient

* chore: bump @librechat/agents

* refactor: Improve timeout handling in addTitle function for better error management

* refactor: Introduce createFetch instead of using class method

* refactor: Enhance client disposal and request data handling in AskController and EditController

* refactor: Update import statements for AnthropicClient and OpenAIClient to use specific paths

* refactor: Use WeakRef for response handling in SplitStreamHandler to prevent memory leaks

* refactor: Simplify client disposal and rename getReqData to processReqData in AskController and EditController

* refactor: Improve logging structure and parameter handling in OpenAIClient

* refactor: Remove unused GraphEvents and improve stream event handling in AnthropicClient and OpenAIClient

* refactor: Simplify client initialization in AskController and EditController

* refactor: Remove unused mock functions and implement in-memory store for KeyvMongo

* chore: Update dependencies in package-lock.json to latest versions

* refactor: Await token usage recording in OpenAIClient to ensure proper async handling

* refactor: Remove handleAbort route from multiple endpoints and enhance client disposal logic

* refactor: Enhance abort controller logic by managing abortKey more effectively

* refactor: Add newConversation handling in useEventHandlers for improved conversation management

* fix: dropparams

* refactor: Use optional chaining for safer access to request properties in BaseClient

* refactor: Move client disposal and request data processing logic to cleanup module for better organization

* refactor: Remove aborted request check from addTitle function for cleaner logic

* feat: Add Grok 3 model pricing and update tests for new models

* chore: Remove trace warnings and inspect flags from backend start script used for debugging

* refactor: Replace user identifier handling with userId for consistency across controllers, use UserId in clientRegistry

* refactor: Enhance client disposal logic to prevent memory leaks by clearing additional references

* chore: Update @librechat/agents to version 2.4.14 in package.json and package-lock.json
This commit is contained in:
Danny Avila 2025-04-12 18:46:36 -04:00 committed by GitHub
parent 1e6b1b9554
commit 37964975c1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
68 changed files with 1796 additions and 623 deletions

View file

@ -63,6 +63,21 @@ const noSystemModelRegex = [/\bo1\b/gi];
// const { getFormattedMemories } = require('~/models/Memory');
// const { getCurrentDateTime } = require('~/utils');
function createTokenCounter(encoding) {
return (message) => {
const countTokens = (text) => Tokenizer.getTokenCount(text, encoding);
return getTokenCountForMessage(message, countTokens);
};
}
function logToolError(graph, error, toolId) {
logger.error(
'[api/server/controllers/agents/client.js #chatCompletion] Tool Error',
error,
toolId,
);
}
class AgentClient extends BaseClient {
constructor(options = {}) {
super(null, options);
@ -535,6 +550,10 @@ class AgentClient extends BaseClient {
}
async chatCompletion({ payload, abortController = null }) {
/** @type {Partial<RunnableConfig> & { version: 'v1' | 'v2'; run_id?: string; streamMode: string }} */
let config;
/** @type {ReturnType<createRun>} */
let run;
try {
if (!abortController) {
abortController = new AbortController();
@ -632,11 +651,11 @@ class AgentClient extends BaseClient {
/** @type {TCustomConfig['endpoints']['agents']} */
const agentsEConfig = this.options.req.app.locals[EModelEndpoint.agents];
/** @type {Partial<RunnableConfig> & { version: 'v1' | 'v2'; run_id?: string; streamMode: string }} */
const config = {
config = {
configurable: {
thread_id: this.conversationId,
last_agent_index: this.agentConfigs?.size ?? 0,
user_id: this.user ?? this.options.req.user?.id,
hide_sequential_outputs: this.options.agent.hide_sequential_outputs,
},
recursionLimit: agentsEConfig?.recursionLimit,
@ -655,15 +674,6 @@ class AgentClient extends BaseClient {
initialMessages = formatContentStrings(initialMessages);
}
/** @type {ReturnType<createRun>} */
let run;
const countTokens = ((text) => this.getTokenCount(text)).bind(this);
/** @type {(message: BaseMessage) => number} */
const tokenCounter = (message) => {
return getTokenCountForMessage(message, countTokens);
};
/**
*
* @param {Agent} agent
@ -767,19 +777,14 @@ class AgentClient extends BaseClient {
run.Graph.contentData = contentData;
}
const encoding = this.getEncoding();
await run.processStream({ messages }, config, {
keepContent: i !== 0,
tokenCounter,
tokenCounter: createTokenCounter(encoding),
indexTokenCountMap: currentIndexCountMap,
maxContextTokens: agent.maxContextTokens,
callbacks: {
[Callback.TOOL_ERROR]: (graph, error, toolId) => {
logger.error(
'[api/server/controllers/agents/client.js #chatCompletion] Tool Error',
error,
toolId,
);
},
[Callback.TOOL_ERROR]: logToolError,
},
});
};
@ -809,6 +814,8 @@ class AgentClient extends BaseClient {
break;
}
}
const encoding = this.getEncoding();
const tokenCounter = createTokenCounter(encoding);
for (const [agentId, agent] of this.agentConfigs) {
if (abortController.signal.aborted === true) {
break;
@ -917,7 +924,7 @@ class AgentClient extends BaseClient {
* @param {string} params.text
* @param {string} params.conversationId
*/
async titleConvo({ text }) {
async titleConvo({ text, abortController }) {
if (!this.run) {
throw new Error('Run not initialized');
}
@ -950,6 +957,7 @@ class AgentClient extends BaseClient {
contentParts: this.contentParts,
clientOptions,
chainOptions: {
signal: abortController.signal,
callbacks: [
{
handleLLMEnd,
@ -975,7 +983,7 @@ class AgentClient extends BaseClient {
};
});
this.recordCollectedUsage({
await this.recordCollectedUsage({
model: clientOptions.model,
context: 'title',
collectedUsage,