mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 00:40:14 +01:00
🤖 refactor: Improve Agents Memory Usage, Bump Keyv, Grok 3 (#6850)
* chore: remove unused redis file
* chore: bump keyv dependencies, and update related imports
* refactor: Implement IoRedis client for rate limiting across middleware, as node-redis via keyv is not compatible
* fix: Set max listeners to expected amount
* WIP: memory improvements
* refactor: Simplify getAbortData assignment in createAbortController
* refactor: Update getAbortData to use WeakRef for content management
* WIP: memory improvements in agent chat requests
* refactor: Enhance memory management with finalization registry and cleanup functions
* refactor: Simplify domainParser calls by removing unnecessary request parameter
* refactor: Update parameter types for action tools and agent loading functions to use minimal configs
* refactor: Simplify domainParser tests by removing unnecessary request parameter
* refactor: Simplify domainParser call by removing unnecessary request parameter
* refactor: Enhance client disposal by nullifying additional properties to improve memory management
* refactor: Improve title generation by adding abort controller and timeout handling, consolidate request cleanup
* refactor: Update checkIdleConnections to skip current user when checking for idle connections if passed
* refactor: Update createMCPTool to derive userId from config and handle abort signals
* refactor: Introduce createTokenCounter function and update tokenCounter usage; enhance disposeClient to reset Graph values
* refactor: Update getMCPManager to accept userId parameter for improved idle connection handling
* refactor: Extract logToolError function for improved error handling in AgentClient
* refactor: Update disposeClient to clear handlerRegistry and graphRunnable references in client.run
* refactor: Extract createHandleNewToken function to streamline token handling in initializeClient
* chore: bump @librechat/agents
* refactor: Improve timeout handling in addTitle function for better error management
* refactor: Introduce createFetch instead of using class method
* refactor: Enhance client disposal and request data handling in AskController and EditController
* refactor: Update import statements for AnthropicClient and OpenAIClient to use specific paths
* refactor: Use WeakRef for response handling in SplitStreamHandler to prevent memory leaks
* refactor: Simplify client disposal and rename getReqData to processReqData in AskController and EditController
* refactor: Improve logging structure and parameter handling in OpenAIClient
* refactor: Remove unused GraphEvents and improve stream event handling in AnthropicClient and OpenAIClient
* refactor: Simplify client initialization in AskController and EditController
* refactor: Remove unused mock functions and implement in-memory store for KeyvMongo
* chore: Update dependencies in package-lock.json to latest versions
* refactor: Await token usage recording in OpenAIClient to ensure proper async handling
* refactor: Remove handleAbort route from multiple endpoints and enhance client disposal logic
* refactor: Enhance abort controller logic by managing abortKey more effectively
* refactor: Add newConversation handling in useEventHandlers for improved conversation management
* fix: dropparams
* refactor: Use optional chaining for safer access to request properties in BaseClient
* refactor: Move client disposal and request data processing logic to cleanup module for better organization
* refactor: Remove aborted request check from addTitle function for cleaner logic
* feat: Add Grok 3 model pricing and update tests for new models
* chore: Remove trace warnings and inspect flags from backend start script used for debugging
* refactor: Replace user identifier handling with userId for consistency across controllers, use UserId in clientRegistry
* refactor: Enhance client disposal logic to prevent memory leaks by clearing additional references
* chore: Update @librechat/agents to version 2.4.14 in package.json and package-lock.json
This commit is contained in:
parent
1e6b1b9554
commit
37964975c1
68 changed files with 1796 additions and 623 deletions
|
|
@ -9,7 +9,7 @@ const {
|
|||
getResponseSender,
|
||||
validateVisionModel,
|
||||
} = require('librechat-data-provider');
|
||||
const { SplitStreamHandler: _Handler, GraphEvents } = require('@librechat/agents');
|
||||
const { SplitStreamHandler: _Handler } = require('@librechat/agents');
|
||||
const {
|
||||
truncateText,
|
||||
formatMessage,
|
||||
|
|
@ -26,10 +26,11 @@ const {
|
|||
const { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } = require('~/utils');
|
||||
const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
|
||||
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
|
||||
const { createFetch, createStreamEventHandlers } = require('./generators');
|
||||
const Tokenizer = require('~/server/services/Tokenizer');
|
||||
const { logger, sendEvent } = require('~/config');
|
||||
const { sleep } = require('~/server/utils');
|
||||
const BaseClient = require('./BaseClient');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
const HUMAN_PROMPT = '\n\nHuman:';
|
||||
const AI_PROMPT = '\n\nAssistant:';
|
||||
|
|
@ -184,7 +185,10 @@ class AnthropicClient extends BaseClient {
|
|||
getClient(requestOptions) {
|
||||
/** @type {Anthropic.ClientOptions} */
|
||||
const options = {
|
||||
fetch: this.fetch,
|
||||
fetch: createFetch({
|
||||
directEndpoint: this.options.directEndpoint,
|
||||
reverseProxyUrl: this.options.reverseProxyUrl,
|
||||
}),
|
||||
apiKey: this.apiKey,
|
||||
};
|
||||
|
||||
|
|
@ -795,14 +799,11 @@ class AnthropicClient extends BaseClient {
|
|||
}
|
||||
|
||||
logger.debug('[AnthropicClient]', { ...requestOptions });
|
||||
const handlers = createStreamEventHandlers(this.options.res);
|
||||
this.streamHandler = new SplitStreamHandler({
|
||||
accumulate: true,
|
||||
runId: this.responseMessageId,
|
||||
handlers: {
|
||||
[GraphEvents.ON_RUN_STEP]: (event) => sendEvent(this.options.res, event),
|
||||
[GraphEvents.ON_MESSAGE_DELTA]: (event) => sendEvent(this.options.res, event),
|
||||
[GraphEvents.ON_REASONING_DELTA]: (event) => sendEvent(this.options.res, event),
|
||||
},
|
||||
handlers,
|
||||
});
|
||||
|
||||
let intermediateReply = this.streamHandler.tokens;
|
||||
|
|
|
|||
|
|
@ -28,15 +28,10 @@ class BaseClient {
|
|||
month: 'long',
|
||||
day: 'numeric',
|
||||
});
|
||||
this.fetch = this.fetch.bind(this);
|
||||
/** @type {boolean} */
|
||||
this.skipSaveConvo = false;
|
||||
/** @type {boolean} */
|
||||
this.skipSaveUserMessage = false;
|
||||
/** @type {ClientDatabaseSavePromise} */
|
||||
this.userMessagePromise;
|
||||
/** @type {ClientDatabaseSavePromise} */
|
||||
this.responsePromise;
|
||||
/** @type {string} */
|
||||
this.user;
|
||||
/** @type {string} */
|
||||
|
|
@ -564,6 +559,8 @@ class BaseClient {
|
|||
}
|
||||
|
||||
async sendMessage(message, opts = {}) {
|
||||
/** @type {Promise<TMessage>} */
|
||||
let userMessagePromise;
|
||||
const { user, head, isEdited, conversationId, responseMessageId, saveOptions, userMessage } =
|
||||
await this.handleStartMethods(message, opts);
|
||||
|
||||
|
|
@ -625,11 +622,11 @@ class BaseClient {
|
|||
}
|
||||
|
||||
if (!isEdited && !this.skipSaveUserMessage) {
|
||||
this.userMessagePromise = this.saveMessageToDatabase(userMessage, saveOptions, user);
|
||||
userMessagePromise = this.saveMessageToDatabase(userMessage, saveOptions, user);
|
||||
this.savedMessageIds.add(userMessage.messageId);
|
||||
if (typeof opts?.getReqData === 'function') {
|
||||
opts.getReqData({
|
||||
userMessagePromise: this.userMessagePromise,
|
||||
userMessagePromise,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -655,7 +652,9 @@ class BaseClient {
|
|||
|
||||
/** @type {string|string[]|undefined} */
|
||||
const completion = await this.sendCompletion(payload, opts);
|
||||
this.abortController.requestCompleted = true;
|
||||
if (this.abortController) {
|
||||
this.abortController.requestCompleted = true;
|
||||
}
|
||||
|
||||
/** @type {TMessage} */
|
||||
const responseMessage = {
|
||||
|
|
@ -703,7 +702,13 @@ class BaseClient {
|
|||
if (usage != null && Number(usage[this.outputTokensKey]) > 0) {
|
||||
responseMessage.tokenCount = usage[this.outputTokensKey];
|
||||
completionTokens = responseMessage.tokenCount;
|
||||
await this.updateUserMessageTokenCount({ usage, tokenCountMap, userMessage, opts });
|
||||
await this.updateUserMessageTokenCount({
|
||||
usage,
|
||||
tokenCountMap,
|
||||
userMessage,
|
||||
userMessagePromise,
|
||||
opts,
|
||||
});
|
||||
} else {
|
||||
responseMessage.tokenCount = this.getTokenCountForResponse(responseMessage);
|
||||
completionTokens = responseMessage.tokenCount;
|
||||
|
|
@ -712,8 +717,8 @@ class BaseClient {
|
|||
await this.recordTokenUsage({ promptTokens, completionTokens, usage });
|
||||
}
|
||||
|
||||
if (this.userMessagePromise) {
|
||||
await this.userMessagePromise;
|
||||
if (userMessagePromise) {
|
||||
await userMessagePromise;
|
||||
}
|
||||
|
||||
if (this.artifactPromises) {
|
||||
|
|
@ -728,7 +733,11 @@ class BaseClient {
|
|||
}
|
||||
}
|
||||
|
||||
this.responsePromise = this.saveMessageToDatabase(responseMessage, saveOptions, user);
|
||||
responseMessage.databasePromise = this.saveMessageToDatabase(
|
||||
responseMessage,
|
||||
saveOptions,
|
||||
user,
|
||||
);
|
||||
this.savedMessageIds.add(responseMessage.messageId);
|
||||
delete responseMessage.tokenCount;
|
||||
return responseMessage;
|
||||
|
|
@ -749,9 +758,16 @@ class BaseClient {
|
|||
* @param {StreamUsage} params.usage
|
||||
* @param {Record<string, number>} params.tokenCountMap
|
||||
* @param {TMessage} params.userMessage
|
||||
* @param {Promise<TMessage>} params.userMessagePromise
|
||||
* @param {object} params.opts
|
||||
*/
|
||||
async updateUserMessageTokenCount({ usage, tokenCountMap, userMessage, opts }) {
|
||||
async updateUserMessageTokenCount({
|
||||
usage,
|
||||
tokenCountMap,
|
||||
userMessage,
|
||||
userMessagePromise,
|
||||
opts,
|
||||
}) {
|
||||
/** @type {boolean} */
|
||||
const shouldUpdateCount =
|
||||
this.calculateCurrentTokenCount != null &&
|
||||
|
|
@ -787,7 +803,7 @@ class BaseClient {
|
|||
Note: we update the user message to be sure it gets the calculated token count;
|
||||
though `AskController` saves the user message, EditController does not
|
||||
*/
|
||||
await this.userMessagePromise;
|
||||
await userMessagePromise;
|
||||
await this.updateMessageInDatabase({
|
||||
messageId: userMessage.messageId,
|
||||
tokenCount: userMessageTokenCount,
|
||||
|
|
@ -853,7 +869,7 @@ class BaseClient {
|
|||
}
|
||||
|
||||
const savedMessage = await saveMessage(
|
||||
this.options.req,
|
||||
this.options?.req,
|
||||
{
|
||||
...message,
|
||||
endpoint: this.options.endpoint,
|
||||
|
|
@ -877,7 +893,7 @@ class BaseClient {
|
|||
const existingConvo =
|
||||
this.fetchedConvo === true
|
||||
? null
|
||||
: await getConvo(this.options.req?.user?.id, message.conversationId);
|
||||
: await getConvo(this.options?.req?.user?.id, message.conversationId);
|
||||
|
||||
const unsetFields = {};
|
||||
const exceptions = new Set(['spec', 'iconURL']);
|
||||
|
|
@ -897,7 +913,7 @@ class BaseClient {
|
|||
}
|
||||
}
|
||||
|
||||
const conversation = await saveConvo(this.options.req, fieldsToKeep, {
|
||||
const conversation = await saveConvo(this.options?.req, fieldsToKeep, {
|
||||
context: 'api/app/clients/BaseClient.js - saveMessageToDatabase #saveConvo',
|
||||
unsetFields,
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
const Keyv = require('keyv');
|
||||
const { Keyv } = require('keyv');
|
||||
const crypto = require('crypto');
|
||||
const { CohereClient } = require('cohere-ai');
|
||||
const { fetchEventSource } = require('@waylaidwanderer/fetch-event-source');
|
||||
|
|
@ -339,7 +339,7 @@ class ChatGPTClient extends BaseClient {
|
|||
opts.body = JSON.stringify(modelOptions);
|
||||
|
||||
if (modelOptions.stream) {
|
||||
// eslint-disable-next-line no-async-promise-executor
|
||||
|
||||
return new Promise(async (resolve, reject) => {
|
||||
try {
|
||||
let done = false;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
const OpenAI = require('openai');
|
||||
const { OllamaClient } = require('./OllamaClient');
|
||||
const { HttpsProxyAgent } = require('https-proxy-agent');
|
||||
const { SplitStreamHandler, GraphEvents } = require('@librechat/agents');
|
||||
const { SplitStreamHandler } = require('@librechat/agents');
|
||||
const {
|
||||
Constants,
|
||||
ImageDetail,
|
||||
|
|
@ -32,17 +32,18 @@ const {
|
|||
createContextHandlers,
|
||||
} = require('./prompts');
|
||||
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
|
||||
const { createFetch, createStreamEventHandlers } = require('./generators');
|
||||
const { addSpaceIfNeeded, isEnabled, sleep } = require('~/server/utils');
|
||||
const Tokenizer = require('~/server/services/Tokenizer');
|
||||
const { spendTokens } = require('~/models/spendTokens');
|
||||
const { handleOpenAIErrors } = require('./tools/util');
|
||||
const { createLLM, RunManager } = require('./llm');
|
||||
const { logger, sendEvent } = require('~/config');
|
||||
const ChatGPTClient = require('./ChatGPTClient');
|
||||
const { summaryBuffer } = require('./memory');
|
||||
const { runTitleChain } = require('./chains');
|
||||
const { tokenSplit } = require('./document');
|
||||
const BaseClient = require('./BaseClient');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
class OpenAIClient extends BaseClient {
|
||||
constructor(apiKey, options = {}) {
|
||||
|
|
@ -609,7 +610,7 @@ class OpenAIClient extends BaseClient {
|
|||
return result.trim();
|
||||
}
|
||||
|
||||
logger.debug('[OpenAIClient] sendCompletion: result', result);
|
||||
logger.debug('[OpenAIClient] sendCompletion: result', { ...result });
|
||||
|
||||
if (this.isChatCompletion) {
|
||||
reply = result.choices[0].message.content;
|
||||
|
|
@ -818,7 +819,7 @@ ${convo}
|
|||
|
||||
const completionTokens = this.getTokenCount(title);
|
||||
|
||||
this.recordTokenUsage({ promptTokens, completionTokens, context: 'title' });
|
||||
await this.recordTokenUsage({ promptTokens, completionTokens, context: 'title' });
|
||||
} catch (e) {
|
||||
logger.error(
|
||||
'[OpenAIClient] There was an issue generating the title with the completion method',
|
||||
|
|
@ -1245,7 +1246,10 @@ ${convo}
|
|||
let chatCompletion;
|
||||
/** @type {OpenAI} */
|
||||
const openai = new OpenAI({
|
||||
fetch: this.fetch,
|
||||
fetch: createFetch({
|
||||
directEndpoint: this.options.directEndpoint,
|
||||
reverseProxyUrl: this.options.reverseProxyUrl,
|
||||
}),
|
||||
apiKey: this.apiKey,
|
||||
...opts,
|
||||
});
|
||||
|
|
@ -1275,12 +1279,13 @@ ${convo}
|
|||
}
|
||||
|
||||
if (this.options.addParams && typeof this.options.addParams === 'object') {
|
||||
const addParams = { ...this.options.addParams };
|
||||
modelOptions = {
|
||||
...modelOptions,
|
||||
...this.options.addParams,
|
||||
...addParams,
|
||||
};
|
||||
logger.debug('[OpenAIClient] chatCompletion: added params', {
|
||||
addParams: this.options.addParams,
|
||||
addParams: addParams,
|
||||
modelOptions,
|
||||
});
|
||||
}
|
||||
|
|
@ -1309,11 +1314,12 @@ ${convo}
|
|||
}
|
||||
|
||||
if (this.options.dropParams && Array.isArray(this.options.dropParams)) {
|
||||
this.options.dropParams.forEach((param) => {
|
||||
const dropParams = [...this.options.dropParams];
|
||||
dropParams.forEach((param) => {
|
||||
delete modelOptions[param];
|
||||
});
|
||||
logger.debug('[OpenAIClient] chatCompletion: dropped params', {
|
||||
dropParams: this.options.dropParams,
|
||||
dropParams: dropParams,
|
||||
modelOptions,
|
||||
});
|
||||
}
|
||||
|
|
@ -1355,15 +1361,12 @@ ${convo}
|
|||
delete modelOptions.reasoning_effort;
|
||||
}
|
||||
|
||||
const handlers = createStreamEventHandlers(this.options.res);
|
||||
this.streamHandler = new SplitStreamHandler({
|
||||
reasoningKey,
|
||||
accumulate: true,
|
||||
runId: this.responseMessageId,
|
||||
handlers: {
|
||||
[GraphEvents.ON_RUN_STEP]: (event) => sendEvent(this.options.res, event),
|
||||
[GraphEvents.ON_MESSAGE_DELTA]: (event) => sendEvent(this.options.res, event),
|
||||
[GraphEvents.ON_REASONING_DELTA]: (event) => sendEvent(this.options.res, event),
|
||||
},
|
||||
handlers,
|
||||
});
|
||||
|
||||
intermediateReply = this.streamHandler.tokens;
|
||||
|
|
|
|||
|
|
@ -252,12 +252,14 @@ class PluginsClient extends OpenAIClient {
|
|||
await this.recordTokenUsage(responseMessage);
|
||||
}
|
||||
|
||||
this.responsePromise = this.saveMessageToDatabase(responseMessage, saveOptions, user);
|
||||
const databasePromise = this.saveMessageToDatabase(responseMessage, saveOptions, user);
|
||||
delete responseMessage.tokenCount;
|
||||
return { ...responseMessage, ...result };
|
||||
return { ...responseMessage, ...result, databasePromise };
|
||||
}
|
||||
|
||||
async sendMessage(message, opts = {}) {
|
||||
/** @type {Promise<TMessage>} */
|
||||
let userMessagePromise;
|
||||
/** @type {{ filteredTools: string[], includedTools: string[] }} */
|
||||
const { filteredTools = [], includedTools = [] } = this.options.req.app.locals;
|
||||
|
||||
|
|
@ -327,10 +329,10 @@ class PluginsClient extends OpenAIClient {
|
|||
}
|
||||
|
||||
if (!this.skipSaveUserMessage) {
|
||||
this.userMessagePromise = this.saveMessageToDatabase(userMessage, saveOptions, user);
|
||||
userMessagePromise = this.saveMessageToDatabase(userMessage, saveOptions, user);
|
||||
if (typeof opts?.getReqData === 'function') {
|
||||
opts.getReqData({
|
||||
userMessagePromise: this.userMessagePromise,
|
||||
userMessagePromise,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
|||
60
api/app/clients/generators.js
Normal file
60
api/app/clients/generators.js
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
const { GraphEvents } = require('@librechat/agents');
|
||||
const { logger, sendEvent } = require('~/config');
|
||||
|
||||
/**
 * Creates a fetch-compatible function that logs each outgoing request and,
 * when `directEndpoint` is enabled, sends every request to `reverseProxyUrl`
 * instead of the URL supplied by the caller.
 *
 * The returned function closes over the two option values only, so it holds
 * no reference to the client instance that created it.
 *
 * @param {Object} [params]
 * @param {boolean} [params.directEndpoint=false] - Whether to replace the request URL with `reverseProxyUrl`.
 * @param {string} [params.reverseProxyUrl=''] - The reverse proxy URL used when `directEndpoint` is truthy.
 * @returns {(url: RequestInfo, init?: RequestInit) => Promise<Response>} A function with the same call shape as the global `fetch`.
 */
function createFetch({ directEndpoint = false, reverseProxyUrl = '' } = {}) {
  /**
   * Makes an HTTP request and logs the process.
   * @param {RequestInfo} _url - The URL to make the request to. Can be a string or a Request object.
   * @param {RequestInit} [init] - Optional init options for the request.
   * @returns {Promise<Response>} - A promise that resolves to the response of the fetch request.
   */
  return async (_url, init) => {
    // NOTE(review): with `directEndpoint` set and an empty `reverseProxyUrl`, the
    // request URL becomes '' and `fetch` will reject — confirm callers always pair them.
    const url = directEndpoint ? reverseProxyUrl : _url;
    logger.debug(`Making request to ${url}`);
    // The previous Bun-specific branch issued the exact same call, so a single
    // code path is sufficient for both runtimes.
    return await fetch(url, init);
  };
}
|
||||
|
||||
/**
 * Creates the event handlers supplied to a stream handler's `handlers` option.
 *
 * Run-step, message-delta and reasoning-delta events are all forwarded
 * unchanged to `sendEvent(res, event)`. The handlers close over `res` only,
 * so they do not capture a reference to the client that owns the stream.
 * When `res` is falsy the handlers do nothing.
 *
 * @param {Object} res - The response object to send events to
 * @returns {Object} Object mapping each handled `GraphEvents` key to its handler function
 */
function createStreamEventHandlers(res) {
  /** Forwards a single stream event to the response, if one was provided. */
  const forwardEvent = (event) => {
    if (res) {
      sendEvent(res, event);
    }
  };

  // All three event types are relayed identically, so they share one handler.
  return {
    [GraphEvents.ON_RUN_STEP]: forwardEvent,
    [GraphEvents.ON_MESSAGE_DELTA]: forwardEvent,
    [GraphEvents.ON_REASONING_DELTA]: forwardEvent,
  };
}
|
||||
|
||||
module.exports = {
|
||||
createFetch,
|
||||
createStreamEventHandlers,
|
||||
};
|
||||
|
|
@ -123,7 +123,7 @@ const getAuthFields = (toolKey) => {
|
|||
*
|
||||
* @param {object} object
|
||||
* @param {string} object.user
|
||||
* @param {Agent} [object.agent]
|
||||
* @param {Pick<Agent, 'id' | 'provider' | 'model'>} [object.agent]
|
||||
* @param {string} [object.model]
|
||||
* @param {EModelEndpoint} [object.endpoint]
|
||||
* @param {LoadToolOptions} [object.options]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue