🔧 fix: Update Token Calculations/Mapping, MCP env Initialization (#6406)

* fix: Enhance MCP initialization to process environment variables (see the MCP sketch after this list)

* fix: only build tokenCountMap with messages that are being used in the payload

* fix: Adjust maxContextTokens calculation to account for maxOutputTokens (see the sketch after this list)

* refactor: Make processMCPEnv optional in MCPManager initialization

* chore: Bump version of librechat-data-provider to 0.7.73
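
For the token-budget change above, the idea is that the prompt budget should leave room for the model's reply. A minimal sketch of that adjustment, assuming illustrative field names (`contextWindow`, `maxOutputTokens`) rather than LibreChat's exact option names:

```js
// Sketch: reserve the completion budget when computing the usable prompt window.
// `contextWindow` and `maxOutputTokens` are illustrative names, not the exact
// fields used by LibreChat's clients.
function getMaxContextTokens({ contextWindow, maxOutputTokens }) {
  if (maxOutputTokens == null) {
    // No output cap configured: the whole window is available to the prompt.
    return contextWindow;
  }
  // Otherwise, subtract the reserved completion tokens from the window.
  return contextWindow - maxOutputTokens;
}

// Example: a 128,000-token window with a 4,096-token output cap leaves
// 123,904 tokens for instructions plus conversation history.
```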
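
For the MCP items, the gist is that the manager can optionally be handed a function that expands environment variables in each server's config before it is used. A rough sketch of that shape; the constructor signature and names here are assumptions for illustration, not the exact `MCPManager` API:

```js
// Sketch only: an env-processing hook made optional at initialization time.
class MCPManager {
  /**
   * @param {(config: object) => object} [processMCPEnv] - optional hook that
   *   resolves values like `${MY_TOKEN}` in a server's `env` block.
   */
  constructor(processMCPEnv) {
    this.processMCPEnv = processMCPEnv;
  }

  prepareServerConfig(serverConfig) {
    // Only transform the config when a hook was provided; otherwise use it as-is.
    return typeof this.processMCPEnv === 'function'
      ? this.processMCPEnv(serverConfig)
      : serverConfig;
  }
}
```
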
Danny Avila · 2025-03-18 23:16:45 -04:00 · committed by GitHub
parent d6a17784dc
commit efb616d600
8 changed files with 46 additions and 20 deletions

@@ -366,17 +366,14 @@ class BaseClient {
 * context: TMessage[],
 * remainingContextTokens: number,
 * messagesToRefine: TMessage[],
-* summaryIndex: number,
-* }>} An object with four properties: `context`, `summaryIndex`, `remainingContextTokens`, and `messagesToRefine`.
+* }>} An object with three properties: `context`, `remainingContextTokens`, and `messagesToRefine`.
 * `context` is an array of messages that fit within the token limit.
-* `summaryIndex` is the index of the first message in the `messagesToRefine` array.
 * `remainingContextTokens` is the number of tokens remaining within the limit after adding the messages to the context.
 * `messagesToRefine` is an array of messages that were not added to the context because they would have exceeded the token limit.
 */
 async getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, instructions }) {
 // Every reply is primed with <|start|>assistant<|message|>, so we
 // start with 3 tokens for the label after all messages have been counted.
-let summaryIndex = -1;
 let currentTokenCount = 3;
 const instructionsTokenCount = instructions?.tokenCount ?? 0;
 let remainingContextTokens =
@@ -409,14 +406,12 @@ class BaseClient {
 }
 const prunedMemory = messages;
-summaryIndex = prunedMemory.length - 1;
 remainingContextTokens -= currentTokenCount;
 return {
 context: context.reverse(),
 remainingContextTokens,
 messagesToRefine: prunedMemory,
-summaryIndex,
 };
 }
@@ -459,7 +454,7 @@ class BaseClient {
 let orderedWithInstructions = this.addInstructions(orderedMessages, instructions);
-let { context, remainingContextTokens, messagesToRefine, summaryIndex } =
+let { context, remainingContextTokens, messagesToRefine } =
 await this.getMessagesWithinTokenLimit({
 messages: orderedWithInstructions,
 instructions,
@@ -529,7 +524,7 @@ class BaseClient {
 }
 // Make sure to only continue summarization logic if the summary message was generated
-shouldSummarize = summaryMessage && shouldSummarize;
+shouldSummarize = summaryMessage != null && shouldSummarize === true;
 logger.debug('[BaseClient] Context Count (2/2)', {
 remainingContextTokens,
@@ -539,17 +534,18 @@ class BaseClient {
 /** @type {Record<string, number> | undefined} */
 let tokenCountMap;
 if (buildTokenMap) {
-tokenCountMap = orderedWithInstructions.reduce((map, message, index) => {
+const currentPayload = shouldSummarize ? orderedWithInstructions : context;
+tokenCountMap = currentPayload.reduce((map, message, index) => {
 const { messageId } = message;
 if (!messageId) {
 return map;
 }
-if (shouldSummarize && index === summaryIndex && !usePrevSummary) {
+if (shouldSummarize && index === messagesToRefine.length - 1 && !usePrevSummary) {
 map.summaryMessage = { ...summaryMessage, messageId, tokenCount: summaryTokenCount };
 }
-map[messageId] = orderedWithInstructions[index].tokenCount;
+map[messageId] = currentPayload[index].tokenCount;
 return map;
 }, {});
 }
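
The net effect of that last hunk: when no summary is generated, only messages that survived the token-limit pruning (the `context` array) contribute entries to `tokenCountMap`. A small illustration of the intended behavior, using made-up messages rather than anything from the repo:

```js
// Hypothetical data: 'msg-1' was pruned for exceeding the limit, so only the
// surviving payload messages end up in the map.
const context = [
  { messageId: 'msg-2', tokenCount: 120 },
  { messageId: 'msg-3', tokenCount: 45 },
];

const tokenCountMap = context.reduce((map, message, index) => {
  const { messageId } = message;
  if (!messageId) {
    return map;
  }
  map[messageId] = context[index].tokenCount;
  return map;
}, {});

// => { 'msg-2': 120, 'msg-3': 45 }
```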