-
-
-
-
- }
- />
+
{hasValue ? (
@@ -66,12 +86,18 @@ function AuthField({ name, config, hasValue, control, errors }: AuthFieldProps)
placeholder={
hasValue
? localize('com_ui_mcp_update_var', { 0: config.title })
- : `${localize('com_ui_mcp_enter_var', { 0: config.title })} ${localize('com_ui_optional')}`
+ : localize('com_ui_mcp_enter_var', { 0: config.title })
}
className="w-full rounded border border-border-medium bg-transparent px-2 py-1 text-text-primary placeholder:text-text-secondary focus:outline-none sm:text-sm"
/>
)}
/>
+ {sanitizedDescription && (
+
+ )}
{errors[name] &&
{errors[name]?.message}
}
);
diff --git a/package-lock.json b/package-lock.json
index 30d31c370b..6ece322d7d 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -484,6 +484,7 @@
"copy-to-clipboard": "^3.3.3",
"cross-env": "^7.0.3",
"date-fns": "^3.3.1",
+ "dompurify": "^3.3.0",
"downloadjs": "^1.4.7",
"export-from-json": "^1.7.2",
"filenamify": "^6.0.0",
@@ -28441,11 +28442,10 @@
}
},
"node_modules/dompurify": {
- "version": "3.2.6",
- "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.2.6.tgz",
- "integrity": "sha512-/2GogDQlohXPZe6D6NOgQvXLPSYBqIWMnZ8zzOhn09REE4eyAzb+Hed3jhoM9OkuaJ8P6ZGTTVWQKAi8ieIzfQ==",
+ "version": "3.3.0",
+ "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.3.0.tgz",
+ "integrity": "sha512-r+f6MYR1gGN1eJv0TVQbhA7if/U7P87cdPl3HN5rikqaBSBxLiCb/b9O+2eG0cxz0ghyU+mU1QkbsOwERMYlWQ==",
"license": "(MPL-2.0 OR Apache-2.0)",
- "peer": true,
"optionalDependencies": {
"@types/trusted-types": "^2.0.7"
}
@@ -46398,7 +46398,7 @@
"@tanstack/react-virtual": "^3.0.0",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
- "dompurify": "^3.2.6",
+ "dompurify": "^3.3.0",
"framer-motion": "^12.23.6",
"i18next": "^24.2.2 || ^25.3.2",
"i18next-browser-languagedetector": "^8.2.0",
diff --git a/packages/client/package.json b/packages/client/package.json
index 1c5abcfe85..8d395e9f89 100644
--- a/packages/client/package.json
+++ b/packages/client/package.json
@@ -51,7 +51,7 @@
"@tanstack/react-virtual": "^3.0.0",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
- "dompurify": "^3.2.6",
+ "dompurify": "^3.3.0",
"framer-motion": "^12.23.6",
"i18next": "^24.2.2 || ^25.3.2",
"i18next-browser-languagedetector": "^8.2.0",
From 1477da49870e48d2f9282d84293dc3741b65f2fa Mon Sep 17 00:00:00 2001
From: Danny Avila
Date: Tue, 2 Dec 2025 09:11:56 -0500
Subject: [PATCH 58/78] =?UTF-8?q?=F0=9F=96=A5=EF=B8=8F=20feat:=20Add=20Pro?=
=?UTF-8?q?xy=20Support=20for=20Tavily=20API=20Tool=20(#10770)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* 🖥️ feat: Add Proxy Support for Tavily API Tool
- Integrated ProxyAgent from undici to enable proxy support for API requests in TavilySearch and TavilySearchResults.
- Updated fetch options to conditionally include the proxy configuration based on the `PROXY` environment variable (see the sketch after these notes).
* ci: TavilySearchResults with Proxy Support Tests
- Added tests to verify the integration of ProxyAgent for API requests in TavilySearchResults.
- Implemented conditional logic to check for the PROXY environment variable, ensuring correct usage of ProxyAgent based on its presence.
- Updated test setup to clear mocks before each test for improved isolation and reliability.
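A condensed TypeScript sketch of the resulting request path (illustrative only; the applied change is in the JavaScript diffs below):

    import { ProxyAgent, fetch } from 'undici';

    async function tavilySearch(requestBody: Record<string, unknown>) {
      // Build the options once, then attach undici's dispatcher only when a proxy is configured.
      const fetchOptions: NonNullable<Parameters<typeof fetch>[1]> = {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(requestBody),
      };
      // PROXY is the same environment variable used elsewhere for outbound proxying.
      if (process.env.PROXY) {
        fetchOptions.dispatcher = new ProxyAgent(process.env.PROXY);
      }
      const response = await fetch('https://api.tavily.com/search', fetchOptions);
      return response.json();
    }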
---
.../clients/tools/structured/TavilySearch.js | 11 +++-
.../tools/structured/TavilySearchResults.js | 11 +++-
.../specs/TavilySearchResults.spec.js | 50 ++++++++++++++++++-
3 files changed, 66 insertions(+), 6 deletions(-)
diff --git a/api/app/clients/tools/structured/TavilySearch.js b/api/app/clients/tools/structured/TavilySearch.js
index b5478d0fc8..55f3b6e1c8 100644
--- a/api/app/clients/tools/structured/TavilySearch.js
+++ b/api/app/clients/tools/structured/TavilySearch.js
@@ -1,4 +1,5 @@
const { z } = require('zod');
+const { ProxyAgent, fetch } = require('undici');
const { tool } = require('@langchain/core/tools');
const { getApiKey } = require('./credentials');
@@ -19,13 +20,19 @@ function createTavilySearchTool(fields = {}) {
...kwargs,
};
- const response = await fetch('https://api.tavily.com/search', {
+ const fetchOptions = {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(requestBody),
- });
+ };
+
+ if (process.env.PROXY) {
+ fetchOptions.dispatcher = new ProxyAgent(process.env.PROXY);
+ }
+
+ const response = await fetch('https://api.tavily.com/search', fetchOptions);
const json = await response.json();
if (!response.ok) {
diff --git a/api/app/clients/tools/structured/TavilySearchResults.js b/api/app/clients/tools/structured/TavilySearchResults.js
index 9461293371..796f31dcca 100644
--- a/api/app/clients/tools/structured/TavilySearchResults.js
+++ b/api/app/clients/tools/structured/TavilySearchResults.js
@@ -1,4 +1,5 @@
const { z } = require('zod');
+const { ProxyAgent, fetch } = require('undici');
const { Tool } = require('@langchain/core/tools');
const { getEnvironmentVariable } = require('@langchain/core/utils/env');
@@ -102,13 +103,19 @@ class TavilySearchResults extends Tool {
...this.kwargs,
};
- const response = await fetch('https://api.tavily.com/search', {
+ const fetchOptions = {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(requestBody),
- });
+ };
+
+ if (process.env.PROXY) {
+ fetchOptions.dispatcher = new ProxyAgent(process.env.PROXY);
+ }
+
+ const response = await fetch('https://api.tavily.com/search', fetchOptions);
const json = await response.json();
if (!response.ok) {
diff --git a/api/app/clients/tools/structured/specs/TavilySearchResults.spec.js b/api/app/clients/tools/structured/specs/TavilySearchResults.spec.js
index 5ea00140c7..f37c83e30e 100644
--- a/api/app/clients/tools/structured/specs/TavilySearchResults.spec.js
+++ b/api/app/clients/tools/structured/specs/TavilySearchResults.spec.js
@@ -1,6 +1,7 @@
+const { fetch, ProxyAgent } = require('undici');
const TavilySearchResults = require('../TavilySearchResults');
-jest.mock('node-fetch');
+jest.mock('undici');
jest.mock('@langchain/core/utils/env');
describe('TavilySearchResults', () => {
@@ -13,6 +14,7 @@ describe('TavilySearchResults', () => {
beforeEach(() => {
jest.resetModules();
+ jest.clearAllMocks();
process.env = {
...originalEnv,
TAVILY_API_KEY: mockApiKey,
@@ -20,7 +22,6 @@ describe('TavilySearchResults', () => {
});
afterEach(() => {
- jest.clearAllMocks();
process.env = originalEnv;
});
@@ -35,4 +36,49 @@ describe('TavilySearchResults', () => {
});
expect(instance.apiKey).toBe(mockApiKey);
});
+
+ describe('proxy support', () => {
+ const mockResponse = {
+ ok: true,
+ json: jest.fn().mockResolvedValue({ results: [] }),
+ };
+
+ beforeEach(() => {
+ fetch.mockResolvedValue(mockResponse);
+ });
+
+ it('should use ProxyAgent when PROXY env var is set', async () => {
+ const proxyUrl = 'http://proxy.example.com:8080';
+ process.env.PROXY = proxyUrl;
+
+ const mockProxyAgent = { type: 'proxy-agent' };
+ ProxyAgent.mockImplementation(() => mockProxyAgent);
+
+ const instance = new TavilySearchResults({ TAVILY_API_KEY: mockApiKey });
+ await instance._call({ query: 'test query' });
+
+ expect(ProxyAgent).toHaveBeenCalledWith(proxyUrl);
+ expect(fetch).toHaveBeenCalledWith(
+ 'https://api.tavily.com/search',
+ expect.objectContaining({
+ dispatcher: mockProxyAgent,
+ }),
+ );
+ });
+
+ it('should not use ProxyAgent when PROXY env var is not set', async () => {
+ delete process.env.PROXY;
+
+ const instance = new TavilySearchResults({ TAVILY_API_KEY: mockApiKey });
+ await instance._call({ query: 'test query' });
+
+ expect(ProxyAgent).not.toHaveBeenCalled();
+ expect(fetch).toHaveBeenCalledWith(
+ 'https://api.tavily.com/search',
+ expect.not.objectContaining({
+ dispatcher: expect.anything(),
+ }),
+ );
+ });
+ });
});
From 28bdd0dfa696af77da42471a105bc1308f274b6c Mon Sep 17 00:00:00 2001
From: Danny Avila
Date: Tue, 2 Dec 2025 09:48:41 -0500
Subject: [PATCH 59/78] =?UTF-8?q?=F0=9F=A7=8A=20refactor:=20`iconURL`=20Ha?=
=?UTF-8?q?ndling=20in=20Submission=20Flow=20(#10772)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
- Updated `useChatFunctions` to derive `iconURL` from conversation data before parsing
- Modified `parseCompactConvo` to explicitly omit `iconURL` from parsed conversations, reinforcing security measures around URL handling (sketched below).
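Paraphrasing the hunks below (argument lists elided):

    // useChatFunctions: capture the icon from the raw conversation before parsing.
    const iconURL = conversation?.iconURL;
    const convo = parseCompactConvo({ /* endpoint, endpointType, conversation, ... */ });

    // The response message now uses that captured value rather than convo.iconURL,
    // since parseCompactConvo omits iconURL from its return value.
    const responseMessage = {
      conversationId,
      unfinished: false,
      isCreatedByUser: false,
      model: convo?.model,
      error: false,
      iconURL,
    };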
---
client/src/hooks/Chat/useChatFunctions.ts | 3 ++-
packages/data-provider/src/parsers.ts | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/client/src/hooks/Chat/useChatFunctions.ts b/client/src/hooks/Chat/useChatFunctions.ts
index 114a6376a6..8a61cd91c1 100644
--- a/client/src/hooks/Chat/useChatFunctions.ts
+++ b/client/src/hooks/Chat/useChatFunctions.ts
@@ -168,6 +168,7 @@ export default function useChatFunctions({
const endpointsConfig = queryClient.getQueryData([QueryKeys.endpoints]);
const endpointType = getEndpointField(endpointsConfig, endpoint, 'type');
+ const iconURL = conversation?.iconURL;
/** This becomes part of the `endpointOption` */
const convo = parseCompactConvo({
@@ -248,9 +249,9 @@ export default function useChatFunctions({
conversationId,
unfinished: false,
isCreatedByUser: false,
- iconURL: convo?.iconURL,
model: convo?.model,
error: false,
+ iconURL,
};
if (isAssistantsEndpoint(endpoint)) {
diff --git a/packages/data-provider/src/parsers.ts b/packages/data-provider/src/parsers.ts
index be8d6dcde0..c4fea469ee 100644
--- a/packages/data-provider/src/parsers.ts
+++ b/packages/data-provider/src/parsers.ts
@@ -326,7 +326,7 @@ export const parseCompactConvo = ({
possibleValues?: TPossibleValues;
// TODO: POC for default schema
// defaultSchema?: Partial,
-}) => {
+}): Omit | null => {
if (!endpoint) {
throw new Error(`undefined endpoint: ${endpoint}`);
}
From b2387cc6fad0788cdb0fd591be6fa8cb6a680aaa Mon Sep 17 00:00:00 2001
From: Danny Avila
Date: Tue, 2 Dec 2025 10:35:19 -0500
Subject: [PATCH 60/78] =?UTF-8?q?=F0=9F=93=A6=20chore:=20Bump=20`@librecha?=
=?UTF-8?q?t/agents`=20to=20v3.0.36=20(#10775)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
api/package.json | 2 +-
package-lock.json | 10 +++++-----
packages/api/package.json | 2 +-
3 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/api/package.json b/api/package.json
index a54e963d13..b782588303 100644
--- a/api/package.json
+++ b/api/package.json
@@ -47,7 +47,7 @@
"@langchain/google-genai": "^0.2.13",
"@langchain/google-vertexai": "^0.2.13",
"@langchain/textsplitters": "^0.1.0",
- "@librechat/agents": "^3.0.35",
+ "@librechat/agents": "^3.0.36",
"@librechat/api": "*",
"@librechat/data-schemas": "*",
"@microsoft/microsoft-graph-client": "^3.0.7",
diff --git a/package-lock.json b/package-lock.json
index 6ece322d7d..fe4489d763 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -61,7 +61,7 @@
"@langchain/google-genai": "^0.2.13",
"@langchain/google-vertexai": "^0.2.13",
"@langchain/textsplitters": "^0.1.0",
- "@librechat/agents": "^3.0.35",
+ "@librechat/agents": "^3.0.36",
"@librechat/api": "*",
"@librechat/data-schemas": "*",
"@microsoft/microsoft-graph-client": "^3.0.7",
@@ -16281,9 +16281,9 @@
}
},
"node_modules/@librechat/agents": {
- "version": "3.0.35",
- "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.0.35.tgz",
- "integrity": "sha512-9KcTin8CtJIsADkcZtyCHwfn9GQ7AIRWTAhNDFtxXsuqmlnytD8bjh0GiRc6uOOQ/Dw8zL/oRcqNDubempwBfg==",
+ "version": "3.0.36",
+ "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-3.0.36.tgz",
+ "integrity": "sha512-52+uNiG0X2B4TZX03ldFRvqtJrExnntEFQV5UfA38+2sNbYgPm4lcdKyAHr9OTPdmmtbmDY/gKKguRiUzLVL2g==",
"license": "MIT",
"dependencies": {
"@langchain/anthropic": "^0.3.26",
@@ -46276,7 +46276,7 @@
"@azure/storage-blob": "^12.27.0",
"@keyv/redis": "^4.3.3",
"@langchain/core": "^0.3.79",
- "@librechat/agents": "^3.0.35",
+ "@librechat/agents": "^3.0.36",
"@librechat/data-schemas": "*",
"@modelcontextprotocol/sdk": "^1.21.0",
"axios": "^1.12.1",
diff --git a/packages/api/package.json b/packages/api/package.json
index 36a290fb57..5799eee261 100644
--- a/packages/api/package.json
+++ b/packages/api/package.json
@@ -84,7 +84,7 @@
"@azure/storage-blob": "^12.27.0",
"@keyv/redis": "^4.3.3",
"@langchain/core": "^0.3.79",
- "@librechat/agents": "^3.0.35",
+ "@librechat/agents": "^3.0.36",
"@librechat/data-schemas": "*",
"@modelcontextprotocol/sdk": "^1.21.0",
"axios": "^1.12.1",
From 8bdc808074c5ab99bc662f23f20183ebc8228b4f Mon Sep 17 00:00:00 2001
From: Danny Avila
Date: Tue, 2 Dec 2025 12:22:04 -0500
Subject: [PATCH 61/78] =?UTF-8?q?=E2=9A=A1=20refactor:=20Optimize=20&=20St?=
=?UTF-8?q?andardize=20Tokenizer=20Usage=20(#10777)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* refactor: Token Limit Processing with Enhanced Efficiency
- Added a new test suite for `processTextWithTokenLimit`, ensuring comprehensive coverage of various scenarios including under, at, and exceeding token limits.
- Refactored the `processTextWithTokenLimit` function to utilize a ratio-based estimation method, significantly reducing the number of token counting function calls compared to the previous binary search approach (see the sketch after these notes).
- Improved handling of edge cases and variable token density, ensuring accurate truncation and performance across diverse text inputs.
- Included direct comparisons with the old implementation to validate correctness and efficiency improvements.
* refactor: Remove Tokenizer Route and Related References
- Deleted the tokenizer route from the server and removed its references from the routes index and server files, streamlining the API structure.
- This change simplifies the routing configuration by eliminating unused endpoints.
* refactor: Migrate countTokens Utility to API Module
- Removed the local countTokens utility and integrated it into the @librechat/api module for centralized access.
- Updated various files to reference the new countTokens import from the API module, ensuring consistent usage across the application.
- Cleaned up unused references and imports related to the previous countTokens implementation.
* refactor: Centralize escapeRegExp Utility in API Module
- Moved the escapeRegExp function from local utility files to the @librechat/api module for consistent usage across the application.
- Updated imports in various files to reference the new centralized escapeRegExp function, ensuring cleaner code and reducing redundancy.
- Removed duplicate implementations of escapeRegExp from multiple files, streamlining the codebase.
* refactor: Enhance Token Counting Flexibility in Text Processing
- Updated the `processTextWithTokenLimit` function to accept both synchronous and asynchronous token counting functions, improving its versatility.
- Introduced a new `TokenCountFn` type to define the token counting function signature.
- Added comprehensive tests to validate the behavior of `processTextWithTokenLimit` with both sync and async token counting functions, ensuring consistent results.
- Implemented a wrapper to track call counts for the `countTokens` function, optimizing performance and reducing unnecessary calls.
- Enhanced existing tests to compare the performance of the new implementation against the old one, demonstrating significant improvements in efficiency.
* chore: documentation for Truncation Safety Buffer in Token Processing
- Added a safety buffer multiplier to the character position estimates during text truncation to prevent overshooting token limits.
- Updated the `processTextWithTokenLimit` function to utilize the new `TRUNCATION_SAFETY_BUFFER` constant, enhancing the accuracy of token limit processing.
- Improved documentation to clarify the rationale behind the buffer and its impact on performance and efficiency in token counting.
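Numerically: a ~489k-character text at roughly 4 chars/token is ~122k tokens; for a 100k-token limit, the first estimate lands at 489k * (100k / 122k) * 0.98 ≈ 392k characters, so zero or one refinement pass is typical. A condensed sketch of the loop (the applied implementation, with logging and the iteration cap, is in packages/api/src/utils/text.ts below):

    type TokenCountFn = (text: string) => number | Promise<number>;

    async function truncateByRatio(text: string, tokenLimit: number, countFn: TokenCountFn) {
      const total = await countFn(text);
      if (total <= tokenLimit) {
        return { text, tokenCount: total, wasTruncated: false };
      }
      // First guess from the observed chars-per-token ratio, undershooting by 2%.
      let cut = Math.floor(text.length * (tokenLimit / total) * 0.98);
      let truncated = text.substring(0, cut);
      let count = await countFn(truncated);
      // Refine only while the estimate still overshoots the limit.
      while (count > tokenLimit && cut > 0) {
        cut = Math.floor(cut * (tokenLimit / count) * 0.98);
        truncated = text.substring(0, cut);
        count = await countFn(truncated);
      }
      return { text: truncated, tokenCount: count, wasTruncated: true };
    }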
---
api/app/clients/BaseClient.js | 2 +-
api/models/Prompt.js | 2 +-
api/server/controllers/assistants/chatV1.js | 3 +-
api/server/controllers/assistants/chatV2.js | 3 +-
api/server/experimental.js | 1 -
api/server/index.js | 1 -
api/server/routes/index.js | 2 -
api/server/routes/messages.js | 3 +-
api/server/routes/tokenizer.js | 19 -
api/server/services/Threads/manage.js | 2 +-
api/server/utils/countTokens.js | 37 -
api/server/utils/handleText.js | 9 -
api/server/utils/index.js | 2 -
packages/api/src/prompts/format.ts | 2 +-
packages/api/src/utils/common.ts | 9 +
packages/api/src/utils/index.ts | 2 +-
packages/api/src/utils/text.spec.ts | 851 ++++++++++++++++++++
packages/api/src/utils/text.ts | 72 +-
packages/api/src/utils/tokenizer.ts | 10 +
19 files changed, 925 insertions(+), 107 deletions(-)
delete mode 100644 api/server/routes/tokenizer.js
delete mode 100644 api/server/utils/countTokens.js
create mode 100644 packages/api/src/utils/text.spec.ts
diff --git a/api/app/clients/BaseClient.js b/api/app/clients/BaseClient.js
index c0d9169b51..f4a69be229 100644
--- a/api/app/clients/BaseClient.js
+++ b/api/app/clients/BaseClient.js
@@ -2,6 +2,7 @@ const crypto = require('crypto');
const fetch = require('node-fetch');
const { logger } = require('@librechat/data-schemas');
const {
+ countTokens,
getBalanceConfig,
extractFileContext,
encodeAndFormatAudios,
@@ -23,7 +24,6 @@ const { getMessages, saveMessage, updateMessage, saveConvo, getConvo } = require
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
const { checkBalance } = require('~/models/balanceMethods');
const { truncateToolCallOutputs } = require('./prompts');
-const countTokens = require('~/server/utils/countTokens');
const { getFiles } = require('~/models/File');
const TextStream = require('./TextStream');
diff --git a/api/models/Prompt.js b/api/models/Prompt.js
index fbc161e97d..bde911b23a 100644
--- a/api/models/Prompt.js
+++ b/api/models/Prompt.js
@@ -1,4 +1,5 @@
const { ObjectId } = require('mongodb');
+const { escapeRegExp } = require('@librechat/api');
const { logger } = require('@librechat/data-schemas');
const {
Constants,
@@ -14,7 +15,6 @@ const {
} = require('./Project');
const { removeAllPermissions } = require('~/server/services/PermissionService');
const { PromptGroup, Prompt, AclEntry } = require('~/db/models');
-const { escapeRegExp } = require('~/server/utils');
/**
* Create a pipeline for the aggregation to get prompt groups
diff --git a/api/server/controllers/assistants/chatV1.js b/api/server/controllers/assistants/chatV1.js
index 4bd49e04dd..91759bed37 100644
--- a/api/server/controllers/assistants/chatV1.js
+++ b/api/server/controllers/assistants/chatV1.js
@@ -1,7 +1,7 @@
const { v4 } = require('uuid');
const { sleep } = require('@librechat/agents');
const { logger } = require('@librechat/data-schemas');
-const { sendEvent, getBalanceConfig, getModelMaxTokens } = require('@librechat/api');
+const { sendEvent, getBalanceConfig, getModelMaxTokens, countTokens } = require('@librechat/api');
const {
Time,
Constants,
@@ -33,7 +33,6 @@ const { getTransactions } = require('~/models/Transaction');
const { checkBalance } = require('~/models/balanceMethods');
const { getConvo } = require('~/models/Conversation');
const getLogStores = require('~/cache/getLogStores');
-const { countTokens } = require('~/server/utils');
const { getOpenAIClient } = require('./helpers');
/**
diff --git a/api/server/controllers/assistants/chatV2.js b/api/server/controllers/assistants/chatV2.js
index 20b3398ee2..2dcfef2846 100644
--- a/api/server/controllers/assistants/chatV2.js
+++ b/api/server/controllers/assistants/chatV2.js
@@ -1,7 +1,7 @@
const { v4 } = require('uuid');
const { sleep } = require('@librechat/agents');
const { logger } = require('@librechat/data-schemas');
-const { sendEvent, getBalanceConfig, getModelMaxTokens } = require('@librechat/api');
+const { sendEvent, getBalanceConfig, getModelMaxTokens, countTokens } = require('@librechat/api');
const {
Time,
Constants,
@@ -30,7 +30,6 @@ const { getTransactions } = require('~/models/Transaction');
const { checkBalance } = require('~/models/balanceMethods');
const { getConvo } = require('~/models/Conversation');
const getLogStores = require('~/cache/getLogStores');
-const { countTokens } = require('~/server/utils');
const { getOpenAIClient } = require('./helpers');
/**
diff --git a/api/server/experimental.js b/api/server/experimental.js
index 2e7f5dff63..0ceb58de22 100644
--- a/api/server/experimental.js
+++ b/api/server/experimental.js
@@ -292,7 +292,6 @@ if (cluster.isMaster) {
app.use('/api/presets', routes.presets);
app.use('/api/prompts', routes.prompts);
app.use('/api/categories', routes.categories);
- app.use('/api/tokenizer', routes.tokenizer);
app.use('/api/endpoints', routes.endpoints);
app.use('/api/balance', routes.balance);
app.use('/api/models', routes.models);
diff --git a/api/server/index.js b/api/server/index.js
index d0bb64405f..767847c286 100644
--- a/api/server/index.js
+++ b/api/server/index.js
@@ -128,7 +128,6 @@ const startServer = async () => {
app.use('/api/presets', routes.presets);
app.use('/api/prompts', routes.prompts);
app.use('/api/categories', routes.categories);
- app.use('/api/tokenizer', routes.tokenizer);
app.use('/api/endpoints', routes.endpoints);
app.use('/api/balance', routes.balance);
app.use('/api/models', routes.models);
diff --git a/api/server/routes/index.js b/api/server/routes/index.js
index adaca3859a..e8250a1f4d 100644
--- a/api/server/routes/index.js
+++ b/api/server/routes/index.js
@@ -1,7 +1,6 @@
const accessPermissions = require('./accessPermissions');
const assistants = require('./assistants');
const categories = require('./categories');
-const tokenizer = require('./tokenizer');
const endpoints = require('./endpoints');
const staticRoute = require('./static');
const messages = require('./messages');
@@ -53,7 +52,6 @@ module.exports = {
messages,
memories,
endpoints,
- tokenizer,
assistants,
categories,
staticRoute,
diff --git a/api/server/routes/messages.js b/api/server/routes/messages.js
index 1e214278c9..901dd8961f 100644
--- a/api/server/routes/messages.js
+++ b/api/server/routes/messages.js
@@ -1,7 +1,7 @@
const express = require('express');
-const { unescapeLaTeX } = require('@librechat/api');
const { logger } = require('@librechat/data-schemas');
const { ContentTypes } = require('librechat-data-provider');
+const { unescapeLaTeX, countTokens } = require('@librechat/api');
const {
saveConvo,
getMessage,
@@ -14,7 +14,6 @@ const { findAllArtifacts, replaceArtifactContent } = require('~/server/services/
const { requireJwtAuth, validateMessageReq } = require('~/server/middleware');
const { cleanUpPrimaryKeyValue } = require('~/lib/utils/misc');
const { getConvosQueried } = require('~/models/Conversation');
-const { countTokens } = require('~/server/utils');
const { Message } = require('~/db/models');
const router = express.Router();
diff --git a/api/server/routes/tokenizer.js b/api/server/routes/tokenizer.js
deleted file mode 100644
index 62eb31b70e..0000000000
--- a/api/server/routes/tokenizer.js
+++ /dev/null
@@ -1,19 +0,0 @@
-const express = require('express');
-const { logger } = require('@librechat/data-schemas');
-const requireJwtAuth = require('~/server/middleware/requireJwtAuth');
-const { countTokens } = require('~/server/utils');
-
-const router = express.Router();
-
-router.post('/', requireJwtAuth, async (req, res) => {
- try {
- const { arg } = req.body;
- const count = await countTokens(arg?.text ?? arg);
- res.send({ count });
- } catch (e) {
- logger.error('[/tokenizer] Error counting tokens', e);
- res.status(500).json('Error counting tokens');
- }
-});
-
-module.exports = router;
diff --git a/api/server/services/Threads/manage.js b/api/server/services/Threads/manage.js
index 4cc1e107ed..627dba1a35 100644
--- a/api/server/services/Threads/manage.js
+++ b/api/server/services/Threads/manage.js
@@ -1,5 +1,6 @@
const path = require('path');
const { v4 } = require('uuid');
+const { countTokens, escapeRegExp } = require('@librechat/api');
const {
Constants,
ContentTypes,
@@ -8,7 +9,6 @@ const {
} = require('librechat-data-provider');
const { retrieveAndProcessFile } = require('~/server/services/Files/process');
const { recordMessage, getMessages } = require('~/models/Message');
-const { countTokens, escapeRegExp } = require('~/server/utils');
const { spendTokens } = require('~/models/spendTokens');
const { saveConvo } = require('~/models/Conversation');
diff --git a/api/server/utils/countTokens.js b/api/server/utils/countTokens.js
deleted file mode 100644
index 504de26a5e..0000000000
--- a/api/server/utils/countTokens.js
+++ /dev/null
@@ -1,37 +0,0 @@
-const { Tiktoken } = require('tiktoken/lite');
-const { logger } = require('@librechat/data-schemas');
-const p50k_base = require('tiktoken/encoders/p50k_base.json');
-const cl100k_base = require('tiktoken/encoders/cl100k_base.json');
-
-/**
- * Counts the number of tokens in a given text using a specified encoding model.
- *
- * This function utilizes the 'Tiktoken' library to encode text based on the selected model.
- * It supports two models, 'text-davinci-003' and 'gpt-3.5-turbo', each with its own encoding strategy.
- * For 'text-davinci-003', the 'p50k_base' encoder is used, whereas for other models, the 'cl100k_base' encoder is applied.
- * In case of an error during encoding, the error is logged, and the function returns 0.
- *
- * @async
- * @param {string} text - The text to be tokenized. Defaults to an empty string if not provided.
- * @param {string} modelName - The name of the model used for tokenizing. Defaults to 'gpt-3.5-turbo'.
- * @returns {Promise<number>} The number of tokens in the provided text. Returns 0 if an error occurs.
- * @throws Logs the error to a logger and rethrows if any error occurs during tokenization.
- */
-const countTokens = async (text = '', modelName = 'gpt-3.5-turbo') => {
- let encoder = null;
- try {
- const model = modelName.includes('text-davinci-003') ? p50k_base : cl100k_base;
- encoder = new Tiktoken(model.bpe_ranks, model.special_tokens, model.pat_str);
- const tokens = encoder.encode(text);
- encoder.free();
- return tokens.length;
- } catch (e) {
- logger.error('[countTokens]', e);
- if (encoder) {
- encoder.free();
- }
- return 0;
- }
-};
-
-module.exports = countTokens;
diff --git a/api/server/utils/handleText.js b/api/server/utils/handleText.js
index 15c2db3fcc..a798dc99bd 100644
--- a/api/server/utils/handleText.js
+++ b/api/server/utils/handleText.js
@@ -10,14 +10,6 @@ const {
const { sendEvent } = require('@librechat/api');
const partialRight = require('lodash/partialRight');
-/** Helper function to escape special characters in regex
- * @param {string} string - The string to escape.
- * @returns {string} The escaped string.
- */
-function escapeRegExp(string) {
- return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
-}
-
const addSpaceIfNeeded = (text) => (text.length > 0 && !text.endsWith(' ') ? text + ' ' : text);
const base = { message: true, initial: true };
@@ -181,7 +173,6 @@ function generateConfig(key, baseURL, endpoint) {
module.exports = {
handleText,
formatSteps,
- escapeRegExp,
formatAction,
isUserProvided,
generateConfig,
diff --git a/api/server/utils/index.js b/api/server/utils/index.js
index 7e29b9f518..918ab54f85 100644
--- a/api/server/utils/index.js
+++ b/api/server/utils/index.js
@@ -1,5 +1,4 @@
const removePorts = require('./removePorts');
-const countTokens = require('./countTokens');
const handleText = require('./handleText');
const sendEmail = require('./sendEmail');
const queue = require('./queue');
@@ -7,7 +6,6 @@ const files = require('./files');
module.exports = {
...handleText,
- countTokens,
removePorts,
sendEmail,
...files,
diff --git a/packages/api/src/prompts/format.ts b/packages/api/src/prompts/format.ts
index ad6f4ec237..df2b11b59a 100644
--- a/packages/api/src/prompts/format.ts
+++ b/packages/api/src/prompts/format.ts
@@ -2,6 +2,7 @@ import { SystemCategories } from 'librechat-data-provider';
import type { IPromptGroupDocument as IPromptGroup } from '@librechat/data-schemas';
import type { Types } from 'mongoose';
import type { PromptGroupsListResponse } from '~/types';
+import { escapeRegExp } from '~/utils/common';
/**
* Formats prompt groups for the paginated /groups endpoint response
@@ -101,7 +102,6 @@ export function buildPromptGroupFilter({
// Handle name filter - convert to regex for case-insensitive search
if (name) {
- const escapeRegExp = (str: string) => str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
filter.name = new RegExp(escapeRegExp(name), 'i');
}
diff --git a/packages/api/src/utils/common.ts b/packages/api/src/utils/common.ts
index a5860b0a69..6f4871b741 100644
--- a/packages/api/src/utils/common.ts
+++ b/packages/api/src/utils/common.ts
@@ -48,3 +48,12 @@ export function optionalChainWithEmptyCheck(
}
return values[values.length - 1];
}
+
+/**
+ * Escapes special characters in a string for use in a regular expression.
+ * @param str - The string to escape.
+ * @returns The escaped string safe for use in RegExp.
+ */
+export function escapeRegExp(str: string): string {
+ return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+}
diff --git a/packages/api/src/utils/index.ts b/packages/api/src/utils/index.ts
index 050f42796b..76f11289cb 100644
--- a/packages/api/src/utils/index.ts
+++ b/packages/api/src/utils/index.ts
@@ -17,7 +17,7 @@ export * from './promise';
export * from './sanitizeTitle';
export * from './tempChatRetention';
export * from './text';
-export { default as Tokenizer } from './tokenizer';
+export { default as Tokenizer, countTokens } from './tokenizer';
export * from './yaml';
export * from './http';
export * from './tokens';
diff --git a/packages/api/src/utils/text.spec.ts b/packages/api/src/utils/text.spec.ts
new file mode 100644
index 0000000000..1b8d8aac98
--- /dev/null
+++ b/packages/api/src/utils/text.spec.ts
@@ -0,0 +1,851 @@
+import { processTextWithTokenLimit, TokenCountFn } from './text';
+import Tokenizer, { countTokens } from './tokenizer';
+
+jest.mock('@librechat/data-schemas', () => ({
+ logger: {
+ debug: jest.fn(),
+ warn: jest.fn(),
+ error: jest.fn(),
+ },
+}));
+
+/**
+ * OLD IMPLEMENTATION (Binary Search) - kept for comparison testing
+ * This is the original algorithm that caused CPU spikes
+ */
+async function processTextWithTokenLimitOLD({
+ text,
+ tokenLimit,
+ tokenCountFn,
+}: {
+ text: string;
+ tokenLimit: number;
+ tokenCountFn: TokenCountFn;
+}): Promise<{ text: string; tokenCount: number; wasTruncated: boolean }> {
+ const originalTokenCount = await tokenCountFn(text);
+
+ if (originalTokenCount <= tokenLimit) {
+ return {
+ text,
+ tokenCount: originalTokenCount,
+ wasTruncated: false,
+ };
+ }
+
+ let low = 0;
+ let high = text.length;
+ let bestText = '';
+
+ while (low <= high) {
+ const mid = Math.floor((low + high) / 2);
+ const truncatedText = text.substring(0, mid);
+ const tokenCount = await tokenCountFn(truncatedText);
+
+ if (tokenCount <= tokenLimit) {
+ bestText = truncatedText;
+ low = mid + 1;
+ } else {
+ high = mid - 1;
+ }
+ }
+
+ const finalTokenCount = await tokenCountFn(bestText);
+
+ return {
+ text: bestText,
+ tokenCount: finalTokenCount,
+ wasTruncated: true,
+ };
+}
+
+/**
+ * Creates a wrapper around Tokenizer.getTokenCount that tracks call count
+ */
+const createRealTokenCounter = () => {
+ let callCount = 0;
+ const tokenCountFn = (text: string): number => {
+ callCount++;
+ return Tokenizer.getTokenCount(text, 'cl100k_base');
+ };
+ return {
+ tokenCountFn,
+ getCallCount: () => callCount,
+ resetCallCount: () => {
+ callCount = 0;
+ },
+ };
+};
+
+/**
+ * Creates a wrapper around the async countTokens function that tracks call count
+ */
+const createCountTokensCounter = () => {
+ let callCount = 0;
+ const tokenCountFn = async (text: string): Promise<number> => {
+ callCount++;
+ return countTokens(text);
+ };
+ return {
+ tokenCountFn,
+ getCallCount: () => callCount,
+ resetCallCount: () => {
+ callCount = 0;
+ },
+ };
+};
+
+describe('processTextWithTokenLimit', () => {
+ /**
+ * Creates a mock token count function that simulates realistic token counting.
+ * Roughly 4 characters per token (common for English text).
+ * Tracks call count to verify efficiency.
+ */
+ const createMockTokenCounter = () => {
+ let callCount = 0;
+ const tokenCountFn = (text: string): number => {
+ callCount++;
+ return Math.ceil(text.length / 4);
+ };
+ return {
+ tokenCountFn,
+ getCallCount: () => callCount,
+ resetCallCount: () => {
+ callCount = 0;
+ },
+ };
+ };
+
+ /** Creates a string of specified character length */
+ const createTextOfLength = (charLength: number): string => {
+ return 'a'.repeat(charLength);
+ };
+
+ /** Creates realistic text content with varied token density */
+ const createRealisticText = (approximateTokens: number): string => {
+ const words = [
+ 'the',
+ 'quick',
+ 'brown',
+ 'fox',
+ 'jumps',
+ 'over',
+ 'lazy',
+ 'dog',
+ 'lorem',
+ 'ipsum',
+ 'dolor',
+ 'sit',
+ 'amet',
+ 'consectetur',
+ 'adipiscing',
+ 'elit',
+ 'sed',
+ 'do',
+ 'eiusmod',
+ 'tempor',
+ 'incididunt',
+ 'ut',
+ 'labore',
+ 'et',
+ 'dolore',
+ 'magna',
+ 'aliqua',
+ 'enim',
+ 'ad',
+ 'minim',
+ 'veniam',
+ 'authentication',
+ 'implementation',
+ 'configuration',
+ 'documentation',
+ ];
+ const result: string[] = [];
+ for (let i = 0; i < approximateTokens; i++) {
+ result.push(words[i % words.length]);
+ }
+ return result.join(' ');
+ };
+
+ describe('tokenCountFn flexibility (sync and async)', () => {
+ it('should work with synchronous tokenCountFn', async () => {
+ const syncTokenCountFn = (text: string): number => Math.ceil(text.length / 4);
+ const text = 'Hello, world! This is a test message.';
+ const tokenLimit = 5;
+
+ const result = await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn: syncTokenCountFn,
+ });
+
+ expect(result.wasTruncated).toBe(true);
+ expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit);
+ });
+
+ it('should work with asynchronous tokenCountFn', async () => {
+ const asyncTokenCountFn = async (text: string): Promise<number> => {
+ await new Promise((resolve) => setTimeout(resolve, 1));
+ return Math.ceil(text.length / 4);
+ };
+ const text = 'Hello, world! This is a test message.';
+ const tokenLimit = 5;
+
+ const result = await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn: asyncTokenCountFn,
+ });
+
+ expect(result.wasTruncated).toBe(true);
+ expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit);
+ });
+
+ it('should produce equivalent results with sync and async tokenCountFn', async () => {
+ const syncTokenCountFn = (text: string): number => Math.ceil(text.length / 4);
+ const asyncTokenCountFn = async (text: string): Promise<number> => Math.ceil(text.length / 4);
+ const text = 'a'.repeat(8000);
+ const tokenLimit = 1000;
+
+ const syncResult = await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn: syncTokenCountFn,
+ });
+
+ const asyncResult = await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn: asyncTokenCountFn,
+ });
+
+ expect(syncResult.tokenCount).toBe(asyncResult.tokenCount);
+ expect(syncResult.wasTruncated).toBe(asyncResult.wasTruncated);
+ expect(syncResult.text.length).toBe(asyncResult.text.length);
+ });
+ });
+
+ describe('when text is under the token limit', () => {
+ it('should return original text unchanged', async () => {
+ const { tokenCountFn } = createMockTokenCounter();
+ const text = 'Hello, world!';
+ const tokenLimit = 100;
+
+ const result = await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn,
+ });
+
+ expect(result.text).toBe(text);
+ expect(result.wasTruncated).toBe(false);
+ });
+
+ it('should return correct token count', async () => {
+ const { tokenCountFn } = createMockTokenCounter();
+ const text = 'Hello, world!';
+ const tokenLimit = 100;
+
+ const result = await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn,
+ });
+
+ expect(result.tokenCount).toBe(Math.ceil(text.length / 4));
+ });
+
+ it('should only call tokenCountFn once when under limit', async () => {
+ const { tokenCountFn, getCallCount } = createMockTokenCounter();
+ const text = 'Hello, world!';
+ const tokenLimit = 100;
+
+ await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn,
+ });
+
+ expect(getCallCount()).toBe(1);
+ });
+ });
+
+ describe('when text is exactly at the token limit', () => {
+ it('should return original text unchanged', async () => {
+ const { tokenCountFn } = createMockTokenCounter();
+ const text = createTextOfLength(400);
+ const tokenLimit = 100;
+
+ const result = await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn,
+ });
+
+ expect(result.text).toBe(text);
+ expect(result.wasTruncated).toBe(false);
+ expect(result.tokenCount).toBe(tokenLimit);
+ });
+ });
+
+ describe('when text exceeds the token limit', () => {
+ it('should truncate text to fit within limit', async () => {
+ const { tokenCountFn } = createMockTokenCounter();
+ const text = createTextOfLength(8000);
+ const tokenLimit = 1000;
+
+ const result = await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn,
+ });
+
+ expect(result.wasTruncated).toBe(true);
+ expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit);
+ expect(result.text.length).toBeLessThan(text.length);
+ });
+
+ it('should truncate text to be close to but not exceed the limit', async () => {
+ const { tokenCountFn } = createMockTokenCounter();
+ const text = createTextOfLength(8000);
+ const tokenLimit = 1000;
+
+ const result = await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn,
+ });
+
+ expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit);
+ expect(result.tokenCount).toBeGreaterThan(tokenLimit * 0.9);
+ });
+ });
+
+ describe('efficiency - tokenCountFn call count', () => {
+ it('should call tokenCountFn at most 7 times for large text (vs ~17 for binary search)', async () => {
+ const { tokenCountFn, getCallCount } = createMockTokenCounter();
+ const text = createTextOfLength(400000);
+ const tokenLimit = 50000;
+
+ await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn,
+ });
+
+ expect(getCallCount()).toBeLessThanOrEqual(7);
+ });
+
+ it('should typically call tokenCountFn only 2-3 times for standard truncation', async () => {
+ const { tokenCountFn, getCallCount } = createMockTokenCounter();
+ const text = createTextOfLength(40000);
+ const tokenLimit = 5000;
+
+ await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn,
+ });
+
+ expect(getCallCount()).toBeLessThanOrEqual(3);
+ });
+
+ it('should call tokenCountFn only once when text is under limit', async () => {
+ const { tokenCountFn, getCallCount } = createMockTokenCounter();
+ const text = createTextOfLength(1000);
+ const tokenLimit = 10000;
+
+ await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn,
+ });
+
+ expect(getCallCount()).toBe(1);
+ });
+
+ it('should handle very large text (100k+ tokens) efficiently', async () => {
+ const { tokenCountFn, getCallCount } = createMockTokenCounter();
+ const text = createTextOfLength(500000);
+ const tokenLimit = 100000;
+
+ await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn,
+ });
+
+ expect(getCallCount()).toBeLessThanOrEqual(7);
+ });
+ });
+
+ describe('edge cases', () => {
+ it('should handle empty text', async () => {
+ const { tokenCountFn } = createMockTokenCounter();
+ const text = '';
+ const tokenLimit = 100;
+
+ const result = await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn,
+ });
+
+ expect(result.text).toBe('');
+ expect(result.tokenCount).toBe(0);
+ expect(result.wasTruncated).toBe(false);
+ });
+
+ it('should handle token limit of 1', async () => {
+ const { tokenCountFn } = createMockTokenCounter();
+ const text = createTextOfLength(1000);
+ const tokenLimit = 1;
+
+ const result = await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn,
+ });
+
+ expect(result.wasTruncated).toBe(true);
+ expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit);
+ });
+
+ it('should handle text that is just slightly over the limit', async () => {
+ const { tokenCountFn } = createMockTokenCounter();
+ const text = createTextOfLength(404);
+ const tokenLimit = 100;
+
+ const result = await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn,
+ });
+
+ expect(result.wasTruncated).toBe(true);
+ expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit);
+ });
+ });
+
+ describe('correctness with variable token density', () => {
+ it('should handle text with varying token density', async () => {
+ const variableDensityTokenCounter = (text: string): number => {
+ const shortWords = (text.match(/\s+/g) || []).length;
+ return Math.ceil(text.length / 4) + shortWords;
+ };
+
+ const text = 'This is a test with many short words and some longer concatenated words too';
+ const tokenLimit = 10;
+
+ const result = await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn: variableDensityTokenCounter,
+ });
+
+ expect(result.wasTruncated).toBe(true);
+ expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit);
+ });
+ });
+
+ describe('direct comparison with OLD binary search implementation', () => {
+ it('should produce equivalent results to the old implementation', async () => {
+ const oldCounter = createMockTokenCounter();
+ const newCounter = createMockTokenCounter();
+ const text = createTextOfLength(8000);
+ const tokenLimit = 1000;
+
+ const oldResult = await processTextWithTokenLimitOLD({
+ text,
+ tokenLimit,
+ tokenCountFn: oldCounter.tokenCountFn,
+ });
+
+ const newResult = await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn: newCounter.tokenCountFn,
+ });
+
+ expect(newResult.wasTruncated).toBe(oldResult.wasTruncated);
+ expect(newResult.tokenCount).toBeLessThanOrEqual(tokenLimit);
+ expect(oldResult.tokenCount).toBeLessThanOrEqual(tokenLimit);
+ });
+
+ it('should use significantly fewer tokenCountFn calls than old implementation (400k chars)', async () => {
+ const oldCounter = createMockTokenCounter();
+ const newCounter = createMockTokenCounter();
+ const text = createTextOfLength(400000);
+ const tokenLimit = 50000;
+
+ await processTextWithTokenLimitOLD({
+ text,
+ tokenLimit,
+ tokenCountFn: oldCounter.tokenCountFn,
+ });
+
+ await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn: newCounter.tokenCountFn,
+ });
+
+ const oldCalls = oldCounter.getCallCount();
+ const newCalls = newCounter.getCallCount();
+
+ console.log(
+ `[400k chars] OLD implementation: ${oldCalls} calls, NEW implementation: ${newCalls} calls`,
+ );
+ console.log(`[400k chars] Reduction: ${((1 - newCalls / oldCalls) * 100).toFixed(1)}%`);
+
+ expect(newCalls).toBeLessThan(oldCalls);
+ expect(newCalls).toBeLessThanOrEqual(7);
+ });
+
+ it('should use significantly fewer tokenCountFn calls than old implementation (500k chars, 100k token limit)', async () => {
+ const oldCounter = createMockTokenCounter();
+ const newCounter = createMockTokenCounter();
+ const text = createTextOfLength(500000);
+ const tokenLimit = 100000;
+
+ await processTextWithTokenLimitOLD({
+ text,
+ tokenLimit,
+ tokenCountFn: oldCounter.tokenCountFn,
+ });
+
+ await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn: newCounter.tokenCountFn,
+ });
+
+ const oldCalls = oldCounter.getCallCount();
+ const newCalls = newCounter.getCallCount();
+
+ console.log(
+ `[500k chars] OLD implementation: ${oldCalls} calls, NEW implementation: ${newCalls} calls`,
+ );
+ console.log(`[500k chars] Reduction: ${((1 - newCalls / oldCalls) * 100).toFixed(1)}%`);
+
+ expect(newCalls).toBeLessThan(oldCalls);
+ });
+
+ it('should achieve at least 70% reduction in tokenCountFn calls', async () => {
+ const oldCounter = createMockTokenCounter();
+ const newCounter = createMockTokenCounter();
+ const text = createTextOfLength(500000);
+ const tokenLimit = 100000;
+
+ await processTextWithTokenLimitOLD({
+ text,
+ tokenLimit,
+ tokenCountFn: oldCounter.tokenCountFn,
+ });
+
+ await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn: newCounter.tokenCountFn,
+ });
+
+ const oldCalls = oldCounter.getCallCount();
+ const newCalls = newCounter.getCallCount();
+ const reduction = 1 - newCalls / oldCalls;
+
+ console.log(
+ `Efficiency improvement: ${(reduction * 100).toFixed(1)}% fewer tokenCountFn calls`,
+ );
+
+ expect(reduction).toBeGreaterThanOrEqual(0.7);
+ });
+
+ it('should simulate the reported scenario (122k tokens, 100k limit)', async () => {
+ const oldCounter = createMockTokenCounter();
+ const newCounter = createMockTokenCounter();
+ const text = createTextOfLength(489564);
+ const tokenLimit = 100000;
+
+ await processTextWithTokenLimitOLD({
+ text,
+ tokenLimit,
+ tokenCountFn: oldCounter.tokenCountFn,
+ });
+
+ await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn: newCounter.tokenCountFn,
+ });
+
+ const oldCalls = oldCounter.getCallCount();
+ const newCalls = newCounter.getCallCount();
+
+ console.log(`[User reported scenario: ~122k tokens]`);
+ console.log(`OLD implementation: ${oldCalls} tokenCountFn calls`);
+ console.log(`NEW implementation: ${newCalls} tokenCountFn calls`);
+ console.log(`Reduction: ${((1 - newCalls / oldCalls) * 100).toFixed(1)}%`);
+
+ expect(newCalls).toBeLessThan(oldCalls);
+ expect(newCalls).toBeLessThanOrEqual(7);
+ });
+ });
+
+ describe('direct comparison with REAL tiktoken tokenizer', () => {
+ beforeEach(() => {
+ Tokenizer.freeAndResetAllEncoders();
+ });
+
+ it('should produce valid truncation with real tokenizer', async () => {
+ const counter = createRealTokenCounter();
+ const text = createRealisticText(5000);
+ const tokenLimit = 1000;
+
+ const result = await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn: counter.tokenCountFn,
+ });
+
+ expect(result.wasTruncated).toBe(true);
+ expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit);
+ expect(result.text.length).toBeLessThan(text.length);
+ });
+
+ it('should use fewer tiktoken calls than old implementation (realistic text)', async () => {
+ const oldCounter = createRealTokenCounter();
+ const newCounter = createRealTokenCounter();
+ const text = createRealisticText(15000);
+ const tokenLimit = 5000;
+
+ await processTextWithTokenLimitOLD({
+ text,
+ tokenLimit,
+ tokenCountFn: oldCounter.tokenCountFn,
+ });
+
+ Tokenizer.freeAndResetAllEncoders();
+
+ await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn: newCounter.tokenCountFn,
+ });
+
+ const oldCalls = oldCounter.getCallCount();
+ const newCalls = newCounter.getCallCount();
+
+ console.log(`[Real tiktoken ~15k tokens] OLD: ${oldCalls} calls, NEW: ${newCalls} calls`);
+ console.log(`[Real tiktoken] Reduction: ${((1 - newCalls / oldCalls) * 100).toFixed(1)}%`);
+
+ expect(newCalls).toBeLessThan(oldCalls);
+ });
+
+ it('should handle the reported user scenario with real tokenizer (~120k tokens)', async () => {
+ const oldCounter = createRealTokenCounter();
+ const newCounter = createRealTokenCounter();
+ const text = createRealisticText(120000);
+ const tokenLimit = 100000;
+
+ const startOld = performance.now();
+ await processTextWithTokenLimitOLD({
+ text,
+ tokenLimit,
+ tokenCountFn: oldCounter.tokenCountFn,
+ });
+ const timeOld = performance.now() - startOld;
+
+ Tokenizer.freeAndResetAllEncoders();
+
+ const startNew = performance.now();
+ const result = await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn: newCounter.tokenCountFn,
+ });
+ const timeNew = performance.now() - startNew;
+
+ const oldCalls = oldCounter.getCallCount();
+ const newCalls = newCounter.getCallCount();
+
+ console.log(`\n[REAL TIKTOKEN - User reported scenario: ~120k tokens]`);
+ console.log(`OLD implementation: ${oldCalls} tiktoken calls, ${timeOld.toFixed(0)}ms`);
+ console.log(`NEW implementation: ${newCalls} tiktoken calls, ${timeNew.toFixed(0)}ms`);
+ console.log(`Call reduction: ${((1 - newCalls / oldCalls) * 100).toFixed(1)}%`);
+ console.log(`Time reduction: ${((1 - timeNew / timeOld) * 100).toFixed(1)}%`);
+ console.log(
+ `Result: truncated=${result.wasTruncated}, tokens=${result.tokenCount}/${tokenLimit}\n`,
+ );
+
+ expect(newCalls).toBeLessThan(oldCalls);
+ expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit);
+ expect(newCalls).toBeLessThanOrEqual(7);
+ });
+
+ it('should achieve at least 70% reduction with real tokenizer', async () => {
+ const oldCounter = createRealTokenCounter();
+ const newCounter = createRealTokenCounter();
+ const text = createRealisticText(50000);
+ const tokenLimit = 10000;
+
+ await processTextWithTokenLimitOLD({
+ text,
+ tokenLimit,
+ tokenCountFn: oldCounter.tokenCountFn,
+ });
+
+ Tokenizer.freeAndResetAllEncoders();
+
+ await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn: newCounter.tokenCountFn,
+ });
+
+ const oldCalls = oldCounter.getCallCount();
+ const newCalls = newCounter.getCallCount();
+ const reduction = 1 - newCalls / oldCalls;
+
+ console.log(
+ `[Real tiktoken 50k tokens] OLD: ${oldCalls}, NEW: ${newCalls}, Reduction: ${(reduction * 100).toFixed(1)}%`,
+ );
+
+ expect(reduction).toBeGreaterThanOrEqual(0.7);
+ });
+ });
+
+ describe('using countTokens async function from @librechat/api', () => {
+ beforeEach(() => {
+ Tokenizer.freeAndResetAllEncoders();
+ });
+
+ it('countTokens should return correct token count', async () => {
+ const text = 'Hello, world!';
+ const count = await countTokens(text);
+
+ expect(count).toBeGreaterThan(0);
+ expect(typeof count).toBe('number');
+ });
+
+ it('countTokens should handle empty string', async () => {
+ const count = await countTokens('');
+ expect(count).toBe(0);
+ });
+
+ it('should work with processTextWithTokenLimit using countTokens', async () => {
+ const counter = createCountTokensCounter();
+ const text = createRealisticText(5000);
+ const tokenLimit = 1000;
+
+ const result = await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn: counter.tokenCountFn,
+ });
+
+ expect(result.wasTruncated).toBe(true);
+ expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit);
+ expect(result.text.length).toBeLessThan(text.length);
+ });
+
+ it('should use fewer countTokens calls than old implementation', async () => {
+ const oldCounter = createCountTokensCounter();
+ const newCounter = createCountTokensCounter();
+ const text = createRealisticText(15000);
+ const tokenLimit = 5000;
+
+ await processTextWithTokenLimitOLD({
+ text,
+ tokenLimit,
+ tokenCountFn: oldCounter.tokenCountFn,
+ });
+
+ Tokenizer.freeAndResetAllEncoders();
+
+ await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn: newCounter.tokenCountFn,
+ });
+
+ const oldCalls = oldCounter.getCallCount();
+ const newCalls = newCounter.getCallCount();
+
+ console.log(`[countTokens ~15k tokens] OLD: ${oldCalls} calls, NEW: ${newCalls} calls`);
+ console.log(`[countTokens] Reduction: ${((1 - newCalls / oldCalls) * 100).toFixed(1)}%`);
+
+ expect(newCalls).toBeLessThan(oldCalls);
+ });
+
+ it('should handle user reported scenario with countTokens (~120k tokens)', async () => {
+ const oldCounter = createCountTokensCounter();
+ const newCounter = createCountTokensCounter();
+ const text = createRealisticText(120000);
+ const tokenLimit = 100000;
+
+ const startOld = performance.now();
+ await processTextWithTokenLimitOLD({
+ text,
+ tokenLimit,
+ tokenCountFn: oldCounter.tokenCountFn,
+ });
+ const timeOld = performance.now() - startOld;
+
+ Tokenizer.freeAndResetAllEncoders();
+
+ const startNew = performance.now();
+ const result = await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn: newCounter.tokenCountFn,
+ });
+ const timeNew = performance.now() - startNew;
+
+ const oldCalls = oldCounter.getCallCount();
+ const newCalls = newCounter.getCallCount();
+
+ console.log(`\n[countTokens - User reported scenario: ~120k tokens]`);
+ console.log(`OLD implementation: ${oldCalls} countTokens calls, ${timeOld.toFixed(0)}ms`);
+ console.log(`NEW implementation: ${newCalls} countTokens calls, ${timeNew.toFixed(0)}ms`);
+ console.log(`Call reduction: ${((1 - newCalls / oldCalls) * 100).toFixed(1)}%`);
+ console.log(`Time reduction: ${((1 - timeNew / timeOld) * 100).toFixed(1)}%`);
+ console.log(
+ `Result: truncated=${result.wasTruncated}, tokens=${result.tokenCount}/${tokenLimit}\n`,
+ );
+
+ expect(newCalls).toBeLessThan(oldCalls);
+ expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit);
+ expect(newCalls).toBeLessThanOrEqual(7);
+ });
+
+ it('should achieve at least 70% reduction with countTokens', async () => {
+ const oldCounter = createCountTokensCounter();
+ const newCounter = createCountTokensCounter();
+ const text = createRealisticText(50000);
+ const tokenLimit = 10000;
+
+ await processTextWithTokenLimitOLD({
+ text,
+ tokenLimit,
+ tokenCountFn: oldCounter.tokenCountFn,
+ });
+
+ Tokenizer.freeAndResetAllEncoders();
+
+ await processTextWithTokenLimit({
+ text,
+ tokenLimit,
+ tokenCountFn: newCounter.tokenCountFn,
+ });
+
+ const oldCalls = oldCounter.getCallCount();
+ const newCalls = newCounter.getCallCount();
+ const reduction = 1 - newCalls / oldCalls;
+
+ console.log(
+ `[countTokens 50k tokens] OLD: ${oldCalls}, NEW: ${newCalls}, Reduction: ${(reduction * 100).toFixed(1)}%`,
+ );
+
+ expect(reduction).toBeGreaterThanOrEqual(0.7);
+ });
+ });
+});
diff --git a/packages/api/src/utils/text.ts b/packages/api/src/utils/text.ts
index 3de343bd32..3099c2bbc4 100644
--- a/packages/api/src/utils/text.ts
+++ b/packages/api/src/utils/text.ts
@@ -1,11 +1,39 @@
import { logger } from '@librechat/data-schemas';
+/** Token count function that can be sync or async */
+export type TokenCountFn = (text: string) => number | Promise<number>;
+
+/**
+ * Safety buffer multiplier applied to character position estimates during truncation.
+ *
+ * We use 98% (0.98) rather than 100% to intentionally undershoot the target on the first attempt.
+ * This is necessary because:
+ * - Token density varies across text (some regions may have more tokens per character than the average)
+ * - The ratio-based estimate assumes uniform token distribution, which is rarely true
+ * - Undershooting is safer than overshooting: exceeding the limit requires another iteration,
+ * while being slightly under is acceptable
+ * - In practice, this buffer reduces refinement iterations from 2-3 down to 0-1 in most cases
+ *
+ * @example
+ * // If text has 1000 chars and 250 tokens (4 chars/token average), targeting 100 tokens:
+ * // Without buffer: estimate = 1000 * (100/250) = 400 chars → might yield 105 tokens (over!)
+ * // With 0.98 buffer: estimate = 400 * 0.98 = 392 chars → likely yields 97-99 tokens (safe)
+ */
+const TRUNCATION_SAFETY_BUFFER = 0.98;
+
/**
* Processes text content by counting tokens and truncating if it exceeds the specified limit.
+ * Uses ratio-based estimation to minimize expensive tokenCountFn calls.
+ *
* @param text - The text content to process
* @param tokenLimit - The maximum number of tokens allowed
- * @param tokenCountFn - Function to count tokens
+ * @param tokenCountFn - Function to count tokens (can be sync or async)
* @returns Promise resolving to object with processed text, token count, and truncation status
+ *
+ * @remarks
+ * This function uses a ratio-based estimation algorithm instead of binary search.
+ * Binary search would require O(log n) tokenCountFn calls (~17 for 100k chars),
+ * while this approach typically requires only 2-3 calls for a 90%+ reduction in CPU usage.
*/
export async function processTextWithTokenLimit({
text,
@@ -14,7 +42,7 @@ export async function processTextWithTokenLimit({
}: {
text: string;
tokenLimit: number;
- tokenCountFn: (text: string) => number;
+ tokenCountFn: TokenCountFn;
}): Promise<{ text: string; tokenCount: number; wasTruncated: boolean }> {
const originalTokenCount = await tokenCountFn(text);
@@ -26,40 +54,34 @@ export async function processTextWithTokenLimit({
};
}
- /**
- * Doing binary search here to find the truncation point efficiently
- * (May be a better way to go about this)
- */
- let low = 0;
- let high = text.length;
- let bestText = '';
-
logger.debug(
`[textTokenLimiter] Text content exceeds token limit: ${originalTokenCount} > ${tokenLimit}, truncating...`,
);
- while (low <= high) {
- const mid = Math.floor((low + high) / 2);
- const truncatedText = text.substring(0, mid);
- const tokenCount = await tokenCountFn(truncatedText);
+ const ratio = tokenLimit / originalTokenCount;
+ let charPosition = Math.floor(text.length * ratio * TRUNCATION_SAFETY_BUFFER);
- if (tokenCount <= tokenLimit) {
- bestText = truncatedText;
- low = mid + 1;
- } else {
- high = mid - 1;
- }
+ let truncatedText = text.substring(0, charPosition);
+ let tokenCount = await tokenCountFn(truncatedText);
+
+ const maxIterations = 5;
+ let iterations = 0;
+
+ while (tokenCount > tokenLimit && iterations < maxIterations && charPosition > 0) {
+ const overageRatio = tokenLimit / tokenCount;
+ charPosition = Math.floor(charPosition * overageRatio * TRUNCATION_SAFETY_BUFFER);
+ truncatedText = text.substring(0, charPosition);
+ tokenCount = await tokenCountFn(truncatedText);
+ iterations++;
}
- const finalTokenCount = await tokenCountFn(bestText);
-
logger.warn(
- `[textTokenLimiter] Text truncated from ${originalTokenCount} to ${finalTokenCount} tokens (limit: ${tokenLimit})`,
+ `[textTokenLimiter] Text truncated from ${originalTokenCount} to ${tokenCount} tokens (limit: ${tokenLimit})`,
);
return {
- text: bestText,
- tokenCount: finalTokenCount,
+ text: truncatedText,
+ tokenCount,
wasTruncated: true,
};
}
diff --git a/packages/api/src/utils/tokenizer.ts b/packages/api/src/utils/tokenizer.ts
index 2a2088cad3..0b0282d36b 100644
--- a/packages/api/src/utils/tokenizer.ts
+++ b/packages/api/src/utils/tokenizer.ts
@@ -75,4 +75,14 @@ class Tokenizer {
const TokenizerSingleton = new Tokenizer();
+/**
+ * Counts the number of tokens in a given text using tiktoken.
+ * This is an async wrapper around Tokenizer.getTokenCount for compatibility.
+ * @param text - The text to be tokenized. Defaults to an empty string if not provided.
+ * @returns The number of tokens in the provided text.
+ */
+export async function countTokens(text = ''): Promise<number> {
+ return TokenizerSingleton.getTokenCount(text, 'cl100k_base');
+}
+
export default TokenizerSingleton;
From 711d21365dbc8768d3931e9732d1e3c405a6f389 Mon Sep 17 00:00:00 2001
From: Danny Avila
Date: Wed, 3 Dec 2025 10:29:23 -0500
Subject: [PATCH 62/78] =?UTF-8?q?=F0=9F=AA=9F=20refactor:=20Portal=20Contr?=
=?UTF-8?q?ol=20for=20Shared=20View=20Settings=20and=20Google=20Balance=20?=
=?UTF-8?q?Support=20(#10786)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* feat: Add support for Google model endpoint in balance check
* feat: Add portal prop to ThemeSelector and LangSelector in ShareView
---
.../src/components/Nav/SettingsTabs/General/General.tsx | 3 +++
client/src/components/Share/ShareView.tsx | 8 ++++++--
packages/data-provider/src/config.ts | 1 +
3 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/client/src/components/Nav/SettingsTabs/General/General.tsx b/client/src/components/Nav/SettingsTabs/General/General.tsx
index 1157f9fc8e..4a56dd6d25 100644
--- a/client/src/components/Nav/SettingsTabs/General/General.tsx
+++ b/client/src/components/Nav/SettingsTabs/General/General.tsx
@@ -41,9 +41,11 @@ const toggleSwitchConfigs = [
export const ThemeSelector = ({
theme,
onChange,
+ portal = true,
}: {
theme: string;
onChange: (value: string) => void;
+ portal?: boolean;
}) => {
const localize = useLocalize();
@@ -67,6 +69,7 @@ export const ThemeSelector = ({
testId="theme-selector"
className="z-50"
aria-labelledby={labelId}
+ portal={portal}
/>
);
diff --git a/client/src/components/Share/ShareView.tsx b/client/src/components/Share/ShareView.tsx
index a404796757..f58064acc8 100644
--- a/client/src/components/Share/ShareView.tsx
+++ b/client/src/components/Share/ShareView.tsx
@@ -227,9 +227,13 @@ function ShareHeader({