⚡ refactor: Optimize & Standardize Tokenizer Usage (#10777)

* refactor: Token Limit Processing with Enhanced Efficiency
  - Added a new test suite for `processTextWithTokenLimit`, ensuring comprehensive coverage of various scenarios including under, at, and exceeding token limits.
  - Refactored the `processTextWithTokenLimit` function to utilize a ratio-based estimation method, significantly reducing the number of token counting function calls compared to the previous binary search approach.
  - Improved handling of edge cases and variable token density, ensuring accurate truncation and performance across diverse text inputs.
  - Included direct comparisons with the old implementation to validate correctness and efficiency improvements.
* refactor: Remove Tokenizer Route and Related References
  - Deleted the tokenizer route from the server and removed its references from the routes index and server files, streamlining the API structure.
  - This change simplifies the routing configuration by eliminating unused endpoints.
* refactor: Migrate countTokens Utility to API Module
  - Removed the local countTokens utility and integrated it into the @librechat/api module for centralized access.
  - Updated various files to reference the new countTokens import from the API module, ensuring consistent usage across the application.
  - Cleaned up unused references and imports related to the previous countTokens implementation.
* refactor: Centralize escapeRegExp Utility in API Module
  - Moved the escapeRegExp function from local utility files to the @librechat/api module for consistent usage across the application.
  - Updated imports in various files to reference the new centralized escapeRegExp function, ensuring cleaner code and reducing redundancy.
  - Removed duplicate implementations of escapeRegExp from multiple files, streamlining the codebase.
* refactor: Enhance Token Counting Flexibility in Text Processing
  - Updated the `processTextWithTokenLimit` function to accept both synchronous and asynchronous token counting functions, improving its versatility.
  - Introduced a new `TokenCountFn` type to define the token counting function signature.
  - Added comprehensive tests to validate the behavior of `processTextWithTokenLimit` with both sync and async token counting functions, ensuring consistent results.
  - Implemented a wrapper to track call counts for the `countTokens` function, optimizing performance and reducing unnecessary calls.
  - Enhanced existing tests to compare the performance of the new implementation against the old one, demonstrating significant improvements in efficiency.
* chore: documentation for Truncation Safety Buffer in Token Processing
  - Added a safety buffer multiplier to the character position estimates during text truncation to prevent overshooting token limits.
  - Updated the `processTextWithTokenLimit` function to utilize the new `TRUNCATION_SAFETY_BUFFER` constant, enhancing the accuracy of token limit processing.
  - Improved documentation to clarify the rationale behind the buffer and its impact on performance and efficiency in token counting.
2026-03-19 22:26:33 +01:00 · 2025-12-02 12:22:04 -05:00 · 2025-12-02 12:22:04 -05:00 · 8bdc808074
commit 8bdc808074
parent b2387cc6fa
19 changed files with 925 additions and 107 deletions
--- a/api/app/clients/BaseClient.js
+++ b/api/app/clients/BaseClient.js
@ -2,6 +2,7 @@ const crypto = require('crypto');
 const fetch = require('node-fetch');
 const { logger } = require('@librechat/data-schemas');
 const {
+  countTokens,
  getBalanceConfig,
  extractFileContext,
  encodeAndFormatAudios,
@ -23,7 +24,6 @@ const { getMessages, saveMessage, updateMessage, saveConvo, getConvo } = require
 const { getStrategyFunctions } = require('~/server/services/Files/strategies');
 const { checkBalance } = require('~/models/balanceMethods');
 const { truncateToolCallOutputs } = require('./prompts');
-const countTokens = require('~/server/utils/countTokens');
 const { getFiles } = require('~/models/File');
 const TextStream = require('./TextStream');

--- a/api/models/Prompt.js
+++ b/api/models/Prompt.js
@ -1,4 +1,5 @@
 const { ObjectId } = require('mongodb');
+const { escapeRegExp } = require('@librechat/api');
 const { logger } = require('@librechat/data-schemas');
 const {
  Constants,
@ -14,7 +15,6 @@ const {
 } = require('./Project');
 const { removeAllPermissions } = require('~/server/services/PermissionService');
 const { PromptGroup, Prompt, AclEntry } = require('~/db/models');
-const { escapeRegExp } = require('~/server/utils');

 /**
 * Create a pipeline for the aggregation to get prompt groups
--- a/api/server/controllers/assistants/chatV1.js
+++ b/api/server/controllers/assistants/chatV1.js
@ -1,7 +1,7 @@
 const { v4 } = require('uuid');
 const { sleep } = require('@librechat/agents');
 const { logger } = require('@librechat/data-schemas');
-const { sendEvent, getBalanceConfig, getModelMaxTokens } = require('@librechat/api');
+const { sendEvent, getBalanceConfig, getModelMaxTokens, countTokens } = require('@librechat/api');
 const {
  Time,
  Constants,
@ -33,7 +33,6 @@ const { getTransactions } = require('~/models/Transaction');
 const { checkBalance } = require('~/models/balanceMethods');
 const { getConvo } = require('~/models/Conversation');
 const getLogStores = require('~/cache/getLogStores');
-const { countTokens } = require('~/server/utils');
 const { getOpenAIClient } = require('./helpers');

 /**
--- a/api/server/controllers/assistants/chatV2.js
+++ b/api/server/controllers/assistants/chatV2.js
@ -1,7 +1,7 @@
 const { v4 } = require('uuid');
 const { sleep } = require('@librechat/agents');
 const { logger } = require('@librechat/data-schemas');
-const { sendEvent, getBalanceConfig, getModelMaxTokens } = require('@librechat/api');
+const { sendEvent, getBalanceConfig, getModelMaxTokens, countTokens } = require('@librechat/api');
 const {
  Time,
  Constants,
@ -30,7 +30,6 @@ const { getTransactions } = require('~/models/Transaction');
 const { checkBalance } = require('~/models/balanceMethods');
 const { getConvo } = require('~/models/Conversation');
 const getLogStores = require('~/cache/getLogStores');
-const { countTokens } = require('~/server/utils');
 const { getOpenAIClient } = require('./helpers');

 /**
--- a/api/server/experimental.js
+++ b/api/server/experimental.js
@ -292,7 +292,6 @@ if (cluster.isMaster) {
    app.use('/api/presets', routes.presets);
    app.use('/api/prompts', routes.prompts);
    app.use('/api/categories', routes.categories);
-    app.use('/api/tokenizer', routes.tokenizer);
    app.use('/api/endpoints', routes.endpoints);
    app.use('/api/balance', routes.balance);
    app.use('/api/models', routes.models);
--- a/api/server/index.js
+++ b/api/server/index.js
@ -128,7 +128,6 @@ const startServer = async () => {
  app.use('/api/presets', routes.presets);
  app.use('/api/prompts', routes.prompts);
  app.use('/api/categories', routes.categories);
-  app.use('/api/tokenizer', routes.tokenizer);
  app.use('/api/endpoints', routes.endpoints);
  app.use('/api/balance', routes.balance);
  app.use('/api/models', routes.models);
--- a/api/server/routes/index.js
+++ b/api/server/routes/index.js
@ -1,7 +1,6 @@
 const accessPermissions = require('./accessPermissions');
 const assistants = require('./assistants');
 const categories = require('./categories');
-const tokenizer = require('./tokenizer');
 const endpoints = require('./endpoints');
 const staticRoute = require('./static');
 const messages = require('./messages');
@ -53,7 +52,6 @@ module.exports = {
  messages,
  memories,
  endpoints,
-  tokenizer,
  assistants,
  categories,
  staticRoute,
--- a/api/server/routes/messages.js
+++ b/api/server/routes/messages.js
@ -1,7 +1,7 @@
 const express = require('express');
-const { unescapeLaTeX } = require('@librechat/api');
 const { logger } = require('@librechat/data-schemas');
 const { ContentTypes } = require('librechat-data-provider');
+const { unescapeLaTeX, countTokens } = require('@librechat/api');
 const {
  saveConvo,
  getMessage,
@ -14,7 +14,6 @@ const { findAllArtifacts, replaceArtifactContent } = require('~/server/services/
 const { requireJwtAuth, validateMessageReq } = require('~/server/middleware');
 const { cleanUpPrimaryKeyValue } = require('~/lib/utils/misc');
 const { getConvosQueried } = require('~/models/Conversation');
-const { countTokens } = require('~/server/utils');
 const { Message } = require('~/db/models');

 const router = express.Router();
--- a/api/server/routes/tokenizer.js
+++ b/api/server/routes/tokenizer.js
@ -1,19 +0,0 @@
-const express = require('express');
-const { logger } = require('@librechat/data-schemas');
-const requireJwtAuth = require('~/server/middleware/requireJwtAuth');
-const { countTokens } = require('~/server/utils');
-
-const router = express.Router();
-
-router.post('/', requireJwtAuth, async (req, res) => {
-  try {
-    const { arg } = req.body;
-    const count = await countTokens(arg?.text ?? arg);
-    res.send({ count });
-  } catch (e) {
-    logger.error('[/tokenizer] Error counting tokens', e);
-    res.status(500).json('Error counting tokens');
-  }
-});
-
-module.exports = router;
--- a/api/server/services/Threads/manage.js
+++ b/api/server/services/Threads/manage.js
@ -1,5 +1,6 @@
 const path = require('path');
 const { v4 } = require('uuid');
+const { countTokens, escapeRegExp } = require('@librechat/api');
 const {
  Constants,
  ContentTypes,
@ -8,7 +9,6 @@ const {
 } = require('librechat-data-provider');
 const { retrieveAndProcessFile } = require('~/server/services/Files/process');
 const { recordMessage, getMessages } = require('~/models/Message');
-const { countTokens, escapeRegExp } = require('~/server/utils');
 const { spendTokens } = require('~/models/spendTokens');
 const { saveConvo } = require('~/models/Conversation');

--- a/api/server/utils/countTokens.js
+++ b/api/server/utils/countTokens.js
@ -1,37 +0,0 @@
-const { Tiktoken } = require('tiktoken/lite');
-const { logger } = require('@librechat/data-schemas');
-const p50k_base = require('tiktoken/encoders/p50k_base.json');
-const cl100k_base = require('tiktoken/encoders/cl100k_base.json');
-
-/**
- * Counts the number of tokens in a given text using a specified encoding model.
- *
- * This function utilizes the 'Tiktoken' library to encode text based on the selected model.
- * It supports two models, 'text-davinci-003' and 'gpt-3.5-turbo', each with its own encoding strategy.
- * For 'text-davinci-003', the 'p50k_base' encoder is used, whereas for other models, the 'cl100k_base' encoder is applied.
- * In case of an error during encoding, the error is logged, and the function returns 0.
- *
- * @async
- * @param {string} text - The text to be tokenized. Defaults to an empty string if not provided.
- * @param {string} modelName - The name of the model used for tokenizing. Defaults to 'gpt-3.5-turbo'.
- * @returns {Promise<number>} The number of tokens in the provided text. Returns 0 if an error occurs.
- * @throws Logs the error to a logger and rethrows if any error occurs during tokenization.
- */
-const countTokens = async (text = '', modelName = 'gpt-3.5-turbo') => {
-  let encoder = null;
-  try {
-    const model = modelName.includes('text-davinci-003') ? p50k_base : cl100k_base;
-    encoder = new Tiktoken(model.bpe_ranks, model.special_tokens, model.pat_str);
-    const tokens = encoder.encode(text);
-    encoder.free();
-    return tokens.length;
-  } catch (e) {
-    logger.error('[countTokens]', e);
-    if (encoder) {
-      encoder.free();
-    }
-    return 0;
-  }
-};
-
-module.exports = countTokens;
--- a/api/server/utils/handleText.js
+++ b/api/server/utils/handleText.js
@ -10,14 +10,6 @@ const {
 const { sendEvent } = require('@librechat/api');
 const partialRight = require('lodash/partialRight');

-/** Helper function to escape special characters in regex
- * @param {string} string - The string to escape.
- * @returns {string} The escaped string.
- */
-function escapeRegExp(string) {
-  return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
-}
-
 const addSpaceIfNeeded = (text) => (text.length > 0 && !text.endsWith(' ') ? text + ' ' : text);

 const base = { message: true, initial: true };
@ -181,7 +173,6 @@ function generateConfig(key, baseURL, endpoint) {
 module.exports = {
  handleText,
  formatSteps,
-  escapeRegExp,
  formatAction,
  isUserProvided,
  generateConfig,
--- a/api/server/utils/index.js
+++ b/api/server/utils/index.js
@ -1,5 +1,4 @@
 const removePorts = require('./removePorts');
-const countTokens = require('./countTokens');
 const handleText = require('./handleText');
 const sendEmail = require('./sendEmail');
 const queue = require('./queue');
@ -7,7 +6,6 @@ const files = require('./files');

 module.exports = {
  ...handleText,
-  countTokens,
  removePorts,
  sendEmail,
  ...files,