⚡ refactor: Replace tiktoken with ai-tokenizer (#12175)

* chore: Update dependencies by adding ai-tokenizer and removing tiktoken
- Added ai-tokenizer version 1.0.6 to package.json and package-lock.json across multiple packages.
- Removed tiktoken version 1.0.15 from package.json and package-lock.json in the same locations, streamlining dependency management.

* refactor: replace js-tiktoken with ai-tokenizer
- Added support for 'claude' encoding in the AgentClient class to improve model compatibility.
- Updated Tokenizer class to utilize 'ai-tokenizer' for both 'o200k_base' and 'claude' encodings, replacing the previous 'tiktoken' dependency.
- Refactored tests to reflect changes in tokenizer behavior and ensure accurate token counting for both encoding types.
- Removed deprecated references to 'tiktoken' and adjusted related tests for improved clarity and functionality.

* chore: remove tiktoken mocks from DALLE3 tests
- Eliminated mock implementations of 'tiktoken' from DALLE3-related test files to streamline test setup and align with recent dependency updates.
- Adjusted related test structures to ensure compatibility with the new tokenizer implementation.

* chore: Add distinct encoding support for Anthropic Claude models
- Introduced a new method `getEncoding` in the AgentClient class to handle the specific BPE tokenizer for Claude models, ensuring compatibility with the distinct encoding requirements.
- Updated documentation to clarify the encoding logic for Claude and other models.

* docs: Update return type documentation for getEncoding method in AgentClient
- Clarified the return type of the getEncoding method to specify that it can return an EncodingName or undefined, enhancing code readability and type safety.

* refactor: Tokenizer class and error handling
- Exported the EncodingName type for broader usage.
- Renamed encodingMap to encodingData for clarity.
- Improved error handling in getTokenCount method to ensure recovery attempts are logged and return 0 on failure.
- Updated countTokens function documentation to specify the use of 'o200k_base' encoding.

* refactor: Simplify encoding documentation and export type
- Updated the getEncoding method documentation to clarify the default behavior for non-Anthropic Claude models.
- Exported the EncodingName type separately from the Tokenizer module for improved clarity and usage.

* test: Update text processing tests for token limits
- Adjusted test cases to handle smaller text sizes, changing scenarios from ~120k tokens to ~20k tokens for both the real tokenizer and countTokens functions.
- Updated token limits in tests to reflect new constraints, ensuring tests accurately assess performance and call reduction.
- Enhanced console log messages for clarity regarding token counts and reductions in the updated scenarios.

* refactor: Update Tokenizer imports and exports
- Moved Tokenizer and countTokens exports to the tokenizer module for better organization.
- Adjusted imports in memory.ts to reflect the new structure, ensuring consistent usage across the codebase.
- Updated memory.test.ts to mock the Tokenizer from the correct module path, enhancing test accuracy.

* refactor: Tokenizer initialization and error handling
- Introduced an async `initEncoding` method to preload tokenizers, improving performance and accuracy in token counting.
- Updated `getTokenCount` to handle uninitialized tokenizers more gracefully, ensuring proper recovery and logging on errors.
- Removed deprecated synchronous tokenizer retrieval, streamlining the overall tokenizer management process.

* test: Enhance tokenizer tests with initialization and encoding checks
- Added `beforeAll` hooks to initialize tokenizers for 'o200k_base' and 'claude' encodings before running tests, ensuring proper setup.
- Updated tests to validate the loading of encodings and the correctness of token counts for both 'o200k_base' and 'claude'.
- Improved test structure to deduplicate concurrent initialization calls, enhancing performance and reliability.
2026-04-04 14:57:20 +02:00 · 2026-03-10 23:14:52 -04:00 · 2026-03-10 23:14:52 -04:00 · 9a5d7eaa4e
commit 9a5d7eaa4e
parent fcb344da47
15 changed files with 112 additions and 277 deletions
--- a/packages/api/src/utils/tokenizer.ts
+++ b/packages/api/src/utils/tokenizer.ts
@ -1,74 +1,46 @@
 import { logger } from '@librechat/data-schemas';
-import { encoding_for_model as encodingForModel, get_encoding as getEncoding } from 'tiktoken';
-import type { Tiktoken, TiktokenModel, TiktokenEncoding } from 'tiktoken';
+import { Tokenizer as AiTokenizer } from 'ai-tokenizer';

-interface TokenizerOptions {
-  debug?: boolean;
-}
+export type EncodingName = 'o200k_base' | 'claude';
+
+type EncodingData = ConstructorParameters<typeof AiTokenizer>[0];

 class Tokenizer {
-  tokenizersCache: Record<string, Tiktoken>;
-  tokenizerCallsCount: number;
-  private options?: TokenizerOptions;
+  private tokenizersCache: Partial<Record<EncodingName, AiTokenizer>> = {};
+  private loadingPromises: Partial<Record<EncodingName, Promise<void>>> = {};

-  constructor() {
-    this.tokenizersCache = {};
-    this.tokenizerCallsCount = 0;
-  }
-
-  getTokenizer(
-    encoding: TiktokenModel | TiktokenEncoding,
-    isModelName = false,
-    extendSpecialTokens: Record<string, number> = {},
-  ): Tiktoken {
-    let tokenizer: Tiktoken;
+  /** Pre-loads an encoding so that subsequent getTokenCount calls are accurate. */
+  async initEncoding(encoding: EncodingName): Promise<void> {
    if (this.tokenizersCache[encoding]) {
-      tokenizer = this.tokenizersCache[encoding];
-    } else {
-      if (isModelName) {
-        tokenizer = encodingForModel(encoding as TiktokenModel, extendSpecialTokens);
-      } else {
-        tokenizer = getEncoding(encoding as TiktokenEncoding, extendSpecialTokens);
-      }
-      this.tokenizersCache[encoding] = tokenizer;
+      return;
    }
-    return tokenizer;
+    if (this.loadingPromises[encoding]) {
+      return this.loadingPromises[encoding];
+    }
+    this.loadingPromises[encoding] = (async () => {
+      const data: EncodingData =
+        encoding === 'claude'
+          ? await import('ai-tokenizer/encoding/claude')
+          : await import('ai-tokenizer/encoding/o200k_base');
+      this.tokenizersCache[encoding] = new AiTokenizer(data);
+    })();
+    return this.loadingPromises[encoding];
  }

-  freeAndResetAllEncoders(): void {
+  getTokenCount(text: string, encoding: EncodingName = 'o200k_base'): number {
+    const tokenizer = this.tokenizersCache[encoding];
+    if (!tokenizer) {
+      this.initEncoding(encoding);
+      return Math.ceil(text.length / 4);
+    }
    try {
-      Object.keys(this.tokenizersCache).forEach((key) => {
-        if (this.tokenizersCache[key]) {
-          this.tokenizersCache[key].free();
-          delete this.tokenizersCache[key];
-        }
-      });
-      this.tokenizerCallsCount = 1;
-    } catch (error) {
-      logger.error('[Tokenizer] Free and reset encoders error', error);
-    }
-  }
-
-  resetTokenizersIfNecessary(): void {
-    if (this.tokenizerCallsCount >= 25) {
-      if (this.options?.debug) {
-        logger.debug('[Tokenizer] freeAndResetAllEncoders: reached 25 encodings, resetting...');
-      }
-      this.freeAndResetAllEncoders();
-    }
-    this.tokenizerCallsCount++;
-  }
-
-  getTokenCount(text: string, encoding: TiktokenModel | TiktokenEncoding = 'cl100k_base'): number {
-    this.resetTokenizersIfNecessary();
-    try {
-      const tokenizer = this.getTokenizer(encoding);
-      return tokenizer.encode(text, 'all').length;
+      return tokenizer.count(text);
    } catch (error) {
      logger.error('[Tokenizer] Error getting token count:', error);
-      this.freeAndResetAllEncoders();
-      const tokenizer = this.getTokenizer(encoding);
-      return tokenizer.encode(text, 'all').length;
+      delete this.tokenizersCache[encoding];
+      delete this.loadingPromises[encoding];
+      this.initEncoding(encoding);
+      return Math.ceil(text.length / 4);
    }
  }
 }
@ -76,13 +48,13 @@ class Tokenizer {
 const TokenizerSingleton = new Tokenizer();

 /**
- * Counts the number of tokens in a given text using tiktoken.
- * This is an async wrapper around Tokenizer.getTokenCount for compatibility.
- * @param text - The text to be tokenized. Defaults to an empty string if not provided.
+ * Counts the number of tokens in a given text using ai-tokenizer with o200k_base encoding.
+ * @param text - The text to count tokens in. Defaults to an empty string.
 * @returns The number of tokens in the provided text.
 */
 export async function countTokens(text = ''): Promise<number> {
-  return TokenizerSingleton.getTokenCount(text, 'cl100k_base');
+  await TokenizerSingleton.initEncoding('o200k_base');
+  return TokenizerSingleton.getTokenCount(text, 'o200k_base');
 }

 export default TokenizerSingleton;