refactor: Replace tiktoken with ai-tokenizer (#12175)
Some checks failed
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Waiting to run
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Has been cancelled
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Has been cancelled
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Has been cancelled
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Has been cancelled

* chore: Update dependencies by adding ai-tokenizer and removing tiktoken

- Added ai-tokenizer version 1.0.6 to package.json and package-lock.json across multiple packages.
- Removed tiktoken version 1.0.15 from package.json and package-lock.json in the same locations, streamlining dependency management.

* refactor: replace js-tiktoken with ai-tokenizer

- Added support for 'claude' encoding in the AgentClient class to improve model compatibility.
- Updated Tokenizer class to utilize 'ai-tokenizer' for both 'o200k_base' and 'claude' encodings, replacing the previous 'tiktoken' dependency.
- Refactored tests to reflect changes in tokenizer behavior and ensure accurate token counting for both encoding types.
- Removed deprecated references to 'tiktoken' and adjusted related tests for improved clarity and functionality.

* chore: remove tiktoken mocks from DALLE3 tests

- Eliminated mock implementations of 'tiktoken' from DALLE3-related test files to streamline test setup and align with recent dependency updates.
- Adjusted related test structures to ensure compatibility with the new tokenizer implementation.

* chore: Add distinct encoding support for Anthropic Claude models

- Introduced a new method `getEncoding` in the AgentClient class to handle the specific BPE tokenizer for Claude models, ensuring compatibility with the distinct encoding requirements.
- Updated documentation to clarify the encoding logic for Claude and other models.

* docs: Update return type documentation for getEncoding method in AgentClient

- Clarified the return type of the getEncoding method to specify that it can return an EncodingName or undefined, enhancing code readability and type safety.

* refactor: Tokenizer class and error handling

- Exported the EncodingName type for broader usage.
- Renamed encodingMap to encodingData for clarity.
- Improved error handling in getTokenCount method to ensure recovery attempts are logged and return 0 on failure.
- Updated countTokens function documentation to specify the use of 'o200k_base' encoding.

* refactor: Simplify encoding documentation and export type

- Updated the getEncoding method documentation to clarify the default behavior for non-Anthropic Claude models.
- Exported the EncodingName type separately from the Tokenizer module for improved clarity and usage.

* test: Update text processing tests for token limits

- Adjusted test cases to handle smaller text sizes, changing scenarios from ~120k tokens to ~20k tokens for both the real tokenizer and countTokens functions.
- Updated token limits in tests to reflect new constraints, ensuring tests accurately assess performance and call reduction.
- Enhanced console log messages for clarity regarding token counts and reductions in the updated scenarios.

* refactor: Update Tokenizer imports and exports

- Moved Tokenizer and countTokens exports to the tokenizer module for better organization.
- Adjusted imports in memory.ts to reflect the new structure, ensuring consistent usage across the codebase.
- Updated memory.test.ts to mock the Tokenizer from the correct module path, enhancing test accuracy.

* refactor: Tokenizer initialization and error handling

- Introduced an async `initEncoding` method to preload tokenizers, improving performance and accuracy in token counting.
- Updated `getTokenCount` to handle uninitialized tokenizers more gracefully, ensuring proper recovery and logging on errors.
- Removed deprecated synchronous tokenizer retrieval, streamlining the overall tokenizer management process.

* test: Enhance tokenizer tests with initialization and encoding checks

- Added `beforeAll` hooks to initialize tokenizers for 'o200k_base' and 'claude' encodings before running tests, ensuring proper setup.
- Updated tests to validate the loading of encodings and the correctness of token counts for both 'o200k_base' and 'claude'.
- Improved test structure to deduplicate concurrent initialization calls, enhancing performance and reliability.
This commit is contained in:
Danny Avila 2026-03-10 23:14:52 -04:00 committed by GitHub
parent fcb344da47
commit 9a5d7eaa4e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 112 additions and 277 deletions

View file

@ -1,74 +1,46 @@
import { logger } from '@librechat/data-schemas';
import { encoding_for_model as encodingForModel, get_encoding as getEncoding } from 'tiktoken';
import type { Tiktoken, TiktokenModel, TiktokenEncoding } from 'tiktoken';
import { Tokenizer as AiTokenizer } from 'ai-tokenizer';
interface TokenizerOptions {
debug?: boolean;
}
export type EncodingName = 'o200k_base' | 'claude';
type EncodingData = ConstructorParameters<typeof AiTokenizer>[0];
class Tokenizer {
tokenizersCache: Record<string, Tiktoken>;
tokenizerCallsCount: number;
private options?: TokenizerOptions;
private tokenizersCache: Partial<Record<EncodingName, AiTokenizer>> = {};
private loadingPromises: Partial<Record<EncodingName, Promise<void>>> = {};
constructor() {
this.tokenizersCache = {};
this.tokenizerCallsCount = 0;
}
getTokenizer(
encoding: TiktokenModel | TiktokenEncoding,
isModelName = false,
extendSpecialTokens: Record<string, number> = {},
): Tiktoken {
let tokenizer: Tiktoken;
/** Pre-loads an encoding so that subsequent getTokenCount calls are accurate. */
async initEncoding(encoding: EncodingName): Promise<void> {
if (this.tokenizersCache[encoding]) {
tokenizer = this.tokenizersCache[encoding];
} else {
if (isModelName) {
tokenizer = encodingForModel(encoding as TiktokenModel, extendSpecialTokens);
} else {
tokenizer = getEncoding(encoding as TiktokenEncoding, extendSpecialTokens);
}
this.tokenizersCache[encoding] = tokenizer;
return;
}
return tokenizer;
if (this.loadingPromises[encoding]) {
return this.loadingPromises[encoding];
}
this.loadingPromises[encoding] = (async () => {
const data: EncodingData =
encoding === 'claude'
? await import('ai-tokenizer/encoding/claude')
: await import('ai-tokenizer/encoding/o200k_base');
this.tokenizersCache[encoding] = new AiTokenizer(data);
})();
return this.loadingPromises[encoding];
}
freeAndResetAllEncoders(): void {
getTokenCount(text: string, encoding: EncodingName = 'o200k_base'): number {
const tokenizer = this.tokenizersCache[encoding];
if (!tokenizer) {
this.initEncoding(encoding);
return Math.ceil(text.length / 4);
}
try {
Object.keys(this.tokenizersCache).forEach((key) => {
if (this.tokenizersCache[key]) {
this.tokenizersCache[key].free();
delete this.tokenizersCache[key];
}
});
this.tokenizerCallsCount = 1;
} catch (error) {
logger.error('[Tokenizer] Free and reset encoders error', error);
}
}
resetTokenizersIfNecessary(): void {
if (this.tokenizerCallsCount >= 25) {
if (this.options?.debug) {
logger.debug('[Tokenizer] freeAndResetAllEncoders: reached 25 encodings, resetting...');
}
this.freeAndResetAllEncoders();
}
this.tokenizerCallsCount++;
}
getTokenCount(text: string, encoding: TiktokenModel | TiktokenEncoding = 'cl100k_base'): number {
this.resetTokenizersIfNecessary();
try {
const tokenizer = this.getTokenizer(encoding);
return tokenizer.encode(text, 'all').length;
return tokenizer.count(text);
} catch (error) {
logger.error('[Tokenizer] Error getting token count:', error);
this.freeAndResetAllEncoders();
const tokenizer = this.getTokenizer(encoding);
return tokenizer.encode(text, 'all').length;
delete this.tokenizersCache[encoding];
delete this.loadingPromises[encoding];
this.initEncoding(encoding);
return Math.ceil(text.length / 4);
}
}
}
@ -76,13 +48,13 @@ class Tokenizer {
const TokenizerSingleton = new Tokenizer();
/**
* Counts the number of tokens in a given text using tiktoken.
* This is an async wrapper around Tokenizer.getTokenCount for compatibility.
* @param text - The text to be tokenized. Defaults to an empty string if not provided.
* Counts the number of tokens in a given text using ai-tokenizer with o200k_base encoding.
* @param text - The text to count tokens in. Defaults to an empty string.
* @returns The number of tokens in the provided text.
*/
export async function countTokens(text = ''): Promise<number> {
return TokenizerSingleton.getTokenCount(text, 'cl100k_base');
await TokenizerSingleton.initEncoding('o200k_base');
return TokenizerSingleton.getTokenCount(text, 'o200k_base');
}
export default TokenizerSingleton;