LibreChat/packages/api/src/utils/tokenizer.ts
Danny Avila 8bdc808074
Some checks are pending
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Waiting to run
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Waiting to run
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Waiting to run
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Blocked by required conditions
refactor: Optimize & Standardize Tokenizer Usage (#10777)
* refactor: Token Limit Processing with Enhanced Efficiency

- Added a new test suite for `processTextWithTokenLimit`, ensuring comprehensive coverage of various scenarios including under, at, and exceeding token limits.
- Refactored the `processTextWithTokenLimit` function to utilize a ratio-based estimation method, significantly reducing the number of token counting function calls compared to the previous binary search approach.
- Improved handling of edge cases and variable token density, ensuring accurate truncation and performance across diverse text inputs.
- Included direct comparisons with the old implementation to validate correctness and efficiency improvements.

* refactor: Remove Tokenizer Route and Related References

- Deleted the tokenizer route from the server and removed its references from the routes index and server files, streamlining the API structure.
- This change simplifies the routing configuration by eliminating unused endpoints.

* refactor: Migrate countTokens Utility to API Module

- Removed the local countTokens utility and integrated it into the @librechat/api module for centralized access.
- Updated various files to reference the new countTokens import from the API module, ensuring consistent usage across the application.
- Cleaned up unused references and imports related to the previous countTokens implementation.

* refactor: Centralize escapeRegExp Utility in API Module

- Moved the escapeRegExp function from local utility files to the @librechat/api module for consistent usage across the application.
- Updated imports in various files to reference the new centralized escapeRegExp function, ensuring cleaner code and reducing redundancy.
- Removed duplicate implementations of escapeRegExp from multiple files, streamlining the codebase.

* refactor: Enhance Token Counting Flexibility in Text Processing

- Updated the `processTextWithTokenLimit` function to accept both synchronous and asynchronous token counting functions, improving its versatility.
- Introduced a new `TokenCountFn` type to define the token counting function signature.
- Added comprehensive tests to validate the behavior of `processTextWithTokenLimit` with both sync and async token counting functions, ensuring consistent results.
- Implemented a wrapper to track call counts for the `countTokens` function, optimizing performance and reducing unnecessary calls.
- Enhanced existing tests to compare the performance of the new implementation against the old one, demonstrating significant improvements in efficiency.

* chore: documentation for Truncation Safety Buffer in Token Processing

- Added a safety buffer multiplier to the character position estimates during text truncation to prevent overshooting token limits.
- Updated the `processTextWithTokenLimit` function to utilize the new `TRUNCATION_SAFETY_BUFFER` constant, enhancing the accuracy of token limit processing.
- Improved documentation to clarify the rationale behind the buffer and its impact on performance and efficiency in token counting.
2025-12-02 12:22:04 -05:00

88 lines
2.7 KiB
TypeScript

import { logger } from '@librechat/data-schemas';
import { encoding_for_model as encodingForModel, get_encoding as getEncoding } from 'tiktoken';
import type { Tiktoken, TiktokenModel, TiktokenEncoding } from 'tiktoken';
interface TokenizerOptions {
debug?: boolean;
}
class Tokenizer {
tokenizersCache: Record<string, Tiktoken>;
tokenizerCallsCount: number;
private options?: TokenizerOptions;
constructor() {
this.tokenizersCache = {};
this.tokenizerCallsCount = 0;
}
getTokenizer(
encoding: TiktokenModel | TiktokenEncoding,
isModelName = false,
extendSpecialTokens: Record<string, number> = {},
): Tiktoken {
let tokenizer: Tiktoken;
if (this.tokenizersCache[encoding]) {
tokenizer = this.tokenizersCache[encoding];
} else {
if (isModelName) {
tokenizer = encodingForModel(encoding as TiktokenModel, extendSpecialTokens);
} else {
tokenizer = getEncoding(encoding as TiktokenEncoding, extendSpecialTokens);
}
this.tokenizersCache[encoding] = tokenizer;
}
return tokenizer;
}
freeAndResetAllEncoders(): void {
try {
Object.keys(this.tokenizersCache).forEach((key) => {
if (this.tokenizersCache[key]) {
this.tokenizersCache[key].free();
delete this.tokenizersCache[key];
}
});
this.tokenizerCallsCount = 1;
} catch (error) {
logger.error('[Tokenizer] Free and reset encoders error', error);
}
}
resetTokenizersIfNecessary(): void {
if (this.tokenizerCallsCount >= 25) {
if (this.options?.debug) {
logger.debug('[Tokenizer] freeAndResetAllEncoders: reached 25 encodings, resetting...');
}
this.freeAndResetAllEncoders();
}
this.tokenizerCallsCount++;
}
getTokenCount(text: string, encoding: TiktokenModel | TiktokenEncoding = 'cl100k_base'): number {
this.resetTokenizersIfNecessary();
try {
const tokenizer = this.getTokenizer(encoding);
return tokenizer.encode(text, 'all').length;
} catch (error) {
logger.error('[Tokenizer] Error getting token count:', error);
this.freeAndResetAllEncoders();
const tokenizer = this.getTokenizer(encoding);
return tokenizer.encode(text, 'all').length;
}
}
}
const TokenizerSingleton = new Tokenizer();
/**
* Counts the number of tokens in a given text using tiktoken.
* This is an async wrapper around Tokenizer.getTokenCount for compatibility.
* @param text - The text to be tokenized. Defaults to an empty string if not provided.
* @returns The number of tokens in the provided text.
*/
export async function countTokens(text = ''): Promise<number> {
return TokenizerSingleton.getTokenCount(text, 'cl100k_base');
}
export default TokenizerSingleton;