Mirror of https://github.com/danny-avila/LibreChat.git (synced 2026-01-05 01:58:50 +01:00)
* refactor: Token Limit Processing with Enhanced Efficiency
  - Added a test suite for `processTextWithTokenLimit` covering texts under, at, and exceeding the token limit.
  - Refactored `processTextWithTokenLimit` to use ratio-based estimation, significantly reducing the number of token-counting calls compared to the previous binary-search approach.
  - Improved handling of edge cases and variable token density, ensuring accurate truncation and good performance across diverse inputs.
  - Included direct comparisons against the old implementation to validate correctness and the efficiency gains.

* refactor: Remove Tokenizer Route and Related References
  - Deleted the tokenizer route from the server and removed its references from the routes index and server files, eliminating an unused endpoint and simplifying the routing configuration.

* refactor: Migrate countTokens Utility to API Module
  - Removed the local `countTokens` utility and moved it into the `@librechat/api` module for centralized access.
  - Updated imports across the application to reference the new location and cleaned up references to the previous implementation.

* refactor: Centralize escapeRegExp Utility in API Module
  - Moved `escapeRegExp` from local utility files into `@librechat/api`, updated imports accordingly, and removed the duplicate implementations scattered across the codebase.

* refactor: Enhance Token Counting Flexibility in Text Processing
  - `processTextWithTokenLimit` now accepts both synchronous and asynchronous token-counting functions, defined by a new `TokenCountFn` type.
  - Added tests validating that sync and async counters produce consistent results.
  - Wrapped `countTokens` to track call counts, confirming the reduction in unnecessary tokenizer calls.
  - Benchmarked the new implementation against the old one, demonstrating significant efficiency improvements.

* chore: documentation for Truncation Safety Buffer in Token Processing
  - Applied a safety-buffer multiplier (the `TRUNCATION_SAFETY_BUFFER` constant) to character-position estimates during truncation, preventing cuts that overshoot the token limit.
  - Documented the rationale behind the buffer and its impact on accuracy and token-counting performance.
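The ratio-based refactor is easiest to see in a sketch. The following is a minimal reconstruction assuming only the names the commit message mentions (`processTextWithTokenLimit`, `TokenCountFn`, `TRUNCATION_SAFETY_BUFFER`); the actual implementation in `@librechat/api`, its buffer value, and its return shape may differ.

```typescript
/** Per the refactor above, a token counter may be sync or async. */
type TokenCountFn = (text: string) => number | Promise<number>;

/**
 * Undershoot each character-position estimate slightly so a single cut
 * rarely lands past the token limit (the 0.95 value is illustrative).
 */
const TRUNCATION_SAFETY_BUFFER = 0.95;

async function processTextWithTokenLimit(
  text: string,
  tokenLimit: number,
  countTokensFn: TokenCountFn,
): Promise<{ text: string; tokenCount: number }> {
  let tokenCount = await countTokensFn(text);
  if (tokenCount <= tokenLimit) {
    return { text, tokenCount };
  }
  let truncated = text;
  // Ratio-based estimation: measure chars-per-token once per iteration and
  // jump straight to the estimated cut point, instead of binary-searching
  // the string with O(log n) tokenizer calls.
  while (tokenCount > tokenLimit) {
    const charsPerToken = truncated.length / tokenCount;
    const cut = Math.floor(tokenLimit * charsPerToken * TRUNCATION_SAFETY_BUFFER);
    truncated = truncated.slice(0, cut);
    tokenCount = await countTokensFn(truncated);
  }
  return { text: truncated, tokenCount };
}
```

Because `await` on a plain number resolves immediately, the same body serves both sync and async counters. Uniform token density converges after one extra count; highly variable density (the edge case called out above) simply loops again, and since each pass strictly shrinks the string, termination is guaranteed. For reference, the centralized `escapeRegExp` helper is conventionally the standard one-liner (the exact body in `@librechat/api` is not shown on this page):

```typescript
/** Escapes regex metacharacters so arbitrary text can be embedded in a RegExp. */
function escapeRegExp(str: string): string {
  return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
```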
88 lines · 2.7 KiB · TypeScript
import { logger } from '@librechat/data-schemas';
import { encoding_for_model as encodingForModel, get_encoding as getEncoding } from 'tiktoken';
import type { Tiktoken, TiktokenModel, TiktokenEncoding } from 'tiktoken';

interface TokenizerOptions {
  debug?: boolean;
}

class Tokenizer {
  tokenizersCache: Record<string, Tiktoken>;
  tokenizerCallsCount: number;
  private options?: TokenizerOptions;

  constructor() {
    this.tokenizersCache = {};
    this.tokenizerCallsCount = 0;
  }

  /** Returns a cached encoder for the given model or encoding name, creating and caching one on first use. */
  getTokenizer(
    encoding: TiktokenModel | TiktokenEncoding,
    isModelName = false,
    extendSpecialTokens: Record<string, number> = {},
  ): Tiktoken {
    let tokenizer: Tiktoken;
    if (this.tokenizersCache[encoding]) {
      tokenizer = this.tokenizersCache[encoding];
    } else {
      if (isModelName) {
        tokenizer = encodingForModel(encoding as TiktokenModel, extendSpecialTokens);
      } else {
        tokenizer = getEncoding(encoding as TiktokenEncoding, extendSpecialTokens);
      }
      this.tokenizersCache[encoding] = tokenizer;
    }
    return tokenizer;
  }

  /** Frees the WASM memory held by every cached encoder and empties the cache. */
  freeAndResetAllEncoders(): void {
    try {
      Object.keys(this.tokenizersCache).forEach((key) => {
        if (this.tokenizersCache[key]) {
          this.tokenizersCache[key].free();
          delete this.tokenizersCache[key];
        }
      });
      this.tokenizerCallsCount = 1;
    } catch (error) {
      logger.error('[Tokenizer] Free and reset encoders error', error);
    }
  }

  /** Frees all encoders every 25 calls to keep tiktoken's memory usage bounded. */
  resetTokenizersIfNecessary(): void {
    if (this.tokenizerCallsCount >= 25) {
      if (this.options?.debug) {
        logger.debug('[Tokenizer] freeAndResetAllEncoders: reached 25 encodings, resetting...');
      }
      this.freeAndResetAllEncoders();
    }
    this.tokenizerCallsCount++;
  }

  /** Returns the token count of `text` under the given encoding; on failure, resets all encoders and retries once. */
  getTokenCount(text: string, encoding: TiktokenModel | TiktokenEncoding = 'cl100k_base'): number {
    this.resetTokenizersIfNecessary();
    try {
      const tokenizer = this.getTokenizer(encoding);
      return tokenizer.encode(text, 'all').length;
    } catch (error) {
      logger.error('[Tokenizer] Error getting token count:', error);
      this.freeAndResetAllEncoders();
      const tokenizer = this.getTokenizer(encoding);
      return tokenizer.encode(text, 'all').length;
    }
  }
}

const TokenizerSingleton = new Tokenizer();

/**
 * Counts the number of tokens in a given text using tiktoken.
 * This is an async wrapper around Tokenizer.getTokenCount for compatibility.
 * @param text - The text to be tokenized. Defaults to an empty string if not provided.
 * @returns The number of tokens in the provided text.
 */
export async function countTokens(text = ''): Promise<number> {
  return TokenizerSingleton.getTokenCount(text, 'cl100k_base');
}

export default TokenizerSingleton;
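A hypothetical usage sketch of this module follows; the relative import path and the `o200k_base` encoding choice are assumptions for illustration, not taken from the file above.

```typescript
import TokenizerSingleton, { countTokens } from './tokenizer';

async function demo(): Promise<void> {
  // The async wrapper always counts against the cl100k_base encoding.
  const viaWrapper = await countTokens('Hello, world!');

  // The singleton lets callers pick another encoding, reusing cached encoders.
  const viaSingleton = TokenizerSingleton.getTokenCount('Hello, world!', 'o200k_base');

  console.log({ viaWrapper, viaSingleton });
}

void demo();
```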