Mirror of https://github.com/danny-avila/LibreChat.git (synced 2025-12-16 08:20:14 +01:00)
🧹 chore: Update logger imports to use @librechat/data-schemas across multiple files and remove unused sleep function from queue.js (#9930)

* chore: Replace local isEnabled utility with @librechat/api import across multiple files; update test files (sketched below)
* chore: Replace local logger import with @librechat/data-schemas logger in countTokens.js and fork.js
* chore: Update logs volume path in docker-compose.yml to the correct directory
* chore: Fix import order of isEnabled in static.js
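A minimal sketch of the import migration those commits describe, assuming `isEnabled` and `logger` are exported under these names (the "before" paths are hypothetical, for illustration only):

// Before: local utilities (hypothetical paths, not taken from the repository)
// const { isEnabled } = require('~/server/utils');
// const { logger } = require('~/config');

// After: shared packages, as stated in the commit message
const { isEnabled } = require('@librechat/api');
const { logger } = require('@librechat/data-schemas');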
37 lines · 1.6 KiB · JavaScript
const { Tiktoken } = require('tiktoken/lite');
const { logger } = require('@librechat/data-schemas');
const p50k_base = require('tiktoken/encoders/p50k_base.json');
const cl100k_base = require('tiktoken/encoders/cl100k_base.json');

/**
 * Counts the number of tokens in a given text using a specified encoding model.
 *
 * This function uses the 'Tiktoken' library to encode text based on the selected model.
 * For 'text-davinci-003', the 'p50k_base' encoder is used; all other models use the
 * 'cl100k_base' encoder. If an error occurs during encoding, the error is logged and
 * the function returns 0 rather than rethrowing.
 *
 * @async
 * @param {string} text - The text to be tokenized. Defaults to an empty string if not provided.
 * @param {string} modelName - The name of the model used for tokenizing. Defaults to 'gpt-3.5-turbo'.
 * @returns {Promise<number>} The number of tokens in the provided text. Returns 0 if an error occurs.
 */
const countTokens = async (text = '', modelName = 'gpt-3.5-turbo') => {
  let encoder = null;
  try {
    const model = modelName.includes('text-davinci-003') ? p50k_base : cl100k_base;
    encoder = new Tiktoken(model.bpe_ranks, model.special_tokens, model.pat_str);
    const tokens = encoder.encode(text);
    encoder.free();
    return tokens.length;
  } catch (e) {
    logger.error('[countTokens]', e);
    if (encoder) {
      // Release the WASM-backed encoder even on failure to avoid leaking memory.
      encoder.free();
    }
    return 0;
  }
};

module.exports = countTokens;
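A quick usage sketch for the module above (the relative require path is an assumption for illustration; the actual token counts depend on the encoder):

const countTokens = require('./countTokens');

(async () => {
  // 'text-davinci-003' routes to the p50k_base encoder
  const davinci = await countTokens('Hello, world!', 'text-davinci-003');
  // the default model ('gpt-3.5-turbo') routes to cl100k_base
  const turbo = await countTokens('Hello, world!');
  console.log({ davinci, turbo });
})();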