LibreChat/api/server/utils/countTokens.js
Danny Avila dbe4dd96b4
🧹 chore: Cleanup Logger and Utility Imports (#9935)
* 🧹 chore: Update logger imports to use @librechat/data-schemas across multiple files and remove unused sleep function from queue.js (#9930)

* chore: Replace local isEnabled utility with @librechat/api import across multiple files, update test files

* chore: Replace local logger import with @librechat/data-schemas logger in countTokens.js and fork.js

* chore: Update logs volume path in docker-compose.yml to correct directory

* chore: import order of isEnabled in static.js
2025-10-01 23:30:47 -04:00

37 lines
1.6 KiB
JavaScript

const { Tiktoken } = require('tiktoken/lite');
const { logger } = require('@librechat/data-schemas');
const p50k_base = require('tiktoken/encoders/p50k_base.json');
const cl100k_base = require('tiktoken/encoders/cl100k_base.json');
/**
* Counts the number of tokens in a given text using a specified encoding model.
*
* This function utilizes the 'Tiktoken' library to encode text based on the selected model.
* It supports two models, 'text-davinci-003' and 'gpt-3.5-turbo', each with its own encoding strategy.
* For 'text-davinci-003', the 'p50k_base' encoder is used, whereas for other models, the 'cl100k_base' encoder is applied.
* In case of an error during encoding, the error is logged, and the function returns 0.
*
* @async
* @param {string} text - The text to be tokenized. Defaults to an empty string if not provided.
* @param {string} modelName - The name of the model used for tokenizing. Defaults to 'gpt-3.5-turbo'.
* @returns {Promise<number>} The number of tokens in the provided text. Returns 0 if an error occurs.
* @throws Logs the error to a logger and rethrows if any error occurs during tokenization.
*/
const countTokens = async (text = '', modelName = 'gpt-3.5-turbo') => {
let encoder = null;
try {
const model = modelName.includes('text-davinci-003') ? p50k_base : cl100k_base;
encoder = new Tiktoken(model.bpe_ranks, model.special_tokens, model.pat_str);
const tokens = encoder.encode(text);
encoder.free();
return tokens.length;
} catch (e) {
logger.error('[countTokens]', e);
if (encoder) {
encoder.free();
}
return 0;
}
};
module.exports = countTokens;