mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-02-11 20:14:24 +01:00
🔄 refactor: Consolidate Tokenizer; Fix Jest Open Handles (#5175)
* refactor: consolidate tokenizer to singleton * fix: remove legacy tokenizer code, add Tokenizer singleton tests * ci: fix jest open handles
This commit is contained in:
parent
bf0a84e45a
commit
c26b54c74d
11 changed files with 202 additions and 221 deletions
|
|
@ -1,6 +1,5 @@
|
|||
const Anthropic = require('@anthropic-ai/sdk');
|
||||
const { HttpsProxyAgent } = require('https-proxy-agent');
|
||||
const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
|
||||
const {
|
||||
Constants,
|
||||
EModelEndpoint,
|
||||
|
|
@ -19,6 +18,7 @@ const {
|
|||
} = require('./prompts');
|
||||
const { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } = require('~/utils');
|
||||
const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
|
||||
const Tokenizer = require('~/server/services/Tokenizer');
|
||||
const { sleep } = require('~/server/utils');
|
||||
const BaseClient = require('./BaseClient');
|
||||
const { logger } = require('~/config');
|
||||
|
|
@ -26,8 +26,6 @@ const { logger } = require('~/config');
|
|||
const HUMAN_PROMPT = '\n\nHuman:';
|
||||
const AI_PROMPT = '\n\nAssistant:';
|
||||
|
||||
const tokenizersCache = {};
|
||||
|
||||
/** Helper function to introduce a delay before retrying */
|
||||
function delayBeforeRetry(attempts, baseDelay = 1000) {
|
||||
return new Promise((resolve) => setTimeout(resolve, baseDelay * attempts));
|
||||
|
|
@ -149,7 +147,6 @@ class AnthropicClient extends BaseClient {
|
|||
|
||||
this.startToken = '||>';
|
||||
this.endToken = '';
|
||||
this.gptEncoder = this.constructor.getTokenizer('cl100k_base');
|
||||
|
||||
return this;
|
||||
}
|
||||
|
|
@ -849,22 +846,18 @@ class AnthropicClient extends BaseClient {
|
|||
logger.debug('AnthropicClient doesn\'t use getBuildMessagesOptions');
|
||||
}
|
||||
|
||||
static getTokenizer(encoding, isModelName = false, extendSpecialTokens = {}) {
|
||||
if (tokenizersCache[encoding]) {
|
||||
return tokenizersCache[encoding];
|
||||
}
|
||||
let tokenizer;
|
||||
if (isModelName) {
|
||||
tokenizer = encodingForModel(encoding, extendSpecialTokens);
|
||||
} else {
|
||||
tokenizer = getEncoding(encoding, extendSpecialTokens);
|
||||
}
|
||||
tokenizersCache[encoding] = tokenizer;
|
||||
return tokenizer;
|
||||
getEncoding() {
|
||||
return 'cl100k_base';
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the token count of a given text. It also checks and resets the tokenizers if necessary.
|
||||
* @param {string} text - The text to get the token count for.
|
||||
* @returns {number} The token count of the given text.
|
||||
*/
|
||||
getTokenCount(text) {
|
||||
return this.gptEncoder.encode(text, 'all').length;
|
||||
const encoding = this.getEncoding();
|
||||
return Tokenizer.getTokenCount(text, encoding);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ const { ChatGoogleVertexAI } = require('@langchain/google-vertexai');
|
|||
const { ChatGoogleGenerativeAI } = require('@langchain/google-genai');
|
||||
const { GoogleGenerativeAI: GenAI } = require('@google/generative-ai');
|
||||
const { AIMessage, HumanMessage, SystemMessage } = require('@langchain/core/messages');
|
||||
const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
|
||||
const {
|
||||
validateVisionModel,
|
||||
getResponseSender,
|
||||
|
|
@ -17,6 +16,7 @@ const {
|
|||
AuthKeys,
|
||||
} = require('librechat-data-provider');
|
||||
const { encodeAndFormat } = require('~/server/services/Files/images');
|
||||
const Tokenizer = require('~/server/services/Tokenizer');
|
||||
const { getModelMaxTokens } = require('~/utils');
|
||||
const { sleep } = require('~/server/utils');
|
||||
const { logger } = require('~/config');
|
||||
|
|
@ -31,7 +31,6 @@ const BaseClient = require('./BaseClient');
|
|||
const loc = process.env.GOOGLE_LOC || 'us-central1';
|
||||
const publisher = 'google';
|
||||
const endpointPrefix = `${loc}-aiplatform.googleapis.com`;
|
||||
const tokenizersCache = {};
|
||||
|
||||
const settings = endpointSettings[EModelEndpoint.google];
|
||||
const EXCLUDED_GENAI_MODELS = /gemini-(?:1\.0|1-0|pro)/;
|
||||
|
|
@ -177,25 +176,15 @@ class GoogleClient extends BaseClient {
|
|||
// without tripping the stop sequences, so I'm using "||>" instead.
|
||||
this.startToken = '||>';
|
||||
this.endToken = '';
|
||||
this.gptEncoder = this.constructor.getTokenizer('cl100k_base');
|
||||
} else if (isTextModel) {
|
||||
this.startToken = '||>';
|
||||
this.endToken = '';
|
||||
this.gptEncoder = this.constructor.getTokenizer('text-davinci-003', true, {
|
||||
'<|im_start|>': 100264,
|
||||
'<|im_end|>': 100265,
|
||||
});
|
||||
} else {
|
||||
// Previously I was trying to use "<|endoftext|>" but there seems to be some bug with OpenAI's token counting
|
||||
// system that causes only the first "<|endoftext|>" to be counted as 1 token, and the rest are not treated
|
||||
// as a single token. So we're using this instead.
|
||||
this.startToken = '||>';
|
||||
this.endToken = '';
|
||||
try {
|
||||
this.gptEncoder = this.constructor.getTokenizer(this.modelOptions.model, true);
|
||||
} catch {
|
||||
this.gptEncoder = this.constructor.getTokenizer('text-davinci-003', true);
|
||||
}
|
||||
}
|
||||
|
||||
if (!this.modelOptions.stop) {
|
||||
|
|
@ -926,23 +915,18 @@ class GoogleClient extends BaseClient {
|
|||
];
|
||||
}
|
||||
|
||||
/* TO-DO: Handle tokens with Google tokenization NOTE: these are required */
|
||||
static getTokenizer(encoding, isModelName = false, extendSpecialTokens = {}) {
|
||||
if (tokenizersCache[encoding]) {
|
||||
return tokenizersCache[encoding];
|
||||
}
|
||||
let tokenizer;
|
||||
if (isModelName) {
|
||||
tokenizer = encodingForModel(encoding, extendSpecialTokens);
|
||||
} else {
|
||||
tokenizer = getEncoding(encoding, extendSpecialTokens);
|
||||
}
|
||||
tokenizersCache[encoding] = tokenizer;
|
||||
return tokenizer;
|
||||
getEncoding() {
|
||||
return 'cl100k_base';
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the token count of a given text. It also checks and resets the tokenizers if necessary.
|
||||
* @param {string} text - The text to get the token count for.
|
||||
* @returns {number} The token count of the given text.
|
||||
*/
|
||||
getTokenCount(text) {
|
||||
return this.gptEncoder.encode(text, 'all').length;
|
||||
const encoding = this.getEncoding();
|
||||
return Tokenizer.getTokenCount(text, encoding);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@ const {
|
|||
validateVisionModel,
|
||||
mapModelToAzureConfig,
|
||||
} = require('librechat-data-provider');
|
||||
const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
|
||||
const {
|
||||
extractBaseURL,
|
||||
constructAzureURL,
|
||||
|
|
@ -29,6 +28,7 @@ const {
|
|||
createContextHandlers,
|
||||
} = require('./prompts');
|
||||
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
|
||||
const Tokenizer = require('~/server/services/Tokenizer');
|
||||
const { spendTokens } = require('~/models/spendTokens');
|
||||
const { isEnabled, sleep } = require('~/server/utils');
|
||||
const { handleOpenAIErrors } = require('./tools/util');
|
||||
|
|
@ -40,11 +40,6 @@ const { tokenSplit } = require('./document');
|
|||
const BaseClient = require('./BaseClient');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
// Cache to store Tiktoken instances
|
||||
const tokenizersCache = {};
|
||||
// Counter for keeping track of the number of tokenizer calls
|
||||
let tokenizerCallsCount = 0;
|
||||
|
||||
class OpenAIClient extends BaseClient {
|
||||
constructor(apiKey, options = {}) {
|
||||
super(apiKey, options);
|
||||
|
|
@ -307,75 +302,8 @@ class OpenAIClient extends BaseClient {
|
|||
}
|
||||
}
|
||||
|
||||
// Selects an appropriate tokenizer based on the current configuration of the client instance.
|
||||
// It takes into account factors such as whether it's a chat completion, an unofficial chat GPT model, etc.
|
||||
selectTokenizer() {
|
||||
let tokenizer;
|
||||
this.encoding = 'text-davinci-003';
|
||||
if (this.isChatCompletion) {
|
||||
this.encoding = this.modelOptions.model.includes('gpt-4o') ? 'o200k_base' : 'cl100k_base';
|
||||
tokenizer = this.constructor.getTokenizer(this.encoding);
|
||||
} else if (this.isUnofficialChatGptModel) {
|
||||
const extendSpecialTokens = {
|
||||
'<|im_start|>': 100264,
|
||||
'<|im_end|>': 100265,
|
||||
};
|
||||
tokenizer = this.constructor.getTokenizer(this.encoding, true, extendSpecialTokens);
|
||||
} else {
|
||||
try {
|
||||
const { model } = this.modelOptions;
|
||||
this.encoding = model.includes('instruct') ? 'text-davinci-003' : model;
|
||||
tokenizer = this.constructor.getTokenizer(this.encoding, true);
|
||||
} catch {
|
||||
tokenizer = this.constructor.getTokenizer('text-davinci-003', true);
|
||||
}
|
||||
}
|
||||
|
||||
return tokenizer;
|
||||
}
|
||||
|
||||
// Retrieves a tokenizer either from the cache or creates a new one if one doesn't exist in the cache.
|
||||
// If a tokenizer is being created, it's also added to the cache.
|
||||
static getTokenizer(encoding, isModelName = false, extendSpecialTokens = {}) {
|
||||
let tokenizer;
|
||||
if (tokenizersCache[encoding]) {
|
||||
tokenizer = tokenizersCache[encoding];
|
||||
} else {
|
||||
if (isModelName) {
|
||||
tokenizer = encodingForModel(encoding, extendSpecialTokens);
|
||||
} else {
|
||||
tokenizer = getEncoding(encoding, extendSpecialTokens);
|
||||
}
|
||||
tokenizersCache[encoding] = tokenizer;
|
||||
}
|
||||
return tokenizer;
|
||||
}
|
||||
|
||||
// Frees all encoders in the cache and resets the count.
|
||||
static freeAndResetAllEncoders() {
|
||||
try {
|
||||
Object.keys(tokenizersCache).forEach((key) => {
|
||||
if (tokenizersCache[key]) {
|
||||
tokenizersCache[key].free();
|
||||
delete tokenizersCache[key];
|
||||
}
|
||||
});
|
||||
// Reset count
|
||||
tokenizerCallsCount = 1;
|
||||
} catch (error) {
|
||||
logger.error('[OpenAIClient] Free and reset encoders error', error);
|
||||
}
|
||||
}
|
||||
|
||||
// Checks if the cache of tokenizers has reached a certain size. If it has, it frees and resets all tokenizers.
|
||||
resetTokenizersIfNecessary() {
|
||||
if (tokenizerCallsCount >= 25) {
|
||||
if (this.options.debug) {
|
||||
logger.debug('[OpenAIClient] freeAndResetAllEncoders: reached 25 encodings, resetting...');
|
||||
}
|
||||
this.constructor.freeAndResetAllEncoders();
|
||||
}
|
||||
tokenizerCallsCount++;
|
||||
getEncoding() {
|
||||
return this.model?.includes('gpt-4o') ? 'o200k_base' : 'cl100k_base';
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -384,15 +312,8 @@ class OpenAIClient extends BaseClient {
|
|||
* @returns {number} The token count of the given text.
|
||||
*/
|
||||
getTokenCount(text) {
|
||||
this.resetTokenizersIfNecessary();
|
||||
try {
|
||||
const tokenizer = this.selectTokenizer();
|
||||
return tokenizer.encode(text, 'all').length;
|
||||
} catch (error) {
|
||||
this.constructor.freeAndResetAllEncoders();
|
||||
const tokenizer = this.selectTokenizer();
|
||||
return tokenizer.encode(text, 'all').length;
|
||||
}
|
||||
const encoding = this.getEncoding();
|
||||
return Tokenizer.getTokenCount(text, encoding);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
jest.mock('~/cache/getLogStores');
|
||||
require('dotenv').config();
|
||||
const OpenAI = require('openai');
|
||||
const getLogStores = require('~/cache/getLogStores');
|
||||
const { fetchEventSource } = require('@waylaidwanderer/fetch-event-source');
|
||||
const { genAzureChatCompletion } = require('~/utils/azureUtils');
|
||||
const OpenAIClient = require('../OpenAIClient');
|
||||
|
|
@ -134,7 +136,13 @@ OpenAI.mockImplementation(() => ({
|
|||
}));
|
||||
|
||||
describe('OpenAIClient', () => {
|
||||
let client, client2;
|
||||
const mockSet = jest.fn();
|
||||
const mockCache = { set: mockSet };
|
||||
|
||||
beforeEach(() => {
|
||||
getLogStores.mockReturnValue(mockCache);
|
||||
});
|
||||
let client;
|
||||
const model = 'gpt-4';
|
||||
const parentMessageId = '1';
|
||||
const messages = [
|
||||
|
|
@ -176,7 +184,6 @@ describe('OpenAIClient', () => {
|
|||
beforeEach(() => {
|
||||
const options = { ...defaultOptions };
|
||||
client = new OpenAIClient('test-api-key', options);
|
||||
client2 = new OpenAIClient('test-api-key', options);
|
||||
client.summarizeMessages = jest.fn().mockResolvedValue({
|
||||
role: 'assistant',
|
||||
content: 'Refined answer',
|
||||
|
|
@ -185,7 +192,6 @@ describe('OpenAIClient', () => {
|
|||
client.buildPrompt = jest
|
||||
.fn()
|
||||
.mockResolvedValue({ prompt: messages.map((m) => m.text).join('\n') });
|
||||
client.constructor.freeAndResetAllEncoders();
|
||||
client.getMessages = jest.fn().mockResolvedValue([]);
|
||||
});
|
||||
|
||||
|
|
@ -335,77 +341,11 @@ describe('OpenAIClient', () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe('selectTokenizer', () => {
|
||||
it('should get the correct tokenizer based on the instance state', () => {
|
||||
const tokenizer = client.selectTokenizer();
|
||||
expect(tokenizer).toBeDefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe('freeAllTokenizers', () => {
|
||||
it('should free all tokenizers', () => {
|
||||
// Create a tokenizer
|
||||
const tokenizer = client.selectTokenizer();
|
||||
|
||||
// Mock 'free' method on the tokenizer
|
||||
tokenizer.free = jest.fn();
|
||||
|
||||
client.constructor.freeAndResetAllEncoders();
|
||||
|
||||
// Check if 'free' method has been called on the tokenizer
|
||||
expect(tokenizer.free).toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe('getTokenCount', () => {
|
||||
it('should return the correct token count', () => {
|
||||
const count = client.getTokenCount('Hello, world!');
|
||||
expect(count).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('should reset the encoder and count when count reaches 25', () => {
|
||||
const freeAndResetEncoderSpy = jest.spyOn(client.constructor, 'freeAndResetAllEncoders');
|
||||
|
||||
// Call getTokenCount 25 times
|
||||
for (let i = 0; i < 25; i++) {
|
||||
client.getTokenCount('test text');
|
||||
}
|
||||
|
||||
expect(freeAndResetEncoderSpy).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should not reset the encoder and count when count is less than 25', () => {
|
||||
const freeAndResetEncoderSpy = jest.spyOn(client.constructor, 'freeAndResetAllEncoders');
|
||||
freeAndResetEncoderSpy.mockClear();
|
||||
|
||||
// Call getTokenCount 24 times
|
||||
for (let i = 0; i < 24; i++) {
|
||||
client.getTokenCount('test text');
|
||||
}
|
||||
|
||||
expect(freeAndResetEncoderSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should handle errors and reset the encoder', () => {
|
||||
const freeAndResetEncoderSpy = jest.spyOn(client.constructor, 'freeAndResetAllEncoders');
|
||||
|
||||
// Mock encode function to throw an error
|
||||
client.selectTokenizer().encode = jest.fn().mockImplementation(() => {
|
||||
throw new Error('Test error');
|
||||
});
|
||||
|
||||
client.getTokenCount('test text');
|
||||
|
||||
expect(freeAndResetEncoderSpy).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should not throw null pointer error when freeing the same encoder twice', () => {
|
||||
client.constructor.freeAndResetAllEncoders();
|
||||
client2.constructor.freeAndResetAllEncoders();
|
||||
|
||||
const count = client2.getTokenCount('test text');
|
||||
expect(count).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('getSaveOptions', () => {
|
||||
|
|
@ -548,7 +488,6 @@ describe('OpenAIClient', () => {
|
|||
testCases.forEach((testCase) => {
|
||||
it(`should return ${testCase.expected} tokens for model ${testCase.model}`, () => {
|
||||
client.modelOptions.model = testCase.model;
|
||||
client.selectTokenizer();
|
||||
// 3 tokens for assistant label
|
||||
let totalTokens = 3;
|
||||
for (let message of example_messages) {
|
||||
|
|
@ -582,7 +521,6 @@ describe('OpenAIClient', () => {
|
|||
|
||||
it(`should return ${expectedTokens} tokens for model ${visionModel} (Vision Request)`, () => {
|
||||
client.modelOptions.model = visionModel;
|
||||
client.selectTokenizer();
|
||||
// 3 tokens for assistant label
|
||||
let totalTokens = 3;
|
||||
for (let message of vision_request) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue