mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-21 02:40:14 +01:00
🔄 refactor: Consolidate Tokenizer; Fix Jest Open Handles (#5175)
* refactor: consolidate tokenizer to singleton * fix: remove legacy tokenizer code, add Tokenizer singleton tests * ci: fix jest open handles
This commit is contained in:
parent
bf0a84e45a
commit
c26b54c74d
11 changed files with 202 additions and 221 deletions
|
|
@ -6,7 +6,6 @@ const { ChatGoogleVertexAI } = require('@langchain/google-vertexai');
|
|||
const { ChatGoogleGenerativeAI } = require('@langchain/google-genai');
|
||||
const { GoogleGenerativeAI: GenAI } = require('@google/generative-ai');
|
||||
const { AIMessage, HumanMessage, SystemMessage } = require('@langchain/core/messages');
|
||||
const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
|
||||
const {
|
||||
validateVisionModel,
|
||||
getResponseSender,
|
||||
|
|
@ -17,6 +16,7 @@ const {
|
|||
AuthKeys,
|
||||
} = require('librechat-data-provider');
|
||||
const { encodeAndFormat } = require('~/server/services/Files/images');
|
||||
const Tokenizer = require('~/server/services/Tokenizer');
|
||||
const { getModelMaxTokens } = require('~/utils');
|
||||
const { sleep } = require('~/server/utils');
|
||||
const { logger } = require('~/config');
|
||||
|
|
@ -31,7 +31,6 @@ const BaseClient = require('./BaseClient');
|
|||
const loc = process.env.GOOGLE_LOC || 'us-central1';
|
||||
const publisher = 'google';
|
||||
const endpointPrefix = `${loc}-aiplatform.googleapis.com`;
|
||||
const tokenizersCache = {};
|
||||
|
||||
const settings = endpointSettings[EModelEndpoint.google];
|
||||
const EXCLUDED_GENAI_MODELS = /gemini-(?:1\.0|1-0|pro)/;
|
||||
|
|
@ -177,25 +176,15 @@ class GoogleClient extends BaseClient {
|
|||
// without tripping the stop sequences, so I'm using "||>" instead.
|
||||
this.startToken = '||>';
|
||||
this.endToken = '';
|
||||
this.gptEncoder = this.constructor.getTokenizer('cl100k_base');
|
||||
} else if (isTextModel) {
|
||||
this.startToken = '||>';
|
||||
this.endToken = '';
|
||||
this.gptEncoder = this.constructor.getTokenizer('text-davinci-003', true, {
|
||||
'<|im_start|>': 100264,
|
||||
'<|im_end|>': 100265,
|
||||
});
|
||||
} else {
|
||||
// Previously I was trying to use "<|endoftext|>" but there seems to be some bug with OpenAI's token counting
|
||||
// system that causes only the first "<|endoftext|>" to be counted as 1 token, and the rest are not treated
|
||||
// as a single token. So we're using this instead.
|
||||
this.startToken = '||>';
|
||||
this.endToken = '';
|
||||
try {
|
||||
this.gptEncoder = this.constructor.getTokenizer(this.modelOptions.model, true);
|
||||
} catch {
|
||||
this.gptEncoder = this.constructor.getTokenizer('text-davinci-003', true);
|
||||
}
|
||||
}
|
||||
|
||||
if (!this.modelOptions.stop) {
|
||||
|
|
@ -926,23 +915,18 @@ class GoogleClient extends BaseClient {
|
|||
];
|
||||
}
|
||||
|
||||
/* TO-DO: Handle tokens with Google tokenization NOTE: these are required */
|
||||
static getTokenizer(encoding, isModelName = false, extendSpecialTokens = {}) {
|
||||
if (tokenizersCache[encoding]) {
|
||||
return tokenizersCache[encoding];
|
||||
}
|
||||
let tokenizer;
|
||||
if (isModelName) {
|
||||
tokenizer = encodingForModel(encoding, extendSpecialTokens);
|
||||
} else {
|
||||
tokenizer = getEncoding(encoding, extendSpecialTokens);
|
||||
}
|
||||
tokenizersCache[encoding] = tokenizer;
|
||||
return tokenizer;
|
||||
getEncoding() {
|
||||
return 'cl100k_base';
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the token count of a given text. It also checks and resets the tokenizers if necessary.
|
||||
* @param {string} text - The text to get the token count for.
|
||||
* @returns {number} The token count of the given text.
|
||||
*/
|
||||
getTokenCount(text) {
|
||||
return this.gptEncoder.encode(text, 'all').length;
|
||||
const encoding = this.getEncoding();
|
||||
return Tokenizer.getTokenCount(text, encoding);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue