LibreChat/api/app/clients/OpenAIClient.js
Danny Avila 4ca43fb53d
refactor: Encrypt & Expire User Provided Keys, feat: Rate Limiting (#874)
* docs: make_your_own.md formatting fix for mkdocs

* feat: add express-mongo-sanitize
feat: add login/registration rate limiting

* chore: remove unnecessary console log

* wip: remove token handling from localStorage to encrypted DB solution

* refactor: minor change to UserService

* fix mongo query and add keys route to server

* fix backend controllers and simplify schema/crud

* refactor: rename token to key to separate from access/refresh tokens, setTokenDialog -> setKeyDialog

* refactor(schemas): TEndpointOption token -> key

* refactor(api): use new encrypted key retrieval system

* fix(SetKeyDialog): fix key prop error

* fix(abortMiddleware): pass random UUID if messageId is not generated yet for proper error display on frontend

* fix(getUserKey): wrong prop passed in arg, adds error handling

* fix: prevent message without conversationId from saving to DB, prevents branching on the frontend to a new top-level branch

* refactor: change wording of multiple display messages

* refactor(checkExpiry -> checkUserKeyExpiry): move to UserService file

* fix: type imports from common

* refactor(SubmitButton): convert to TS

* refactor(key.ts): change localStorage map key name

* refactor: add new custom tailwind classes to better match openAI colors

* chore: remove unnecessary warning and catch ScreenShot error

* refactor: move userKey frontend logic to hooks and remove use of localStorage and instead query the DB

* refactor: invalidate correct query key, memoize userKey hook, conditionally render SetKeyDialog to avoid unnecessary calls, refactor SubmitButton props and useEffect for showing 'provide key first'

* fix(SetKeyDialog): use enum-like object for expiry values
feat(Dropdown): add optionsClassName to dynamically change dropdown options container classes

* fix: handle edge case where user had provided a key but the server changes to env variable for keys

* refactor(OpenAI/titleConvo): move titling to client to retain authorized credentials in message lifecycle for titling

* fix(azure): handle user_provided keys correctly for azure

* feat: send user Id to OpenAI to differentiate users in completion requests

* refactor(OpenAI/titleConvo): adding tokens helps minimize LLM from using the language in title response

* feat: add delete endpoint for keys

* chore: remove throttling of title

* feat: add 'Data controls' to Settings, add 'Revoke' keys feature in Key Dialog and Data controls

* refactor: reorganize PluginsClient files in langchain format

* feat: use langchain for titling convos

* chore: cleanup titling convo, with fallback to original method, escape braces, use only snippet for language detection

* refactor: move helper functions to appropriate langchain folders for reusability

* fix: userProvidesKey handling for gptPlugins

* fix: frontend handling of plugins key

* chore: cleanup logging and ts-ignore SSE

* fix: forwardRef misuse in DangerButton

* fix(GoogleConfig/FileUpload): localize errors and simplify validation with zod

* fix: cleanup google logging and fix user provided key handling

* chore: remove titling from google

* chore: removing logging from browser endpoint

* wip: fix menu flicker

* feat: useLocalStorage hook

* feat: add Tooltip for UI

* refactor(EndpointMenu): utilize Tooltip and useLocalStorage, remove old 'New Chat' slide-over

* fix(e2e): use testId for endpoint menu trigger

* chore: final touches to EndpointMenu before future refactor to declutter component

* refactor(localization): change select endpoint to open menu and add translations

* chore: add final prop to error message response

* ci: minor edits to facilitate testing

* ci: new e2e test which tests for new key setting/revoking features
2023-09-06 10:46:27 -04:00

440 lines
13 KiB
JavaScript

const BaseClient = require('./BaseClient');
const ChatGPTClient = require('./ChatGPTClient');
const {
encoding_for_model: encodingForModel,
get_encoding: getEncoding,
} = require('@dqbd/tiktoken');
const { maxTokensMap, genAzureChatCompletion } = require('../../utils');
const { runTitleChain } = require('./chains');
const { createLLM } = require('./llm');
// Cache to store Tiktoken instances
const tokenizersCache = {};
// Counter for keeping track of the number of tokenizer calls
let tokenizerCallsCount = 0;
class OpenAIClient extends BaseClient {
constructor(apiKey, options = {}) {
super(apiKey, options);
this.ChatGPTClient = new ChatGPTClient();
this.buildPrompt = this.ChatGPTClient.buildPrompt.bind(this);
this.getCompletion = this.ChatGPTClient.getCompletion.bind(this);
this.sender = options.sender ?? 'ChatGPT';
this.contextStrategy = options.contextStrategy
? options.contextStrategy.toLowerCase()
: 'discard';
this.shouldRefineContext = this.contextStrategy === 'refine';
this.azure = options.azure || false;
if (this.azure) {
this.azureEndpoint = genAzureChatCompletion(this.azure);
}
this.setOptions(options);
}
setOptions(options) {
if (this.options && !this.options.replaceOptions) {
this.options.modelOptions = {
...this.options.modelOptions,
...options.modelOptions,
};
delete options.modelOptions;
this.options = {
...this.options,
...options,
};
} else {
this.options = options;
}
if (this.options.openaiApiKey) {
this.apiKey = this.options.openaiApiKey;
}
const modelOptions = this.options.modelOptions || {};
if (!this.modelOptions) {
this.modelOptions = {
...modelOptions,
model: modelOptions.model || 'gpt-3.5-turbo',
temperature:
typeof modelOptions.temperature === 'undefined' ? 0.8 : modelOptions.temperature,
top_p: typeof modelOptions.top_p === 'undefined' ? 1 : modelOptions.top_p,
presence_penalty:
typeof modelOptions.presence_penalty === 'undefined' ? 1 : modelOptions.presence_penalty,
stop: modelOptions.stop,
};
}
this.isChatCompletion =
this.options.reverseProxyUrl ||
this.options.localAI ||
this.modelOptions.model.startsWith('gpt-');
this.isChatGptModel = this.isChatCompletion;
if (this.modelOptions.model === 'text-davinci-003') {
this.isChatCompletion = false;
this.isChatGptModel = false;
}
const { isChatGptModel } = this;
this.isUnofficialChatGptModel =
this.modelOptions.model.startsWith('text-chat') ||
this.modelOptions.model.startsWith('text-davinci-002-render');
this.maxContextTokens = maxTokensMap[this.modelOptions.model] ?? 4095; // 1 less than maximum
this.maxResponseTokens = this.modelOptions.max_tokens || 1024;
this.maxPromptTokens =
this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens;
if (this.maxPromptTokens + this.maxResponseTokens > this.maxContextTokens) {
throw new Error(
`maxPromptTokens + max_tokens (${this.maxPromptTokens} + ${this.maxResponseTokens} = ${
this.maxPromptTokens + this.maxResponseTokens
}) must be less than or equal to maxContextTokens (${this.maxContextTokens})`,
);
}
this.userLabel = this.options.userLabel || 'User';
this.chatGptLabel = this.options.chatGptLabel || 'Assistant';
this.setupTokens();
if (!this.modelOptions.stop) {
const stopTokens = [this.startToken];
if (this.endToken && this.endToken !== this.startToken) {
stopTokens.push(this.endToken);
}
stopTokens.push(`\n${this.userLabel}:`);
stopTokens.push('<|diff_marker|>');
this.modelOptions.stop = stopTokens;
}
if (this.options.reverseProxyUrl) {
this.completionsUrl = this.options.reverseProxyUrl;
this.langchainProxy = this.options.reverseProxyUrl.match(/.*v1/)[0];
} else if (isChatGptModel) {
this.completionsUrl = 'https://api.openai.com/v1/chat/completions';
} else {
this.completionsUrl = 'https://api.openai.com/v1/completions';
}
if (this.azureEndpoint) {
this.completionsUrl = this.azureEndpoint;
}
if (this.azureEndpoint && this.options.debug) {
console.debug('Using Azure endpoint');
}
return this;
}
setupTokens() {
if (this.isChatCompletion) {
this.startToken = '||>';
this.endToken = '';
} else if (this.isUnofficialChatGptModel) {
this.startToken = '<|im_start|>';
this.endToken = '<|im_end|>';
} else {
this.startToken = '||>';
this.endToken = '';
}
}
// Selects an appropriate tokenizer based on the current configuration of the client instance.
// It takes into account factors such as whether it's a chat completion, an unofficial chat GPT model, etc.
selectTokenizer() {
let tokenizer;
this.encoding = 'text-davinci-003';
if (this.isChatCompletion) {
this.encoding = 'cl100k_base';
tokenizer = this.constructor.getTokenizer(this.encoding);
} else if (this.isUnofficialChatGptModel) {
const extendSpecialTokens = {
'<|im_start|>': 100264,
'<|im_end|>': 100265,
};
tokenizer = this.constructor.getTokenizer(this.encoding, true, extendSpecialTokens);
} else {
try {
this.encoding = this.modelOptions.model;
tokenizer = this.constructor.getTokenizer(this.modelOptions.model, true);
} catch {
tokenizer = this.constructor.getTokenizer(this.encoding, true);
}
}
return tokenizer;
}
// Retrieves a tokenizer either from the cache or creates a new one if one doesn't exist in the cache.
// If a tokenizer is being created, it's also added to the cache.
static getTokenizer(encoding, isModelName = false, extendSpecialTokens = {}) {
let tokenizer;
if (tokenizersCache[encoding]) {
tokenizer = tokenizersCache[encoding];
} else {
if (isModelName) {
tokenizer = encodingForModel(encoding, extendSpecialTokens);
} else {
tokenizer = getEncoding(encoding, extendSpecialTokens);
}
tokenizersCache[encoding] = tokenizer;
}
return tokenizer;
}
// Frees all encoders in the cache and resets the count.
static freeAndResetAllEncoders() {
try {
Object.keys(tokenizersCache).forEach((key) => {
if (tokenizersCache[key]) {
tokenizersCache[key].free();
delete tokenizersCache[key];
}
});
// Reset count
tokenizerCallsCount = 1;
} catch (error) {
console.log('Free and reset encoders error');
console.error(error);
}
}
// Checks if the cache of tokenizers has reached a certain size. If it has, it frees and resets all tokenizers.
resetTokenizersIfNecessary() {
if (tokenizerCallsCount >= 25) {
if (this.options.debug) {
console.debug('freeAndResetAllEncoders: reached 25 encodings, resetting...');
}
this.constructor.freeAndResetAllEncoders();
}
tokenizerCallsCount++;
}
// Returns the token count of a given text. It also checks and resets the tokenizers if necessary.
getTokenCount(text) {
this.resetTokenizersIfNecessary();
try {
const tokenizer = this.selectTokenizer();
return tokenizer.encode(text, 'all').length;
} catch (error) {
this.constructor.freeAndResetAllEncoders();
const tokenizer = this.selectTokenizer();
return tokenizer.encode(text, 'all').length;
}
}
getSaveOptions() {
return {
chatGptLabel: this.options.chatGptLabel,
promptPrefix: this.options.promptPrefix,
...this.modelOptions,
};
}
getBuildMessagesOptions(opts) {
return {
isChatCompletion: this.isChatCompletion,
promptPrefix: opts.promptPrefix,
abortController: opts.abortController,
};
}
async buildMessages(
messages,
parentMessageId,
{ isChatCompletion = false, promptPrefix = null },
) {
if (!isChatCompletion) {
return await this.buildPrompt(messages, parentMessageId, {
isChatGptModel: isChatCompletion,
promptPrefix,
});
}
let payload;
let instructions;
let tokenCountMap;
let promptTokens;
let orderedMessages = this.constructor.getMessagesForConversation(messages, parentMessageId);
promptPrefix = (promptPrefix || this.options.promptPrefix || '').trim();
if (promptPrefix) {
promptPrefix = `Instructions:\n${promptPrefix}`;
instructions = {
role: 'system',
name: 'instructions',
content: promptPrefix,
};
if (this.contextStrategy) {
instructions.tokenCount = this.getTokenCountForMessage(instructions);
}
}
const formattedMessages = orderedMessages.map((message) => {
let { role: _role, sender, text } = message;
const role = _role ?? sender;
const content = text ?? '';
const formattedMessage = {
role: role?.toLowerCase() === 'user' ? 'user' : 'assistant',
content,
};
if (this.options?.name && formattedMessage.role === 'user') {
formattedMessage.name = this.options.name;
}
if (this.contextStrategy) {
formattedMessage.tokenCount =
message.tokenCount ?? this.getTokenCountForMessage(formattedMessage);
}
return formattedMessage;
});
// TODO: need to handle interleaving instructions better
if (this.contextStrategy) {
({ payload, tokenCountMap, promptTokens, messages } = await this.handleContextStrategy({
instructions,
orderedMessages,
formattedMessages,
}));
}
const result = {
prompt: payload,
promptTokens,
messages,
};
if (tokenCountMap) {
tokenCountMap.instructions = instructions?.tokenCount;
result.tokenCountMap = tokenCountMap;
}
return result;
}
async sendCompletion(payload, opts = {}) {
let reply = '';
let result = null;
let streamResult = null;
this.modelOptions.user = this.user;
if (typeof opts.onProgress === 'function') {
await this.getCompletion(
payload,
(progressMessage) => {
if (progressMessage === '[DONE]') {
return;
}
if (progressMessage.choices) {
streamResult = progressMessage;
}
const token = this.isChatCompletion
? progressMessage.choices?.[0]?.delta?.content
: progressMessage.choices?.[0]?.text;
// first event's delta content is always undefined
if (!token) {
return;
}
if (this.options.debug) {
// console.debug(token);
}
if (token === this.endToken) {
return;
}
opts.onProgress(token);
reply += token;
},
opts.abortController || new AbortController(),
);
} else {
result = await this.getCompletion(
payload,
null,
opts.abortController || new AbortController(),
);
if (this.options.debug) {
console.debug(JSON.stringify(result));
}
if (this.isChatCompletion) {
reply = result.choices[0].message.content;
} else {
reply = result.choices[0].text.replace(this.endToken, '');
}
}
if (streamResult && typeof opts.addMetadata === 'function') {
const { finish_reason } = streamResult.choices[0];
opts.addMetadata({ finish_reason });
}
return reply.trim();
}
getTokenCountForResponse(response) {
return this.getTokenCountForMessage({
role: 'assistant',
content: response.text,
});
}
async titleConvo({ text, responseText = '' }) {
let title = 'New Chat';
const convo = `||>User:
"${text}"
||>Response:
"${JSON.stringify(responseText)}"`;
const modelOptions = {
model: 'gpt-3.5-turbo-0613',
temperature: 0.2,
presence_penalty: 0,
frequency_penalty: 0,
max_tokens: 16,
};
const configOptions = {};
if (this.langchainProxy) {
configOptions.basePath = this.langchainProxy;
}
try {
const llm = createLLM({
modelOptions,
configOptions,
openAIApiKey: this.apiKey,
azure: this.azure,
});
title = await runTitleChain({ llm, text, convo });
} catch (e) {
console.error(e.message);
console.log('There was an issue generating title with LangChain, trying the old method...');
modelOptions.model = 'gpt-3.5-turbo';
const instructionsPayload = [
{
role: 'system',
content: `Detect user language and write in the same language an extremely concise title for this conversation, which you must accurately detect.
Write in the detected language. Title in 5 Words or Less. No Punctuation or Quotation. Do not mention the language. All first letters of every word should be capitalized and write the title in User Language only.
${convo}
||>Title:`,
},
];
try {
title = (await this.sendPayload(instructionsPayload, { modelOptions })).replaceAll('"', '');
} catch (e) {
console.error(e);
console.log('There was another issue generating the title, see error above.');
}
}
console.log('CONVERSATION TITLE', title);
return title;
}
}
module.exports = OpenAIClient;