const Keyv = require('keyv');
const crypto = require('crypto');
const {
  EModelEndpoint,
  resolveHeaders,
  mapModelToAzureConfig,
} = require('librechat-data-provider');
const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
const { fetchEventSource } = require('@waylaidwanderer/fetch-event-source');
const { Agent, ProxyAgent } = require('undici');
const BaseClient = require('./BaseClient');
const { logger } = require('~/config');
const { extractBaseURL, constructAzureURL, genAzureChatCompletion } = require('~/utils');

const CHATGPT_MODEL = 'gpt-3.5-turbo';
const tokenizersCache = {};

class ChatGPTClient extends BaseClient {
  constructor(apiKey, options = {}, cacheOptions = {}) {
    super(apiKey, options, cacheOptions);

    cacheOptions.namespace = cacheOptions.namespace || 'chatgpt';
    this.conversationsCache = new Keyv(cacheOptions);
    this.setOptions(options);
  }

  setOptions(options) {
    if (this.options && !this.options.replaceOptions) {
      // nested options aren't spread properly, so we need to do this manually
      this.options.modelOptions = {
        ...this.options.modelOptions,
        ...options.modelOptions,
      };
      delete options.modelOptions;
      // now we can merge options
      this.options = {
        ...this.options,
        ...options,
      };
    } else {
      this.options = options;
    }

    if (this.options.openaiApiKey) {
      this.apiKey = this.options.openaiApiKey;
    }

    const modelOptions = this.options.modelOptions || {};
    this.modelOptions = {
      ...modelOptions,
      // set some good defaults (check for undefined in some cases because they may be 0)
      model: modelOptions.model || CHATGPT_MODEL,
      temperature: typeof modelOptions.temperature === 'undefined' ? 0.8 : modelOptions.temperature,
      top_p: typeof modelOptions.top_p === 'undefined' ? 1 : modelOptions.top_p,
      presence_penalty:
        typeof modelOptions.presence_penalty === 'undefined' ? 1 : modelOptions.presence_penalty,
      stop: modelOptions.stop,
    };

    this.isChatGptModel = this.modelOptions.model.includes('gpt-');
    const { isChatGptModel } = this;
    this.isUnofficialChatGptModel =
      this.modelOptions.model.startsWith('text-chat') ||
      this.modelOptions.model.startsWith('text-davinci-002-render');
    const { isUnofficialChatGptModel } = this;

    // Davinci models have a max context length of 4097 tokens.
    this.maxContextTokens = this.options.maxContextTokens || (isChatGptModel ? 4095 : 4097);
    // I decided to reserve 1024 tokens for the response.
    // The max prompt tokens is determined by the max context tokens minus the max response tokens.
    // Earlier messages will be dropped until the prompt is within the limit.
    this.maxResponseTokens = this.modelOptions.max_tokens || 1024;
    this.maxPromptTokens =
      this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens;

    if (this.maxPromptTokens + this.maxResponseTokens > this.maxContextTokens) {
      throw new Error(
        `maxPromptTokens + max_tokens (${this.maxPromptTokens} + ${this.maxResponseTokens} = ${
          this.maxPromptTokens + this.maxResponseTokens
        }) must be less than or equal to maxContextTokens (${this.maxContextTokens})`,
      );
    }
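
    // Example budget with the defaults above: maxContextTokens = 4095 (chat model) and
    // maxResponseTokens = 1024 give maxPromptTokens = 4095 - 1024 = 3071, so 3071 + 1024 <= 4095
    // passes the check. Passing maxPromptTokens: 3500 instead would throw (3500 + 1024 > 4095).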

    this.userLabel = this.options.userLabel || 'User';
    this.chatGptLabel = this.options.chatGptLabel || 'ChatGPT';

    if (isChatGptModel) {
      // Use these faux tokens to help the AI understand the context since we are building the chat log ourselves.
      // Trying to use "<|im_start|>" causes the AI to still generate "<" or "<|" at the end sometimes for some reason,
      // without tripping the stop sequences, so I'm using "||>" instead.
      this.startToken = '||>';
      this.endToken = '';
      this.gptEncoder = this.constructor.getTokenizer('cl100k_base');
    } else if (isUnofficialChatGptModel) {
      this.startToken = '<|im_start|>';
      this.endToken = '<|im_end|>';
      this.gptEncoder = this.constructor.getTokenizer('text-davinci-003', true, {
        '<|im_start|>': 100264,
        '<|im_end|>': 100265,
      });
    } else {
      // Previously I was trying to use "<|endoftext|>" but there seems to be some bug with OpenAI's token counting
      // system that causes only the first "<|endoftext|>" to be counted as 1 token, and the rest are not treated
      // as a single token. So we're using this instead.
      this.startToken = '||>';
      this.endToken = '';
      try {
        this.gptEncoder = this.constructor.getTokenizer(this.modelOptions.model, true);
      } catch {
        this.gptEncoder = this.constructor.getTokenizer('text-davinci-003', true);
      }
    }

    if (!this.modelOptions.stop) {
      const stopTokens = [this.startToken];
      if (this.endToken && this.endToken !== this.startToken) {
        stopTokens.push(this.endToken);
      }
      stopTokens.push(`\n${this.userLabel}:`);
      stopTokens.push('<|diff_marker|>');
      // I chose not to do one for `chatGptLabel` because I've never seen it happen
      this.modelOptions.stop = stopTokens;
    }
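
    // For a default chat model setup this yields stop sequences like
    // ['||>', '\nUser:', '<|diff_marker|>'] (endToken is empty, so it is skipped).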

    if (this.options.reverseProxyUrl) {
      this.completionsUrl = this.options.reverseProxyUrl;
    } else if (isChatGptModel) {
      this.completionsUrl = 'https://api.openai.com/v1/chat/completions';
    } else {
      this.completionsUrl = 'https://api.openai.com/v1/completions';
    }

    return this;
  }

  static getTokenizer(encoding, isModelName = false, extendSpecialTokens = {}) {
    if (tokenizersCache[encoding]) {
      return tokenizersCache[encoding];
    }
    let tokenizer;
    if (isModelName) {
      tokenizer = encodingForModel(encoding, extendSpecialTokens);
    } else {
      tokenizer = getEncoding(encoding, extendSpecialTokens);
    }
    tokenizersCache[encoding] = tokenizer;
    return tokenizer;
  }
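
  // Tokenizers are cached per encoding/model name, so repeated calls reuse the same tiktoken
  // instance, e.g. ChatGPTClient.getTokenizer('cl100k_base').encode('hello world').length
  // returns the token count for that string without rebuilding the encoder.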

  async getCompletion(input, onProgress, abortController = null) {
    if (!abortController) {
      abortController = new AbortController();
    }

    let modelOptions = { ...this.modelOptions };
    if (typeof onProgress === 'function') {
      modelOptions.stream = true;
    }
    if (this.isChatGptModel) {
      modelOptions.messages = input;
    } else {
      modelOptions.prompt = input;
    }

    if (this.useOpenRouter && modelOptions.prompt) {
      delete modelOptions.stop;
    }

    const { debug } = this.options;
    let baseURL = this.completionsUrl;
    if (debug) {
      console.debug();
      console.debug(baseURL);
      console.debug(modelOptions);
      console.debug();
    }

    const opts = {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      dispatcher: new Agent({
        bodyTimeout: 0,
        headersTimeout: 0,
      }),
    };

    if (this.isVisionModel) {
      modelOptions.max_tokens = 4000;
    }

    /** @type {TAzureConfig | undefined} */
    const azureConfig = this.options?.req?.app?.locals?.[EModelEndpoint.azureOpenAI];

    const isAzure = this.azure || this.options.azure;
    if (
      (isAzure && this.isVisionModel && azureConfig) ||
      (azureConfig && this.isVisionModel && this.options.endpoint === EModelEndpoint.azureOpenAI)
    ) {
      const { modelGroupMap, groupMap } = azureConfig;
      const {
        azureOptions,
        baseURL,
        headers = {},
        serverless,
      } = mapModelToAzureConfig({
        modelName: modelOptions.model,
        modelGroupMap,
        groupMap,
      });
      opts.headers = resolveHeaders(headers);
      this.langchainProxy = extractBaseURL(baseURL);
      this.apiKey = azureOptions.azureOpenAIApiKey;

      const groupName = modelGroupMap[modelOptions.model].group;
      this.options.addParams = azureConfig.groupMap[groupName].addParams;
      this.options.dropParams = azureConfig.groupMap[groupName].dropParams;
      // Note: `forcePrompt` not re-assigned as only chat models are vision models

      this.azure = !serverless && azureOptions;
      this.azureEndpoint =
        !serverless && genAzureChatCompletion(this.azure, modelOptions.model, this);
    }

    if (this.options.headers) {
      opts.headers = { ...opts.headers, ...this.options.headers };
    }

    if (isAzure) {
      // Azure does not accept `model` in the body, so we need to remove it.
      delete modelOptions.model;

      baseURL = this.langchainProxy
        ? constructAzureURL({
            baseURL: this.langchainProxy,
            azureOptions: this.azure,
          })
        : this.azureEndpoint.split(/\/(chat|completion)/)[0];

      if (this.options.forcePrompt) {
        baseURL += '/completions';
      } else {
        baseURL += '/chat/completions';
      }

      opts.defaultQuery = { 'api-version': this.azure.azureOpenAIApiVersion };
      opts.headers = { ...opts.headers, 'api-key': this.apiKey };
    } else if (this.apiKey) {
      opts.headers.Authorization = `Bearer ${this.apiKey}`;
    }
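
    // With a hypothetical Azure setup (resource "my-resource", deployment "gpt-4-vision",
    // api-version "2024-02-15-preview"), the request would target something like:
    // https://my-resource.openai.azure.com/openai/deployments/gpt-4-vision/chat/completions?api-version=2024-02-15-preview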

    if (process.env.OPENAI_ORGANIZATION) {
      opts.headers['OpenAI-Organization'] = process.env.OPENAI_ORGANIZATION;
    }

    if (this.useOpenRouter) {
      opts.headers['HTTP-Referer'] = 'https://librechat.ai';
      opts.headers['X-Title'] = 'LibreChat';
    }

    if (this.options.proxy) {
      opts.dispatcher = new ProxyAgent(this.options.proxy);
    }

    /* Hacky fixes for the Mistral AI API:
       - Re-orders the system message to the top of the messages payload, as it is not allowed anywhere else
       - If there is only one message and it's a system message, changes its role to user
     */
    if (baseURL.includes('https://api.mistral.ai/v1') && modelOptions.messages) {
      const { messages } = modelOptions;

      const systemMessageIndex = messages.findIndex((msg) => msg.role === 'system');

      if (systemMessageIndex > 0) {
        const [systemMessage] = messages.splice(systemMessageIndex, 1);
        messages.unshift(systemMessage);
      }

      modelOptions.messages = messages;

      if (messages.length === 1 && messages[0].role === 'system') {
        modelOptions.messages[0].role = 'user';
      }
    }
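
    // e.g. [{ role: 'user', ... }, { role: 'system', ... }] becomes
    // [{ role: 'system', ... }, { role: 'user', ... }] before being sent to Mistral.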

    if (this.options.addParams && typeof this.options.addParams === 'object') {
      modelOptions = {
        ...modelOptions,
        ...this.options.addParams,
      };
      logger.debug('[ChatGPTClient] chatCompletion: added params', {
        addParams: this.options.addParams,
        modelOptions,
      });
    }

    if (this.options.dropParams && Array.isArray(this.options.dropParams)) {
      this.options.dropParams.forEach((param) => {
        delete modelOptions[param];
      });
      logger.debug('[ChatGPTClient] chatCompletion: dropped params', {
        dropParams: this.options.dropParams,
        modelOptions,
      });
    }

    if (baseURL.includes('v1') && !baseURL.includes('/completions') && !this.isChatCompletion) {
      baseURL = baseURL.split('v1')[0] + 'v1/completions';
    } else if (
      baseURL.includes('v1') &&
      !baseURL.includes('/chat/completions') &&
      this.isChatCompletion
    ) {
      baseURL = baseURL.split('v1')[0] + 'v1/chat/completions';
    }

    const BASE_URL = new URL(baseURL);
    if (opts.defaultQuery) {
      Object.entries(opts.defaultQuery).forEach(([key, value]) => {
        BASE_URL.searchParams.append(key, value);
      });
      delete opts.defaultQuery;
    }
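
    // Any defaultQuery entries end up in the query string, e.g. the Azure `api-version`
    // set above becomes `...?api-version=<azureOpenAIApiVersion>` in the final URL.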

    const completionsURL = BASE_URL.toString();
    opts.body = JSON.stringify(modelOptions);

    if (modelOptions.stream) {
      // eslint-disable-next-line no-async-promise-executor
      return new Promise(async (resolve, reject) => {
        try {
          let done = false;
          await fetchEventSource(completionsURL, {
            ...opts,
            signal: abortController.signal,
            async onopen(response) {
              if (response.status === 200) {
                return;
              }
              if (debug) {
                console.debug(response);
              }
              let error;
              try {
                const body = await response.text();
                error = new Error(`Failed to send message. HTTP ${response.status} - ${body}`);
                error.status = response.status;
                error.json = JSON.parse(body);
              } catch {
                error = error || new Error(`Failed to send message. HTTP ${response.status}`);
              }
              throw error;
            },
            onclose() {
              if (debug) {
                console.debug('Server closed the connection unexpectedly, returning...');
              }
              // workaround for private API not sending [DONE] event
              if (!done) {
                onProgress('[DONE]');
                resolve();
              }
            },
            onerror(err) {
              if (debug) {
                console.debug(err);
              }
              // rethrow to stop the operation
              throw err;
            },
            onmessage(message) {
              if (debug) {
                console.debug(message);
              }
              if (!message.data || message.event === 'ping') {
                return;
              }
              if (message.data === '[DONE]') {
                onProgress('[DONE]');
                resolve();
                done = true;
                return;
              }
              onProgress(JSON.parse(message.data));
            },
          });
        } catch (err) {
          reject(err);
        }
      });
    }
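
    // In the streaming branch above, tokens arrive as server-sent events: each `data:` payload is
    // a JSON chunk such as {"choices":[{"delta":{"content":"Hi"}}]} and the stream ends with
    // `data: [DONE]`, forwarded to onProgress as the string '[DONE]'. Without a progress callback,
    // the request falls through to a single non-streaming fetch below.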
    const response = await fetch(completionsURL, {
      ...opts,
      signal: abortController.signal,
    });
    if (response.status !== 200) {
      const body = await response.text();
      const error = new Error(`Failed to send message. HTTP ${response.status} - ${body}`);
      error.status = response.status;
      try {
        error.json = JSON.parse(body);
      } catch {
        error.body = body;
      }
      throw error;
    }
    return response.json();
  }

  async generateTitle(userMessage, botMessage) {
    const instructionsPayload = {
      role: 'system',
      content: `Write an extremely concise subtitle for this conversation with no more than a few words. All words should be capitalized. Exclude punctuation.

||>Message:
${userMessage.message}
||>Response:
${botMessage.message}

||>Title:`,
    };

    const titleGenClientOptions = JSON.parse(JSON.stringify(this.options));
    titleGenClientOptions.modelOptions = {
      model: 'gpt-3.5-turbo',
      temperature: 0,
      presence_penalty: 0,
      frequency_penalty: 0,
    };
    const titleGenClient = new ChatGPTClient(this.apiKey, titleGenClientOptions);
    const result = await titleGenClient.getCompletion([instructionsPayload], null);
    // remove any non-alphanumeric characters, replace multiple spaces with 1, and then trim
    return result.choices[0].message.content
      .replace(/[^a-zA-Z0-9' ]/g, '')
      .replace(/\s+/g, ' ')
      .trim();
  }
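
  // e.g. a completion of ' "Azure Setup & Testing!" ' is cleaned up to 'Azure Setup Testing'.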

  async sendMessage(message, opts = {}) {
    if (opts.clientOptions && typeof opts.clientOptions === 'object') {
      this.setOptions(opts.clientOptions);
    }

    const conversationId = opts.conversationId || crypto.randomUUID();
    const parentMessageId = opts.parentMessageId || crypto.randomUUID();

    let conversation =
      typeof opts.conversation === 'object'
        ? opts.conversation
        : await this.conversationsCache.get(conversationId);

    let isNewConversation = false;
    if (!conversation) {
      conversation = {
        messages: [],
        createdAt: Date.now(),
      };
      isNewConversation = true;
    }

    const shouldGenerateTitle = opts.shouldGenerateTitle && isNewConversation;

    const userMessage = {
      id: crypto.randomUUID(),
      parentMessageId,
      role: 'User',
      message,
    };
    conversation.messages.push(userMessage);

    // Doing it this way instead of having each message be a separate element in the array seems to be more reliable,
    // especially when it comes to keeping the AI in character. It also seems to improve coherency and context retention.
    const { prompt: payload, context } = await this.buildPrompt(
      conversation.messages,
      userMessage.id,
      {
        isChatGptModel: this.isChatGptModel,
        promptPrefix: opts.promptPrefix,
      },
    );

    if (this.options.keepNecessaryMessagesOnly) {
      conversation.messages = context;
    }

    let reply = '';
    let result = null;
    if (typeof opts.onProgress === 'function') {
      await this.getCompletion(
        payload,
        (progressMessage) => {
          if (progressMessage === '[DONE]') {
            return;
          }
          const token = this.isChatGptModel
            ? progressMessage.choices[0].delta.content
            : progressMessage.choices[0].text;
          // first event's delta content is always undefined
          if (!token) {
            return;
          }
          if (this.options.debug) {
            console.debug(token);
          }
          if (token === this.endToken) {
            return;
          }
          opts.onProgress(token);
          reply += token;
        },
        opts.abortController || new AbortController(),
      );
    } else {
      result = await this.getCompletion(
        payload,
        null,
        opts.abortController || new AbortController(),
      );
      if (this.options.debug) {
        console.debug(JSON.stringify(result));
      }
      if (this.isChatGptModel) {
        reply = result.choices[0].message.content;
      } else {
        reply = result.choices[0].text.replace(this.endToken, '');
      }
    }

    // avoids some rendering issues when using the CLI app
    if (this.options.debug) {
      console.debug();
    }

    reply = reply.trim();

    const replyMessage = {
      id: crypto.randomUUID(),
      parentMessageId: userMessage.id,
      role: 'ChatGPT',
      message: reply,
    };
    conversation.messages.push(replyMessage);

    const returnData = {
      response: replyMessage.message,
      conversationId,
      parentMessageId: replyMessage.parentMessageId,
      messageId: replyMessage.id,
      details: result || {},
    };

    if (shouldGenerateTitle) {
      conversation.title = await this.generateTitle(userMessage, replyMessage);
      returnData.title = conversation.title;
    }

    await this.conversationsCache.set(conversationId, conversation);

    if (this.options.returnConversation) {
      returnData.conversation = conversation;
    }

    return returnData;
  }

  // sendMessage passes the user message's id as the second argument; accept it here (currently
  // unused in this method) so the options object is destructured correctly.
  async buildPrompt(
    messages,
    parentMessageId,
    { isChatGptModel = false, promptPrefix = null } = {},
  ) {
    promptPrefix = (promptPrefix || this.options.promptPrefix || '').trim();
    if (promptPrefix) {
      // If the prompt prefix doesn't end with the end token, add it.
      if (!promptPrefix.endsWith(`${this.endToken}`)) {
        promptPrefix = `${promptPrefix.trim()}${this.endToken}\n\n`;
      }
      promptPrefix = `${this.startToken}Instructions:\n${promptPrefix}`;
    } else {
      const currentDateString = new Date().toLocaleDateString('en-us', {
        year: 'numeric',
        month: 'long',
        day: 'numeric',
      });
      promptPrefix = `${this.startToken}Instructions:\nYou are ChatGPT, a large language model trained by OpenAI. Respond conversationally.\nCurrent date: ${currentDateString}${this.endToken}\n\n`;
    }

    const promptSuffix = `${this.startToken}${this.chatGptLabel}:\n`; // Prompt ChatGPT to respond.

    const instructionsPayload = {
      role: 'system',
      name: 'instructions',
      content: promptPrefix,
    };

    const messagePayload = {
      role: 'system',
      content: promptSuffix,
    };

    let currentTokenCount;
    if (isChatGptModel) {
      currentTokenCount =
        this.getTokenCountForMessage(instructionsPayload) +
        this.getTokenCountForMessage(messagePayload);
    } else {
      currentTokenCount = this.getTokenCount(`${promptPrefix}${promptSuffix}`);
    }
    let promptBody = '';
    const maxTokenCount = this.maxPromptTokens;

    const context = [];

    // Iterate backwards through the messages, adding them to the prompt until we reach the max token count.
    // Do this within a recursive async function so that it doesn't block the event loop for too long.
    const buildPromptBody = async () => {
      if (currentTokenCount < maxTokenCount && messages.length > 0) {
        const message = messages.pop();
        const roleLabel =
          message?.isCreatedByUser || message?.role?.toLowerCase() === 'user'
            ? this.userLabel
            : this.chatGptLabel;
        const messageString = `${this.startToken}${roleLabel}:\n${
          message?.text ?? message?.message
        }${this.endToken}\n`;
        let newPromptBody;
        if (promptBody || isChatGptModel) {
          newPromptBody = `${messageString}${promptBody}`;
        } else {
          // Always insert prompt prefix before the last user message, if not gpt-3.5-turbo.
          // This makes the AI obey the prompt instructions better, which is important for custom instructions.
          // After a bunch of testing, it doesn't seem to cause the AI any confusion, even if you ask it things
          // like "what's the last thing I wrote?".
          newPromptBody = `${promptPrefix}${messageString}${promptBody}`;
        }

        context.unshift(message);

        const tokenCountForMessage = this.getTokenCount(messageString);
        const newTokenCount = currentTokenCount + tokenCountForMessage;
        if (newTokenCount > maxTokenCount) {
          if (promptBody) {
            // This message would put us over the token limit, so don't add it.
            return false;
          }
          // This is the first message, so we can't add it. Just throw an error.
          throw new Error(
            `Prompt is too long. Max token count is ${maxTokenCount}, but prompt is ${newTokenCount} tokens long.`,
          );
        }
        promptBody = newPromptBody;
        currentTokenCount = newTokenCount;
        // wait for next tick to avoid blocking the event loop
        await new Promise((resolve) => setImmediate(resolve));
        return buildPromptBody();
      }
      return true;
    };

    await buildPromptBody();

    const prompt = `${promptBody}${promptSuffix}`;
    if (isChatGptModel) {
      messagePayload.content = prompt;
      // Add 3 tokens for Assistant Label priming after all messages have been counted.
      currentTokenCount += 3;
    }

    // Use up to `this.maxContextTokens` tokens (prompt + response), but try to leave `this.maxResponseTokens` tokens for the response.
    this.modelOptions.max_tokens = Math.min(
      this.maxContextTokens - currentTokenCount,
      this.maxResponseTokens,
    );
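
    // With the defaults (maxContextTokens 4095, maxResponseTokens 1024), a prompt that counts
    // 3000 tokens leaves max_tokens = min(4095 - 3000, 1024) = 1024; at 3500 prompt tokens it
    // shrinks to min(595, 1024) = 595.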

    if (this.options.debug) {
      console.debug(`Prompt : ${prompt}`);
    }

    if (isChatGptModel) {
      return { prompt: [instructionsPayload, messagePayload], context };
    }
    return { prompt, context, promptTokens: currentTokenCount };
  }

  getTokenCount(text) {
    return this.gptEncoder.encode(text, 'all').length;
  }

  /**
   * Algorithm adapted from "6. Counting tokens for chat API calls" of
   * https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
   *
   * An additional 3 tokens need to be added for assistant label priming after all messages have been counted.
   *
   * @param {Object} message
   */
  getTokenCountForMessage(message) {
    // Note: gpt-3.5-turbo and gpt-4 may update over time. Use default for these as well as for unknown models
    let tokensPerMessage = 3;
    let tokensPerName = 1;

    if (this.modelOptions.model === 'gpt-3.5-turbo-0301') {
      tokensPerMessage = 4;
      tokensPerName = -1;
    }

    let numTokens = tokensPerMessage;
    for (let [key, value] of Object.entries(message)) {
      numTokens += this.getTokenCount(value);
      if (key === 'name') {
        numTokens += tokensPerName;
      }
    }

    return numTokens;
  }
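
  // Example: for { role: 'system', name: 'instructions', content: '...' } on a default model, the
  // count is 3 (per message) + tokens(role) + tokens(name) + tokens(content) + 1 (name key present).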
}

module.exports = ChatGPTClient;
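
// A minimal usage sketch (assumes a valid OpenAI API key and an async context; not part of this module):
//
//   const client = new ChatGPTClient(process.env.OPENAI_API_KEY, {
//     modelOptions: { model: 'gpt-3.5-turbo', temperature: 0.7 },
//   });
//   const { response, conversationId, messageId } = await client.sendMessage('Hello!', {
//     onProgress: (token) => process.stdout.write(token),
//   });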