LibreChat/api/app/clients/OpenAIClient.js
Danny Avila ecd63eb9f1
feat: Assistants API, General File Support, Side Panel, File Explorer (#1696)
* feat: assistant name/icon in Landing & Header

* feat: assistant name in textarea placeholder, and use `Assistant` as default name

* feat: display non-image files in user messages

* fix: only render files if files.length is > 0

* refactor(config -> file-config): move file related configuration values to separate module, add excel types

* chore: spreadsheet file rendering

* fix(Landing): dark mode style for Assistant Name

* refactor: move progress incrementing to own hook, start smaller, cap near limit (1)

* refactor(useContentHandler): add empty Text part if last part was completed tool or image

* chore: add accordion trigger border styling for dark mode

* feat: Assistant Builder model selection

* chore: use Spinner when Assistant is mutating

* fix(get/assistants): return correct response object `AssistantListResponse`

* refactor(Spinner): pass size as prop

* refactor: make assistant crud mutations optimistic, add types for options

* chore: remove assistants route and view

* chore: move assistant builder components to separate directory

* feat(ContextButton): delete Assistant via context button/dialog, add localization

* refactor: conditionally show use and context menu buttons, add localization for create assistant

* feat: save side panel states to localStorage

* style(SidePanel): improve avatar menu and assistant select styling for dark mode

* refactor: make NavToggle reusable for either side (left or right), add SidePanel Toggle with ability to close it completely

* fix: resize handle and navToggle behavior

* fix(/avatar/:assistant_id): await `deleteFile` and assign unique name to uploaded image

* WIP: file UI components from PR #576

* refactor(OpenAIMinimalIcon): pass className

* feat: formatDate helper fn

* feat: DataTableColumnHeader

* feat: add row selection, formatted row values, number of rows selected

* WIP: add files to Side panel temporarily

* feat: `LB_QueueAsyncCall`: Leaky Bucket queue for external APIs, use in `processDeleteRequest`

* fix(TFile): correct `source` type with `FileSources`

* fix(useFileHandling): use `continue` instead of return when iterating multiple files, add file type to extendedFile

* chore: add generic setter type

* refactor(processDeleteRequest): settle promises to prevent rejections from processing deletions, log errors

* feat: `useFileDeletion` to reuse file deletion logic

* refactor(useFileDeletion): make `setFiles` an optional param and use object as param

* feat: useDeleteFilesFromTable

* feat: use real `files` data and add deletion action to data table

* fix(Table): make headers sticky

* feat: add dynamic filtering for columns; only show the user Host or OpenAI storage types

* style(DropdownMenu): replace `slate` with `gray`

* style(DataTable): apply dark mode themes and other misc styling

* style(Columns): add color to OpenAI Storage option

* refactor(FileContainer): make file preview reusable

* refactor(Images): make image preview reusable

* refactor(FilePreview): make file prop optional for FileIcon and FilePreview, fix relative style

* feat(Columns): add file/image previews, set a minimum size to show for file size in bytes

* WIP: File Panel with real files and formatted

* feat: open files dialog from panel

* style: file data table mobile and general column styling fixes

* refactor(api/files): return files sorted by the most recently updated

* refactor: provide fileMap through context to prevent re-selecting files to map in different areas; remove unused imports commented out in PanelColumns

* refactor(ExtendFile): make File type optional, add `attached` to prevent attached files from being deleted on remove, make Message.files a partial TFile type

* feat: attach files through file panel

* refactor(useFileHandling): move files to the start of cache list when uploaded

* refactor(useDeleteFilesMutation): delete files from cache when successfully deleted from server

* fix(FileRow): handle possible edge case of duplication due to attaching recently uploaded file

* style(SidePanel): make resize grip border transparent, remove unnecessary styling on close sidepanel button

* feat: action utilities and tests

* refactor(actions): add `ValidationResult` type and change wording for no server URL found

* refactor(actions): check for empty server URL

* fix(data-provider): revert tsconfig to fix type issue resolution

* feat(client): first pass of actions input for assistants

* refactor(FunctionSignature): change method to output object instead of string

* refactor(models/Assistant): add actions field to schema, use searchParams object for methods, and add `getAssistant`

* feat: post actions input first pass
- create new Action document
- add actions to Assistant DB document
- create /action/:assistant_id POST route
- pass more props down from PanelSwitcher, derive assistant_id from switcher
- move privacy policy to ActionInput
- reset data on input change/validation
- add `useUpdateAction`
- conform FunctionSignature type to FunctionTool
- add action, assistant doc, update hook related types

* refactor: optimize assistant/actions relationship
- pass domain in metadata as hostname and not a URL
- include domain in tool name
- add `getActions` for actions retrieval by user
- add `getAssistants` for assistant docs retrieval by user
- add `assistant_id` to Action schema
- move actions to own module as a subroute to `api/assistants`
- add `useGetActionsQuery` and `useGetAssistantDocsQuery` hooks
- fix Action type def

* feat: show assistant actions in assistant builder

* feat: switch to actions on action click, editing action styling

* fix: add Assistant state for builder panel to allow immediate selection of newly created assistants as well as retaining the current assistant when switching to a different panel within the builder

* refactor(SidePanel/NavToggle): offset less from right when SidePanel is completely collapsed

* chore: rename `processActions` -> `processRequiredActions`

* chore: rename Assistant API Action to RequiredAction

* refactor(actions): avoid nesting actual API params under generic `requestBody` to optimize LLM token usage

* fix(handleTools): avoid calling `validTool` if not defined, add optional param to skip the loading of specs, which throws an error in the context of assistants

* WIP: working first pass of toolCalls generated from openapi specs

* WIP: first pass ToolCall styling

* feat: programmatic iv encryption/decryption helpers

* fix: correct ActionAuth types/enums, and define type for AuthForm

* feat: encryption/decryption helpers for Action AuthMetadata

* refactor(getActions): remove sensitive fields from query response

* refactor(POST/actions): encrypt and remove sensitive fields from mutation response

* fix(ActionService): change ESM import to CJS

* feat: frontend auth handling for actions + optimistic update on action update/creation

* refactor(actions): use the correct variables and types for setAuth method

* refactor: POST /:assistant_id action can now handle updating an existing action, add `saved_auth_fields` to determine when user explicitly saves new auth creds; only send auth metadata if user explicitly saved fields

* refactor(createActionTool): catch errors and send back meaningful error message, add flag to `getActions` to determine whether to retrieve sensitive values or not

* refactor(ToolService): add `action` property to ToolCall PartMetadata to determine if the tool call was an action, fix parsing function name issue with actionDelimiter

* fix(ActionRequest): use URL class to correctly join endpoint parts for `execute` call

* feat: delete assistant actions

* refactor: conditionally show Available actions

* refactor: show `retrieval` and `code_interpreter` as Capabilities, swap `Switch` for `Checkbox`

* chore: remove shadow-stroke from messages

* WIP: first pass of Assistants Knowledge attachments

* refactor: remove AssistantsProvider in favor of FormProvider, fix selectedAssistant re-render bug, map Assistant file_ids to files via fileMap, initialize Knowledge component with mapped files if any exist

* fix: prevent deleting files on assistant file upload

* chore: remove console.log

* refactor(useUploadFileMutation): update files and assistants cache on upload

* chore: disable oauth option as not supported yet

* feat: cancel assistant runs

* refactor: initialize OpenAI client with helper function, resolve all related circular dependencies

* fix(DALL-E): initialization

* fix(process): openai client initialization

* fix: select an existing Assistant when the active one is deleted

* chore: allow attaching files for assistant endpoint, send back relevant OpenAI error message when uploading, destructure OpenAI initialization correctly, add `message_file` to formData when a file is attached to the message but not the assistant

* fix: add assistant_id on newConvo

* fix(initializeClient): import fix

* chore: swap setAssistant for setOption in useEffect

* fix(DALL-E): add processFileURL to loadTools call

* chore: add customConfig to debug logs

* feat: delete threads on convo delete

* chore: replace Assistants icon

* chore: remove console.dir() in `abortRun`

* feat(AssistantService): accumulate text values from run in openai.responseText

* feat: titling for assistants endpoint

* chore: move panel file components to appropriate directory, add file checks for attaching files, change icon for Attach Files

* refactor: add localizations to tools, plugins, add condition for adding/removing user plugins so tool selections don't affect this value

* chore: disable `import from url` action for now

* chore: remove textMimeTypes from default fileConfig for now

* fix: catch tool errors and send as outputs with error messages

* fix: React warning about button as descendant of button

* style: retrieval and cancelled icon

* WIP: pass isSubmitting to Parts, use InProgressCall to display cancelled tool calls correctly, show domain/function name

* fix(meilisearch): fix `postSaveHook` issue where indexing expects a mongo document, and join all text content parts for meili indexing

* ci: fix dall-e tests

* ci: fix client tests

* fix: button types in actions panel

* fix: plugin auth form persisting across tool selections

* fix(ci): update AppService spec with `loadAndFormatTools`

* fix(clearConvos): add id check earlier on

* refactor(AssistantAvatar): set previewURL dynamically when metadata.avatar changes

* feat(assistants): addTitle cache setting

* fix(useSSE): resolve rebase conflicts

* fix: delete mutation

* style(SidePanel): make grip visible on active and hover, invisible otherwise

* ci: add data-provider tests to workflow, also update eslint/tsconfig to recognize specs, and add `text/csv` to fileConfig

* fix: handle edge case where auth object is undefined, and log errors

* refactor(actions): resolve schemas, add tests for resolving refs, import specs from separate file for tests

* chore: remove comment

* fix(ActionsInput): re-render bug when initializing states with action fields

* fix(patch/assistant): filter undefined tools

* chore: add logging for errors in assistants routes

* fix(updateAssistant): map actions to functions to avoid overwriting

* fix(actions): properly handle GET paths

* fix(convos): unhandled delete thread exception

* refactor(AssistantService): pass both thread_id and conversationId when sending intermediate assistant messages, remove `mapMessagesToSteps` from AssistantService

* refactor(useSSE): replace all messages with runMessages and pass latestMessageId to abortRun; fix(checkMessageGaps): include tool calls when syncing messages

* refactor(assistants/chat): invoke `createOnTextProgress` after thread creation

* chore: add typing

* style: sidepanel styling

* style: action tool call domain styling

* feat(assistants): default models, limit retrieval to certain models, add env variables to env.example

* feat: assistants api key in EndpointService

* refactor: set assistant model to conversation on assistant switch

* refactor: set assistant model to conversation on assistant select from panel

* fix(retrieveAndProcessFile): catch attempt to download file with `assistant` purpose which is not allowed; add logging

* feat: retrieval styling, handling, and logging

* chore: rename ASSISTANTS_REVERSE_PROXY to ASSISTANTS_BASE_URL

* feat: FileContext for file metadata

* feat: context file mgmt and filtering

* style(Select): hover/rounded changes

* refactor: explicit conversation switch, endpoint dependent, through `useSelectAssistant`, which does not create new chat if current endpoint is assistant endpoint

* fix(AssistantAvatar): make empty previewURL if no avatar present

* refactor: side panel mobile styling

* style: merge tool and action section, optimize mobile styling for action/tool buttons

* fix: localStorage issues

* fix(useSelectAssistant): invoke react query hook directly in select hook as Map was not being updated in time

* style: light mode fixes

* fix: prevent sidepanel nav styling from shifting layout up

* refactor: change default layout (collapsed by default)

* style: mobile optimization of DataTable

* style: datatable

* feat: client-side hide right-side panel

* chore(useNewConvo): add partial typing for preset

* fix(useSelectAssistant): pass correct model name by using template as preset

* WIP: assistant presets

* refactor(ToolService): add native solution for `TavilySearchResults` and log tool output errors

* refactor: organize imports and use native TavilySearchResults

* fix(TavilySearchResults): stringify result

* fix(ToolCall): show tool call outputs when not an action

* chore: rename Prompt Prefix to custom instructions (in user facing text only)

* refactor(EditPresetDialog): Optimize setting title by debouncing, reset preset on dialog close to avoid state mixture

* feat: add `presetOverride` to overwrite active conversation settings when saving a Preset (relevant for client side updates only)

* feat: Assistant preset settings (client-side)

* fix(Switcher): only set assistant_id and model if current endpoint is Assistants

* feat: use `useDebouncedInput` for updating conversation settings, starting with EditPresetDialog title setting and Assistant instructions setting

* feat(Assistants): add instructions field to settings

* feat(chat/assistants): pass conversation settings to run body

* wip: begin localization and only allow actions if the assistant is created

* refactor(AssistantsPanel): knowledge localization, allow tools on creation

* feat: experimental: allow 'priming' values before assistant is created, that would normally require an assistant_id to be defined

* chore: trim console logs and make more meaningful

* chore: toast messages

* fix(ci): date test

* feat: create file when uploading Assistant Avatar

* feat: file upload rate limiting from custom config with dynamic file route initialization

* refactor: use file upload limiters on post routes only

* refactor(fileConfig): add endpoints field for endpoint specific fileconfigs, add mergeConfig function, add tests

* refactor: fileConfig route, dynamic multer instances used on all '/' and '/images' POST routes, data service and query hook

* feat: supportedMimeTypesSchema, test for array of regex

* feat: configurable file config limits

* chore: clarify assistants file knowledge prereq.

* chore(useTextarea): default to localized 'Assistant' if assistant name is empty

* feat: configurable file limits and toggle file upload per endpoint

* fix(useUploadFileMutation): prevent updating assistant.files cache if file upload is a message_file attachment

* fix(AssistantSelect): set last selected assistant only when timeout successfully runs

* refactor(queries): disable assistant queries if assistants endpoint is not enabled

* chore(Switcher): add localization

* chore: pluralize `assistant` for `EModelEndpoint` key and value

* feat: show/hide assistant UI components based on endpoint availability; librechat.yaml config for disabling builder section and setting polling/timeout intervals

* fix(compactEndpointSchemas): use EModelEndpoint for schema access

* feat(runAssistant): use configured values from `librechat.yaml` for `pollIntervalMs` and `timeout`

* fix: naming issue

* wip: revert landing

* 🎉 happy birthday LibreChat (#1768)

* happy birthday LibreChat

* Refactor endpoint condition in Landing component

* Update birthday message in Eng.tsx

* fix(/config): avoid nesting ternaries

* refactor(/config): check birthday

---------

Co-authored-by: Danny Avila <messagedaniel@protonmail.com>

* fix: landing

* fix: landing

* fix(useMessageHelpers): hardcoded check to use EModelEndpoint instead

* fix(ci): convo test revert to main

* fix(assistants/chat): fix issue where assistant_id was being saved as model for convo

* chore: add logging, race promises to prevent longer timeouts, explicitly set maxRetries and timeouts, robustly catch invalid abortRun params

* refactor: use recoil state for `showStopButton` and only show for assistants endpoint after syncing conversation data

* refactor: optimize abortRun strategy using localStorage, refactor `abortConversation` to use async/await and await the result, refactor how the abortKey cache is set for runs

* fix(checkMessageGaps): assign `assistant_id` to synced messages if defined; prevents UI from showing blank assistant for cancelled messages

* refactor: re-order sequence of chat route, only allow aborting messages after run is created, cancel abortRun if there was a cancelling error (likely due to the run already being cancelled in the chat route), and add extra logging

* chore(typedefs): add httpAgent type to OpenAIClient

* refactor: use custom implementation of retrieving run with axios to allow for timing out run query

* fix(waitForRun): handle timed out run retrieval query

* refactor: update preset conditions:
- presets will retain settings when a different endpoint is selected for existing convos, either when modular or when switching to an assistant
- no longer use `navigateToConvo` on preset select

* fix: temporary calculator hack, as it expects string input when invoked

* fix: cancel abortRun only when cancelling error is a result of the run already being cancelled

* chore: remove use of `fileMaxSizeMB` and total counterpart (redundant)

* docs: custom config documentation update

* docs: assistants api setup and dotenv, new custom config fields

* refactor(Switcher): make Assistant switcher sticky in SidePanel

* chore(useSSE): remove console log of data and message index

* refactor(AssistantPanel): button styling and add secondary select button to bottom of panel

* refactor(OpenAIClient): allow passing conversationId to RunManager through titleConvo and initializeLLM to properly record title context tokens used in cases where conversationId was not defined by the client

* feat(assistants): token tracking for assistant runs

* chore(spendTokens): improve logging

* feat: support/exclude specific assistant Ids

* chore: update `librechat.example.yaml`, optimize `AppService` handling, add new tests for `AppService`, optimize missing/outdated config logging

* chore: mount docker logs to root of project

* chore: condense axios errors

* chore: bump vite

* chore: vite hot reload fix using latest version

* chore(getOpenAIModels): sort instruct models to the end of the models list

* fix(assistants): user provided key

* fix(assistants): user provided key, invalidate more queries on revoke

---------

Co-authored-by: Marco Beretta <81851188+Berry-13@users.noreply.github.com>
2024-02-13 20:42:27 -05:00

1133 lines
35 KiB
JavaScript

const OpenAI = require('openai');
const { HttpsProxyAgent } = require('https-proxy-agent');
const {
getResponseSender,
validateVisionModel,
ImageDetailCost,
ImageDetail,
} = require('librechat-data-provider');
const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
const {
extractBaseURL,
constructAzureURL,
getModelMaxTokens,
genAzureChatCompletion,
} = require('~/utils');
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
const { truncateText, formatMessage, CUT_OFF_PROMPT } = require('./prompts');
const { handleOpenAIErrors } = require('./tools/util');
const spendTokens = require('~/models/spendTokens');
const { createLLM, RunManager } = require('./llm');
const ChatGPTClient = require('./ChatGPTClient');
const { isEnabled } = require('~/server/utils');
const { getFiles } = require('~/models/File');
const { summaryBuffer } = require('./memory');
const { runTitleChain } = require('./chains');
const { tokenSplit } = require('./document');
const BaseClient = require('./BaseClient');
const { logger } = require('~/config');
// Cache to store Tiktoken instances
const tokenizersCache = {};
// Counter for keeping track of the number of tokenizer calls
let tokenizerCallsCount = 0;
class OpenAIClient extends BaseClient {
constructor(apiKey, options = {}) {
super(apiKey, options);
this.ChatGPTClient = new ChatGPTClient();
this.buildPrompt = this.ChatGPTClient.buildPrompt.bind(this);
this.getCompletion = this.ChatGPTClient.getCompletion.bind(this);
this.contextStrategy = options.contextStrategy
? options.contextStrategy.toLowerCase()
: 'discard';
this.shouldSummarize = this.contextStrategy === 'summarize';
/** @type {AzureOptions} */
this.azure = options.azure || false;
this.setOptions(options);
}
// TODO: PluginsClient calls this 3x, unneeded
setOptions(options) {
if (this.options && !this.options.replaceOptions) {
this.options.modelOptions = {
...this.options.modelOptions,
...options.modelOptions,
};
delete options.modelOptions;
this.options = {
...this.options,
...options,
};
} else {
this.options = options;
}
if (this.options.openaiApiKey) {
this.apiKey = this.options.openaiApiKey;
}
const modelOptions = this.options.modelOptions || {};
if (!this.modelOptions) {
this.modelOptions = {
...modelOptions,
model: modelOptions.model || 'gpt-3.5-turbo',
temperature:
typeof modelOptions.temperature === 'undefined' ? 0.8 : modelOptions.temperature,
top_p: typeof modelOptions.top_p === 'undefined' ? 1 : modelOptions.top_p,
presence_penalty:
typeof modelOptions.presence_penalty === 'undefined' ? 1 : modelOptions.presence_penalty,
stop: modelOptions.stop,
};
} else {
// Update the modelOptions if it already exists
this.modelOptions = {
...this.modelOptions,
...modelOptions,
};
}
this.checkVisionRequest(this.options.attachments);
const { OPENROUTER_API_KEY, OPENAI_FORCE_PROMPT } = process.env ?? {};
if (OPENROUTER_API_KEY && !this.azure) {
this.apiKey = OPENROUTER_API_KEY;
this.useOpenRouter = true;
}
const { reverseProxyUrl: reverseProxy } = this.options;
if (
!this.useOpenRouter &&
reverseProxy &&
reverseProxy.includes('https://openrouter.ai/api/v1')
) {
this.useOpenRouter = true;
}
this.FORCE_PROMPT =
isEnabled(OPENAI_FORCE_PROMPT) ||
(reverseProxy && reverseProxy.includes('completions') && !reverseProxy.includes('chat'));
if (typeof this.options.forcePrompt === 'boolean') {
this.FORCE_PROMPT = this.options.forcePrompt;
}
if (this.azure && process.env.AZURE_OPENAI_DEFAULT_MODEL) {
this.azureEndpoint = genAzureChatCompletion(this.azure, this.modelOptions.model, this);
this.modelOptions.model = process.env.AZURE_OPENAI_DEFAULT_MODEL;
} else if (this.azure) {
this.azureEndpoint = genAzureChatCompletion(this.azure, this.modelOptions.model, this);
}
const { model } = this.modelOptions;
this.isChatCompletion = this.useOpenRouter || !!reverseProxy || model.includes('gpt');
this.isChatGptModel = this.isChatCompletion;
if (
model.includes('text-davinci') ||
model.includes('gpt-3.5-turbo-instruct') ||
this.FORCE_PROMPT
) {
this.isChatCompletion = false;
this.isChatGptModel = false;
}
const { isChatGptModel } = this;
this.isUnofficialChatGptModel =
model.startsWith('text-chat') || model.startsWith('text-davinci-002-render');
this.maxContextTokens =
getModelMaxTokens(
model,
this.options.endpointType ?? this.options.endpoint,
this.options.endpointTokenConfig,
) ?? 4095; // 1 less than maximum
if (this.shouldSummarize) {
this.maxContextTokens = Math.floor(this.maxContextTokens / 2);
}
if (this.options.debug) {
logger.debug('[OpenAIClient] maxContextTokens', this.maxContextTokens);
}
this.maxResponseTokens = this.modelOptions.max_tokens || 1024;
this.maxPromptTokens =
this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens;
if (this.maxPromptTokens + this.maxResponseTokens > this.maxContextTokens) {
throw new Error(
`maxPromptTokens + max_tokens (${this.maxPromptTokens} + ${this.maxResponseTokens} = ${
this.maxPromptTokens + this.maxResponseTokens
}) must be less than or equal to maxContextTokens (${this.maxContextTokens})`,
);
}
this.sender =
this.options.sender ??
getResponseSender({
model: this.modelOptions.model,
endpoint: this.options.endpoint,
endpointType: this.options.endpointType,
chatGptLabel: this.options.chatGptLabel,
modelDisplayLabel: this.options.modelDisplayLabel,
});
this.userLabel = this.options.userLabel || 'User';
this.chatGptLabel = this.options.chatGptLabel || 'Assistant';
this.setupTokens();
if (!this.modelOptions.stop && !this.isVisionModel) {
const stopTokens = [this.startToken];
if (this.endToken && this.endToken !== this.startToken) {
stopTokens.push(this.endToken);
}
stopTokens.push(`\n${this.userLabel}:`);
stopTokens.push('<|diff_marker|>');
this.modelOptions.stop = stopTokens;
}
if (reverseProxy) {
this.completionsUrl = reverseProxy;
this.langchainProxy = extractBaseURL(reverseProxy);
} else if (isChatGptModel) {
this.completionsUrl = 'https://api.openai.com/v1/chat/completions';
} else {
this.completionsUrl = 'https://api.openai.com/v1/completions';
}
if (this.azureEndpoint) {
this.completionsUrl = this.azureEndpoint;
}
if (this.azureEndpoint && this.options.debug) {
logger.debug('Using Azure endpoint');
}
if (this.useOpenRouter) {
this.completionsUrl = 'https://openrouter.ai/api/v1/chat/completions';
}
return this;
}
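/*
 * Illustrative sketch (not executed): repeated setOptions calls merge modelOptions rather
 * than replacing them, unless `replaceOptions` is set on the existing options.
 *
 *   client.setOptions({ modelOptions: { temperature: 0.2 } });
 *   client.setOptions({ modelOptions: { top_p: 0.9 } });
 *   // client.modelOptions now includes both temperature: 0.2 and top_p: 0.9
 */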
/**
*
* Checks if the model is a vision model based on request attachments and sets the appropriate options:
* - Sets `this.modelOptions.model` to `gpt-4-vision-preview` if the request is a vision request.
* - Sets `this.isVisionModel` to `true` if vision request.
* - Deletes `this.modelOptions.stop` if vision request.
* @param {Array<Promise<MongoFile[]> | MongoFile[]> | Record<string, MongoFile[]>} attachments
*/
checkVisionRequest(attachments) {
this.isVisionModel = validateVisionModel(this.modelOptions.model);
if (attachments && !this.isVisionModel) {
this.modelOptions.model = 'gpt-4-vision-preview';
this.isVisionModel = true;
}
if (this.isVisionModel) {
delete this.modelOptions.stop;
}
}
setupTokens() {
if (this.isChatCompletion) {
this.startToken = '||>';
this.endToken = '';
} else if (this.isUnofficialChatGptModel) {
this.startToken = '<|im_start|>';
this.endToken = '<|im_end|>';
} else {
this.startToken = '||>';
this.endToken = '';
}
}
// Selects an appropriate tokenizer based on the current configuration of the client instance.
// It takes into account factors such as whether it's a chat completion, an unofficial chat GPT model, etc.
selectTokenizer() {
let tokenizer;
this.encoding = 'text-davinci-003';
if (this.isChatCompletion) {
this.encoding = 'cl100k_base';
tokenizer = this.constructor.getTokenizer(this.encoding);
} else if (this.isUnofficialChatGptModel) {
const extendSpecialTokens = {
'<|im_start|>': 100264,
'<|im_end|>': 100265,
};
tokenizer = this.constructor.getTokenizer(this.encoding, true, extendSpecialTokens);
} else {
try {
const { model } = this.modelOptions;
this.encoding = model.includes('instruct') ? 'text-davinci-003' : model;
tokenizer = this.constructor.getTokenizer(this.encoding, true);
} catch {
tokenizer = this.constructor.getTokenizer('text-davinci-003', true);
}
}
return tokenizer;
}
// Retrieves a tokenizer either from the cache or creates a new one if one doesn't exist in the cache.
// If a tokenizer is being created, it's also added to the cache.
static getTokenizer(encoding, isModelName = false, extendSpecialTokens = {}) {
let tokenizer;
if (tokenizersCache[encoding]) {
tokenizer = tokenizersCache[encoding];
} else {
if (isModelName) {
tokenizer = encodingForModel(encoding, extendSpecialTokens);
} else {
tokenizer = getEncoding(encoding, extendSpecialTokens);
}
tokenizersCache[encoding] = tokenizer;
}
return tokenizer;
}
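// Illustrative: getTokenizer('cl100k_base') returns the cached encoder on repeated calls,
// while getTokenizer('gpt-4', true) resolves the encoder by model name via tiktoken's
// `encoding_for_model`. Cached encoders are released by freeAndResetAllEncoders().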
// Frees all encoders in the cache and resets the count.
static freeAndResetAllEncoders() {
try {
Object.keys(tokenizersCache).forEach((key) => {
if (tokenizersCache[key]) {
tokenizersCache[key].free();
delete tokenizersCache[key];
}
});
// Reset count
tokenizerCallsCount = 1;
} catch (error) {
logger.error('[OpenAIClient] Free and reset encoders error', error);
}
}
// Checks if the cache of tokenizers has reached a certain size. If it has, it frees and resets all tokenizers.
resetTokenizersIfNecessary() {
if (tokenizerCallsCount >= 25) {
if (this.options.debug) {
logger.debug('[OpenAIClient] freeAndResetAllEncoders: reached 25 encodings, resetting...');
}
this.constructor.freeAndResetAllEncoders();
}
tokenizerCallsCount++;
}
/**
* Returns the token count of a given text. It also checks and resets the tokenizers if necessary.
* @param {string} text - The text to get the token count for.
* @returns {number} The token count of the given text.
*/
getTokenCount(text) {
this.resetTokenizersIfNecessary();
try {
const tokenizer = this.selectTokenizer();
return tokenizer.encode(text, 'all').length;
} catch (error) {
this.constructor.freeAndResetAllEncoders();
const tokenizer = this.selectTokenizer();
return tokenizer.encode(text, 'all').length;
}
}
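// Usage sketch (illustrative): `client.getTokenCount('Hello world')` returns the token length
// under the currently selected encoding; on a tokenizer error, all encoders are freed and the
// count is retried once with a fresh tokenizer.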
/**
* Calculate the token cost for an image based on its dimensions and detail level.
*
* @param {Object} image - The image object.
* @param {number} image.width - The width of the image.
* @param {number} image.height - The height of the image.
* @param {'low'|'high'|string|undefined} [image.detail] - The detail level ('low', 'high', or other).
* @returns {number} The calculated token cost.
*/
calculateImageTokenCost({ width, height, detail }) {
if (detail === 'low') {
return ImageDetailCost.LOW;
}
// Calculate the number of 512px squares
const numSquares = Math.ceil(width / 512) * Math.ceil(height / 512);
// Default to high detail cost calculation
return numSquares * ImageDetailCost.HIGH + ImageDetailCost.ADDITIONAL;
}
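// Worked example (assuming ImageDetailCost mirrors OpenAI's published vision pricing of
// 85 base tokens plus 170 tokens per 512px tile): a 1024x1024 'high' detail image covers
// Math.ceil(1024 / 512) * Math.ceil(1024 / 512) = 4 tiles, so its cost is
// 4 * ImageDetailCost.HIGH + ImageDetailCost.ADDITIONAL, while 'low' detail always costs ImageDetailCost.LOW.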
getSaveOptions() {
return {
chatGptLabel: this.options.chatGptLabel,
promptPrefix: this.options.promptPrefix,
resendImages: this.options.resendImages,
imageDetail: this.options.imageDetail,
...this.modelOptions,
};
}
getBuildMessagesOptions(opts) {
return {
isChatCompletion: this.isChatCompletion,
promptPrefix: opts.promptPrefix,
abortController: opts.abortController,
};
}
/**
*
* @param {TMessage[]} _messages
* @returns {TMessage[]}
*/
async addPreviousAttachments(_messages) {
if (!this.options.resendImages) {
return _messages;
}
/**
*
* @param {TMessage} message
*/
const processMessage = async (message) => {
if (!this.message_file_map) {
/** @type {Record<string, MongoFile[]>} */
this.message_file_map = {};
}
const fileIds = message.files.map((file) => file.file_id);
const files = await getFiles({
file_id: { $in: fileIds },
});
await this.addImageURLs(message, files);
this.message_file_map[message.messageId] = files;
return message;
};
const promises = [];
for (const message of _messages) {
if (!message.files) {
promises.push(message);
continue;
}
promises.push(processMessage(message));
}
const messages = await Promise.all(promises);
this.checkVisionRequest(this.message_file_map);
return messages;
}
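// After this resolves, `this.message_file_map` (when any message had files) maps
// messageId -> MongoFile[] for that message, e.g. { '<messageId>': [file1, file2] };
// buildMessages later uses it to add per-image token costs.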
/**
*
* Adds image URLs to the message object and returns the files
*
* @param {TMessage} message
* @param {MongoFile[]} attachments
* @returns {Promise<MongoFile[]>}
*/
async addImageURLs(message, attachments) {
const { files, image_urls } = await encodeAndFormat(this.options.req, attachments);
message.image_urls = image_urls;
return files;
}
async buildMessages(
messages,
parentMessageId,
{ isChatCompletion = false, promptPrefix = null },
opts,
) {
let orderedMessages = this.constructor.getMessagesForConversation({
messages,
parentMessageId,
summary: this.shouldSummarize,
});
if (!isChatCompletion) {
return await this.buildPrompt(orderedMessages, {
isChatGptModel: isChatCompletion,
promptPrefix,
});
}
let payload;
let instructions;
let tokenCountMap;
let promptTokens;
promptPrefix = (promptPrefix || this.options.promptPrefix || '').trim();
if (promptPrefix) {
promptPrefix = `Instructions:\n${promptPrefix}`;
instructions = {
role: 'system',
name: 'instructions',
content: promptPrefix,
};
if (this.contextStrategy) {
instructions.tokenCount = this.getTokenCountForMessage(instructions);
}
}
if (this.options.attachments) {
const attachments = (await this.options.attachments).filter((file) =>
file.type.includes('image'),
);
if (this.message_file_map) {
this.message_file_map[orderedMessages[orderedMessages.length - 1].messageId] = attachments;
} else {
this.message_file_map = {
[orderedMessages[orderedMessages.length - 1].messageId]: attachments,
};
}
const files = await this.addImageURLs(
orderedMessages[orderedMessages.length - 1],
attachments,
);
this.options.attachments = files;
}
const formattedMessages = orderedMessages.map((message, i) => {
const formattedMessage = formatMessage({
message,
userName: this.options?.name,
assistantName: this.options?.chatGptLabel,
});
const needsTokenCount = this.contextStrategy && !orderedMessages[i].tokenCount;
/* If tokens were never counted, or, is a Vision request and the message has files, count again */
if (needsTokenCount || (this.isVisionModel && (message.image_urls || message.files))) {
orderedMessages[i].tokenCount = this.getTokenCountForMessage(formattedMessage);
}
/* If message has files, calculate image token cost */
if (this.message_file_map && this.message_file_map[message.messageId]) {
const attachments = this.message_file_map[message.messageId];
for (const file of attachments) {
orderedMessages[i].tokenCount += this.calculateImageTokenCost({
width: file.width,
height: file.height,
detail: this.options.imageDetail ?? ImageDetail.auto,
});
}
}
return formattedMessage;
});
// TODO: need to handle interleaving instructions better
if (this.contextStrategy) {
({ payload, tokenCountMap, promptTokens, messages } = await this.handleContextStrategy({
instructions,
orderedMessages,
formattedMessages,
}));
}
const result = {
prompt: payload,
promptTokens,
messages,
};
if (tokenCountMap) {
tokenCountMap.instructions = instructions?.tokenCount;
result.tokenCountMap = tokenCountMap;
}
if (promptTokens >= 0 && typeof opts?.getReqData === 'function') {
opts.getReqData({ promptTokens });
}
return result;
}
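// Illustrative shape of the resolved value:
//   {
//     prompt: [...formatted chat messages...],
//     promptTokens: <number>,
//     messages: [...],
//     tokenCountMap: { [messageId]: <tokenCount>, instructions: <tokenCount|undefined> }, // only when a contextStrategy is set
//   }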
async sendCompletion(payload, opts = {}) {
let reply = '';
let result = null;
let streamResult = null;
this.modelOptions.user = this.user;
const invalidBaseUrl = this.completionsUrl && extractBaseURL(this.completionsUrl) === null;
const useOldMethod = !!(invalidBaseUrl || !this.isChatCompletion);
if (typeof opts.onProgress === 'function' && useOldMethod) {
await this.getCompletion(
payload,
(progressMessage) => {
if (progressMessage === '[DONE]') {
return;
}
if (progressMessage.choices) {
streamResult = progressMessage;
}
let token = null;
if (this.isChatCompletion) {
token =
progressMessage.choices?.[0]?.delta?.content ?? progressMessage.choices?.[0]?.text;
} else {
token = progressMessage.choices?.[0]?.text;
}
if (!token && this.useOpenRouter) {
token = progressMessage.choices?.[0]?.message?.content;
}
// first event's delta content is always undefined
if (!token) {
return;
}
if (token === this.endToken) {
return;
}
opts.onProgress(token);
reply += token;
},
opts.abortController || new AbortController(),
);
} else if (typeof opts.onProgress === 'function' || this.options.useChatCompletion) {
reply = await this.chatCompletion({
payload,
clientOptions: opts,
onProgress: opts.onProgress,
abortController: opts.abortController,
});
} else {
result = await this.getCompletion(
payload,
null,
opts.abortController || new AbortController(),
);
logger.debug('[OpenAIClient] sendCompletion: result', result);
if (this.isChatCompletion) {
reply = result.choices[0].message.content;
} else {
reply = result.choices[0].text.replace(this.endToken, '');
}
}
if (streamResult && typeof opts.addMetadata === 'function') {
const { finish_reason } = streamResult.choices[0];
opts.addMetadata({ finish_reason });
}
return reply.trim();
}
initializeLLM({
model = 'gpt-3.5-turbo',
modelName,
temperature = 0.2,
presence_penalty = 0,
frequency_penalty = 0,
max_tokens,
streaming,
context,
tokenBuffer,
initialMessageCount,
conversationId,
}) {
const modelOptions = {
modelName: modelName ?? model,
temperature,
presence_penalty,
frequency_penalty,
user: this.user,
};
if (max_tokens) {
modelOptions.max_tokens = max_tokens;
}
const configOptions = {};
if (this.langchainProxy) {
configOptions.basePath = this.langchainProxy;
}
if (this.useOpenRouter) {
configOptions.basePath = 'https://openrouter.ai/api/v1';
configOptions.baseOptions = {
headers: {
'HTTP-Referer': 'https://librechat.ai',
'X-Title': 'LibreChat',
},
};
}
if (this.options.proxy) {
configOptions.httpAgent = new HttpsProxyAgent(this.options.proxy);
configOptions.httpsAgent = new HttpsProxyAgent(this.options.proxy);
}
const { req, res, debug } = this.options;
const runManager = new RunManager({ req, res, debug, abortController: this.abortController });
this.runManager = runManager;
const llm = createLLM({
modelOptions,
configOptions,
openAIApiKey: this.apiKey,
azure: this.azure,
streaming,
callbacks: runManager.createCallbacks({
context,
tokenBuffer,
conversationId: this.conversationId ?? conversationId,
initialMessageCount,
}),
});
return llm;
}
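// Usage sketch (mirrors titleConvo and summarizeMessages below):
//   const llm = this.initializeLLM({ model, temperature: 0.2, context: 'title', tokenBuffer: 150 });
// The returned LangChain LLM carries RunManager callbacks for token tracking.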
/**
* Generates a concise title for a conversation based on the user's input text and response.
* Uses either specified method or starts with the OpenAI `functions` method (using LangChain).
* If the `functions` method fails, it falls back to the `completion` method,
* which involves sending a chat completion request with specific instructions for title generation.
*
* @param {Object} params - The parameters for the conversation title generation.
* @param {string} params.text - The user's input.
* @param {string} [params.conversationId] - The current conversationId, if not already defined on client initialization.
* @param {string} [params.responseText=''] - The AI's immediate response to the user.
*
* @returns {Promise<string | 'New Chat'>} A promise that resolves to the generated conversation title.
* In case of failure, it will return the default title, "New Chat".
*/
async titleConvo({ text, conversationId, responseText = '' }) {
let title = 'New Chat';
const convo = `||>User:
"${truncateText(text)}"
||>Response:
"${JSON.stringify(truncateText(responseText))}"`;
const { OPENAI_TITLE_MODEL } = process.env ?? {};
const model = this.options.titleModel ?? OPENAI_TITLE_MODEL ?? 'gpt-3.5-turbo';
const modelOptions = {
// TODO: remove the gpt fallback and make it specific to endpoint
model,
temperature: 0.2,
presence_penalty: 0,
frequency_penalty: 0,
max_tokens: 16,
};
const titleChatCompletion = async () => {
modelOptions.model = model;
if (this.azure) {
modelOptions.model = process.env.AZURE_OPENAI_DEFAULT_MODEL ?? modelOptions.model;
this.azureEndpoint = genAzureChatCompletion(this.azure, modelOptions.model, this);
}
const instructionsPayload = [
{
role: 'system',
content: `Detect user language and write in the same language an extremely concise title for this conversation, which you must accurately detect.
Write in the detected language. Title in 5 Words or Less. No Punctuation or Quotation. Do not mention the language. All first letters of every word should be capitalized and write the title in User Language only.
${convo}
||>Title:`,
},
];
try {
title = (
await this.sendPayload(instructionsPayload, { modelOptions, useChatCompletion: true })
).replaceAll('"', '');
} catch (e) {
logger.error(
'[OpenAIClient] There was an issue generating the title with the completion method',
e,
);
}
};
if (this.options.titleMethod === 'completion') {
await titleChatCompletion();
logger.debug('[OpenAIClient] Convo Title: ' + title);
return title;
}
try {
this.abortController = new AbortController();
const llm = this.initializeLLM({
...modelOptions,
conversationId,
context: 'title',
tokenBuffer: 150,
});
title = await runTitleChain({ llm, text, convo, signal: this.abortController.signal });
} catch (e) {
if (e?.message?.toLowerCase()?.includes('abort')) {
logger.debug('[OpenAIClient] Aborted title generation');
return;
}
logger.error(
'[OpenAIClient] There was an issue generating title with LangChain, trying completion method...',
e,
);
await titleChatCompletion();
}
logger.debug('[OpenAIClient] Convo Title: ' + title);
return title;
}
async summarizeMessages({ messagesToRefine, remainingContextTokens }) {
logger.debug('[OpenAIClient] Summarizing messages...');
let context = messagesToRefine;
let prompt;
// TODO: remove the gpt fallback and make it specific to endpoint
const { OPENAI_SUMMARY_MODEL = 'gpt-3.5-turbo' } = process.env ?? {};
const model = this.options.summaryModel ?? OPENAI_SUMMARY_MODEL;
const maxContextTokens =
getModelMaxTokens(
model,
this.options.endpointType ?? this.options.endpoint,
this.options.endpointTokenConfig,
) ?? 4095; // 1 less than maximum
// 3 tokens for the assistant label, and 98 for the summarizer prompt (101)
let promptBuffer = 101;
/*
* Note: token counting here is to block summarization if it exceeds the spend; complete
* accuracy is not important. Actual spend will happen after successful summarization.
*/
const excessTokenCount = context.reduce(
(acc, message) => acc + message.tokenCount,
promptBuffer,
);
if (excessTokenCount > maxContextTokens) {
({ context } = await this.getMessagesWithinTokenLimit(context, maxContextTokens));
}
if (context.length === 0) {
logger.debug(
'[OpenAIClient] Summary context is empty, using latest message within token limit',
);
promptBuffer = 32;
const { text, ...latestMessage } = messagesToRefine[messagesToRefine.length - 1];
const splitText = await tokenSplit({
text,
chunkSize: Math.floor((maxContextTokens - promptBuffer) / 3),
});
const newText = `${splitText[0]}\n...[truncated]...\n${splitText[splitText.length - 1]}`;
prompt = CUT_OFF_PROMPT;
context = [
formatMessage({
message: {
...latestMessage,
text: newText,
},
userName: this.options?.name,
assistantName: this.options?.chatGptLabel,
}),
];
}
// TODO: We can accurately count the tokens here before handleChatModelStart
// by recreating the summary prompt (single message) to avoid LangChain handling
const initialPromptTokens = this.maxContextTokens - remainingContextTokens;
logger.debug('[OpenAIClient] initialPromptTokens', initialPromptTokens);
const llm = this.initializeLLM({
model,
temperature: 0.2,
context: 'summary',
tokenBuffer: initialPromptTokens,
});
try {
const summaryMessage = await summaryBuffer({
llm,
debug: this.options.debug,
prompt,
context,
formatOptions: {
userName: this.options?.name,
assistantName: this.options?.chatGptLabel ?? this.options?.modelLabel,
},
previous_summary: this.previous_summary?.summary,
signal: this.abortController.signal,
});
const summaryTokenCount = this.getTokenCountForMessage(summaryMessage);
if (this.options.debug) {
logger.debug('[OpenAIClient] summaryTokenCount', summaryTokenCount);
logger.debug(
`[OpenAIClient] Summarization complete: remainingContextTokens: ${remainingContextTokens}, after refining: ${
remainingContextTokens - summaryTokenCount
}`,
);
}
return { summaryMessage, summaryTokenCount };
} catch (e) {
if (e?.message?.toLowerCase()?.includes('abort')) {
logger.debug('[OpenAIClient] Aborted summarization');
const { run, runId } = this.runManager.getRunByConversationId(this.conversationId);
if (run && run.error) {
const { error } = run;
this.runManager.removeRun(runId);
throw new Error(error);
}
}
logger.error('[OpenAIClient] Error summarizing messages', e);
return {};
}
}
async recordTokenUsage({ promptTokens, completionTokens }) {
logger.debug('[OpenAIClient] recordTokenUsage:', { promptTokens, completionTokens });
await spendTokens(
{
user: this.user,
model: this.modelOptions.model,
context: 'message',
conversationId: this.conversationId,
endpointTokenConfig: this.options.endpointTokenConfig,
},
{ promptTokens, completionTokens },
);
}
getTokenCountForResponse(response) {
return this.getTokenCountForMessage({
role: 'assistant',
content: response.text,
});
}
async chatCompletion({ payload, onProgress, clientOptions, abortController = null }) {
let error = null;
const errorCallback = (err) => (error = err);
let intermediateReply = '';
try {
if (!abortController) {
abortController = new AbortController();
}
let modelOptions = { ...this.modelOptions };
if (typeof onProgress === 'function') {
modelOptions.stream = true;
}
if (this.isChatCompletion) {
modelOptions.messages = payload;
} else {
modelOptions.prompt = payload;
}
const baseURL = extractBaseURL(this.completionsUrl);
// let { messages: _msgsToLog, ...modelOptionsToLog } = modelOptions;
// if (modelOptionsToLog.messages) {
// _msgsToLog = modelOptionsToLog.messages.map((msg) => {
// let { content, ...rest } = msg;
// if (content)
// return { ...rest, content: truncateText(content) };
// });
// }
logger.debug('[OpenAIClient] chatCompletion', { baseURL, modelOptions });
const opts = {
baseURL,
};
if (this.useOpenRouter) {
opts.defaultHeaders = {
'HTTP-Referer': 'https://librechat.ai',
'X-Title': 'LibreChat',
};
}
if (this.options.headers) {
opts.defaultHeaders = { ...opts.defaultHeaders, ...this.options.headers };
}
if (this.options.proxy) {
opts.httpAgent = new HttpsProxyAgent(this.options.proxy);
}
if (this.isVisionModel) {
modelOptions.max_tokens = 4000;
}
if (this.azure || this.options.azure) {
// Azure does not accept `model` in the body, so we need to remove it.
delete modelOptions.model;
opts.baseURL = this.langchainProxy
? constructAzureURL({
baseURL: this.langchainProxy,
azure: this.azure,
})
: this.azureEndpoint.split(/\/(chat|completion)/)[0];
opts.defaultQuery = { 'api-version': this.azure.azureOpenAIApiVersion };
opts.defaultHeaders = { ...opts.defaultHeaders, 'api-key': this.apiKey };
}
if (process.env.OPENAI_ORGANIZATION) {
opts.organization = process.env.OPENAI_ORGANIZATION;
}
let chatCompletion;
const openai = new OpenAI({
apiKey: this.apiKey,
...opts,
});
/* hacky fixes for Mistral AI API:
- Re-orders system message to the top of the messages payload, as not allowed anywhere else
- If there is only one message and it's a system message, change the role to user
*/
if (opts.baseURL.includes('https://api.mistral.ai/v1') && modelOptions.messages) {
const { messages } = modelOptions;
const systemMessageIndex = messages.findIndex((msg) => msg.role === 'system');
if (systemMessageIndex > 0) {
const [systemMessage] = messages.splice(systemMessageIndex, 1);
messages.unshift(systemMessage);
}
modelOptions.messages = messages;
if (messages.length === 1 && messages[0].role === 'system') {
modelOptions.messages[0].role = 'user';
}
}
if (this.options.addParams && typeof this.options.addParams === 'object') {
modelOptions = {
...modelOptions,
...this.options.addParams,
};
}
if (this.options.dropParams && Array.isArray(this.options.dropParams)) {
this.options.dropParams.forEach((param) => {
delete modelOptions[param];
});
}
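// Example (illustrative): with options.addParams = { seed: 42 } and
// options.dropParams = ['frequency_penalty'], the request body sent below gains `seed`
// and omits `frequency_penalty`.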
let UnexpectedRoleError = false;
if (modelOptions.stream) {
const stream = await openai.beta.chat.completions
.stream({
...modelOptions,
stream: true,
})
.on('abort', () => {
/* Do nothing here */
})
.on('error', (err) => {
handleOpenAIErrors(err, errorCallback, 'stream');
})
.on('finalMessage', (message) => {
if (message?.role !== 'assistant') {
stream.messages.push({ role: 'assistant', content: intermediateReply });
UnexpectedRoleError = true;
}
});
for await (const chunk of stream) {
const token = chunk.choices[0]?.delta?.content || '';
intermediateReply += token;
onProgress(token);
if (abortController.signal.aborted) {
stream.controller.abort();
break;
}
}
if (!UnexpectedRoleError) {
chatCompletion = await stream.finalChatCompletion().catch((err) => {
handleOpenAIErrors(err, errorCallback, 'finalChatCompletion');
});
}
}
// regular completion
else {
chatCompletion = await openai.chat.completions
.create({
...modelOptions,
})
.catch((err) => {
handleOpenAIErrors(err, errorCallback, 'create');
});
}
if (!chatCompletion && UnexpectedRoleError) {
throw new Error(
'OpenAI error: Invalid final message: OpenAI expects final message to include role=assistant',
);
} else if (!chatCompletion && error) {
throw new Error(error);
} else if (!chatCompletion) {
throw new Error('Chat completion failed');
}
const { message, finish_reason } = chatCompletion.choices[0];
if (chatCompletion && typeof clientOptions.addMetadata === 'function') {
clientOptions.addMetadata({ finish_reason });
}
logger.debug('[OpenAIClient] chatCompletion response', chatCompletion);
return message.content;
} catch (err) {
if (
err?.message?.includes('abort') ||
(err instanceof OpenAI.APIError && err?.message?.includes('abort'))
) {
return intermediateReply;
}
if (
err?.message?.includes(
'OpenAI error: Invalid final message: OpenAI expects final message to include role=assistant',
) ||
err?.message?.includes('The server had an error processing your request') ||
err?.message?.includes('missing finish_reason') ||
err?.message?.includes('missing role') ||
(err instanceof OpenAI.OpenAIError && err?.message?.includes('missing finish_reason'))
) {
logger.error('[OpenAIClient] Known OpenAI error:', err);
return intermediateReply;
} else if (err instanceof OpenAI.APIError) {
if (intermediateReply) {
return intermediateReply;
} else {
throw err;
}
} else {
logger.error('[OpenAIClient.chatCompletion] Unhandled error type', err);
throw err;
}
}
}
}
module.exports = OpenAIClient;
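/*
 * Minimal usage sketch (illustrative only; the require path is hypothetical and real callers
 * also pass server options such as `req`, `res`, and endpoint settings):
 *
 *   const OpenAIClient = require('./OpenAIClient');
 *   const client = new OpenAIClient(process.env.OPENAI_API_KEY, {
 *     modelOptions: { model: 'gpt-3.5-turbo', temperature: 0.8 },
 *   });
 *   const title = await client.titleConvo({ text: 'Hello', responseText: 'Hi there!' });
 */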