LibreChat/api/app/clients/BaseClient.js
Danny Avila ecd63eb9f1
feat: Assistants API, General File Support, Side Panel, File Explorer (#1696)
* feat: assistant name/icon in Landing & Header

* feat: assistname in textarea placeholder, and use `Assistant` as default name

* feat: display non-image files in user messages

* fix: only render files if files.length is > 0

* refactor(config -> file-config): move file related configuration values to separate module, add excel types

* chore: spreadsheet file rendering

* fix(Landing): dark mode style for Assistant Name

* refactor: move progress incrementing to own hook, start smaller, cap near limit \(1\)

* refactor(useContentHandler): add empty Text part if last part was completed tool or image

* chore: add accordion trigger border styling for dark mode

* feat: Assistant Builder model selection

* chore: use Spinner when Assistant is mutating

* fix(get/assistants): return correct response object `AssistantListResponse`

* refactor(Spinner): pass size as prop

* refactor: make assistant crud mutations optimistic, add types for options

* chore: remove assistants route and view

* chore: move assistant builder components to separate directory

* feat(ContextButton): delete Assistant via context button/dialog, add localization

* refactor: conditionally show use and context menu buttons, add localization for create assistant

* feat: save side panel states to localStorage

* style(SidePanel): improve avatar menu and assistant select styling for dark mode

* refactor: make NavToggle reusable for either side (left or right), add SidePanel Toggle with ability to close it completely

* fix: resize handle and navToggle behavior

* fix(/avatar/:assistant_id): await `deleteFile` and assign unique name to uploaded image

* WIP: file UI components from PR #576

* refactor(OpenAIMinimalIcon): pass className

* feat: formatDate helper fn

* feat: DataTableColumnHeader

* feat: add row selection, formatted row values, number of rows selected

* WIP: add files to Side panel temporarily

* feat: `LB_QueueAsyncCall`: Leaky Bucket queue for external APIs, use in `processDeleteRequest`

* fix(TFile): correct `source` type with `FileSources`

* fix(useFileHandling): use `continue` instead of return when iterating multiple files, add file type to extendedFile

* chore: add generic setter type

* refactor(processDeleteRequest): settle promises to prevent rejections from processing deletions, log errors

* feat: `useFileDeletion` to reuse file deletion logic

* refactor(useFileDeletion): make `setFiles` an optional param and use object as param

* feat: useDeleteFilesFromTable

* feat: use real `files` data and add deletion action to data table

* fix(Table): make headers sticky

* feat: add dynamic filtering for columns; only show to user Host or OpenAI storage type

* style(DropdownMenu): replace `slate` with `gray`

* style(DataTable): apply dark mode themes and other misc styling

* style(Columns): add color to OpenAI Storage option

* refactor(FileContainer): make file preview reusable

* refactor(Images): make image preview reusable

* refactor(FilePreview): make file prop optional for FileIcon and FilePreview, fix relative style

* feat(Columns): add file/image previews, set a minimum size to show for file size in bytes

* WIP: File Panel with real files and formatted

* feat: open files dialog from panel

* style: file data table mobile and general column styling fixes

* refactor(api/files): return files sorted by the most recently updated

* refactor: provide fileMap through context to prevent re-selecting files to map in different areas; remove unused imports commented out in PanelColumns

* refactor(ExtendFile): make File type optional, add `attached` to prevent attached files from being deleted on remove, make Message.files a partial TFile type

* feat: attach files through file panel

* refactor(useFileHandling): move files to the start of cache list when uploaded

* refactor(useDeleteFilesMutation): delete files from cache when successfully deleted from server

* fix(FileRow): handle possible edge case of duplication due to attaching recently uploaded file

* style(SidePanel): make resize grip border transparent, remove unnecessary styling on close sidepanel button

* feat: action utilities and tests

* refactor(actions): add `ValidationResult` type and change wording for no server URL found

* refactor(actions): check for empty server URL

* fix(data-provider): revert tsconfig to fix type issue resolution

* feat(client): first pass of actions input for assistants

* refactor(FunctionSignature): change method to output object instead of string

* refactor(models/Assistant): add actions field to schema, use searchParams object for methods, and add `getAssistant`

* feat: post actions input first pass
- create new Action document
- add actions to Assistant DB document
- create /action/:assistant_id POST route
- pass more props down from PanelSwitcher, derive assistant_id from switcher
- move privacy policy to ActionInput
- reset data on input change/validation
- add `useUpdateAction`
- conform FunctionSignature type to FunctionTool
- add action, assistant doc, update hook related types

* refactor: optimize assistant/actions relationship
- past domain in metadata as hostname and not a URL
- include domain in tool name
- add `getActions` for actions retrieval by user
- add `getAssistants` for assistant docs retrieval by user
- add `assistant_id` to Action schema
- move actions to own module as a subroute to `api/assistants`
- add `useGetActionsQuery` and `useGetAssistantDocsQuery` hooks
- fix Action type def

* feat: show assistant actions in assistant builder

* feat: switch to actions on action click, editing action styling

* fix: add Assistant state for builder panel to allow immediate selection of newly created assistants as well as retaining the current assistant when switching to a different panel within the builder

* refactor(SidePanel/NavToggle): offset less from right when SidePanel is completely collapsed

* chore: rename `processActions` -> `processRequiredActions`

* chore: rename Assistant API Action to RequiredAction

* refactor(actions): avoid nesting actual API params under generic `requestBody` to optimize LLM token usage

* fix(handleTools): avoid calling `validTool` if not defined, add optional param to skip the loading of specs, which throws an error in the context of assistants

* WIP: working first pass of toolCalls generated from openapi specs

* WIP: first pass ToolCall styling

* feat: programmatic iv encryption/decryption helpers

* fix: correct ActionAuth types/enums, and define type for AuthForm

* feat: encryption/decryption helpers for Action AuthMetadata

* refactor(getActions): remove sensitive fields from query response

* refactor(POST/actions): encrypt and remove sensitive fields from mutation response

* fix(ActionService): change ESM import to CJS

* feat: frontend auth handling for actions + optimistic update on action update/creation

* refactor(actions): use the correct variables and types for setAuth method

* refactor: POST /:assistant_id action can now handle updating an existing action, add `saved_auth_fields` to determine when user explicitly saves new auth creds. only send auth metadata if user explicitly saved fields

* refactor(createActionTool): catch errors and send back meaningful error message, add flag to `getActions` to determine whether to retrieve sensitive values or not

* refactor(ToolService): add `action` property to ToolCall PartMetadata to determine if the tool call was an action, fix parsing function name issue with actionDelimiter

* fix(ActionRequest): use URL class to correctly join endpoint parts for `execute` call

* feat: delete assistant actions

* refactor: conditionally show Available actions

* refactor: show `retrieval` and `code_interpreter` as Capabilities, swap `Switch` for `Checkbox`

* chore: remove shadow-stroke from messages

* WIP: first pass of Assistants Knowledge attachments

* refactor: remove AssistantsProvider in favor of FormProvider, fix selectedAssistant re-render bug, map Assistant file_ids to files via fileMap, initialize Knowledge component with mapped files if any exist

* fix: prevent deleting files on assistant file upload

* chore: remove console.log

* refactor(useUploadFileMutation): update files and assistants cache on upload

* chore: disable oauth option as not supported yet

* feat: cancel assistant runs

* refactor: initialize OpenAI client with helper function, resolve all related circular dependencies

* fix(DALL-E): initialization

* fix(process): openai client initialization

* fix: select an existing Assistant when the active one is deleted

* chore: allow attaching files for assistant endpoint, send back relevant OpenAI error message when uploading, deconstruct openAI initialization correctly, add `message_file` to formData when a file is attached to the message but not the assistant

* fix: add assistant_id on newConvo

* fix(initializeClient): import fix

* chore: swap setAssistant for setOption in useEffect

* fix(DALL-E): add processFileURL to loadTools call

* chore: add customConfig to debug logs

* feat: delete threads on convo delete

* chore: replace Assistants icon

* chore: remove console.dir() in `abortRun`

* feat(AssistantService): accumulate text values from run in openai.responseText

* feat: titling for assistants endpoint

* chore: move panel file components to appropriate directory, add file checks for attaching files, change icon for Attach Files

* refactor: add localizations to tools, plugins, add condition for adding/remove user plugins so tool selections don't affect this value

* chore: disable `import from url` action for now

* chore: remove textMimeTypes from default fileConfig for now

* fix: catch tool errors and send as outputs with error messages

* fix: React warning about button as descendant of button

* style: retrieval and cancelled icon

* WIP: pass isSubmitting to Parts, use InProgressCall to display cancelled tool calls correctly, show domain/function name

* fix(meilisearch): fix `postSaveHook` issue where indexing expects a mongo document, and join all text content parts for meili indexing

* ci: fix dall-e tests

* ci: fix client tests

* fix: button types in actions panel

* fix: plugin auth form persisting across tool selections

* fix(ci): update AppService spec with `loadAndFormatTools`

* fix(clearConvos): add id check earlier on

* refactor(AssistantAvatar): set previewURL dynamically when emtadata.avatar changes

* feat(assistants): addTitle cache setting

* fix(useSSE): resolve rebase conflicts

* fix: delete mutation

* style(SidePanel): make grip visible on active and hover, invisible otherwise

* ci: add data-provider tests to workflow, also update eslint/tsconfig to recognize specs, and add `text/csv` to fileConfig

* fix: handle edge case where auth object is undefined, and log errors

* refactor(actions): resolve  schemas, add tests for resolving refs, import specs from separate file for tests

* chore: remove comment

* fix(ActionsInput): re-render bug when initializing states with action fields

* fix(patch/assistant): filter undefined tools

* chore: add logging for errors in assistants routes

* fix(updateAssistant): map actions to functions to avoid overwriting

* fix(actions): properly handle GET paths

* fix(convos): unhandled delete thread exception

* refactor(AssistantService): pass both thread_id and conversationId when sending intermediate assistant messages, remove `mapMessagesToSteps` from AssistantService

* refactor(useSSE): replace all messages with runMessages and pass latestMessageId to abortRun; fix(checkMessageGaps): include tool calls when  syncing messages

* refactor(assistants/chat): invoke `createOnTextProgress` after thread creation

* chore: add typing

* style: sidepanel styling

* style: action tool call domain styling

* feat(assistants): default models, limit retrieval to certain models, add env variables to to env.example

* feat: assistants api key in EndpointService

* refactor: set assistant model to conversation on assistant switch

* refactor: set assistant model to conversation on assistant select from panel

* fix(retrieveAndProcessFile): catch attempt to download file with `assistant` purpose which is not allowed; add logging

* feat: retrieval styling, handling, and logging

* chore: rename ASSISTANTS_REVERSE_PROXY to ASSISTANTS_BASE_URL

* feat: FileContext for file metadata

* feat: context file mgmt and filtering

* style(Select): hover/rounded changes

* refactor: explicit conversation switch, endpoint dependent, through `useSelectAssistant`, which does not create new chat if current endpoint is assistant endpoint

* fix(AssistantAvatar): make empty previewURL if no avatar present

* refactor: side panel mobile styling

* style: merge tool and action section, optimize mobile styling for action/tool buttons

* fix: localStorage issues

* fix(useSelectAssistant): invoke react query hook directly in select hook as Map was not being updated in time

* style: light mode fixes

* fix: prevent sidepanel nav styling from shifting layout up

* refactor: change default layout (collapsed by default)

* style: mobile optimization of DataTable

* style: datatable

* feat: client-side hide right-side panel

* chore(useNewConvo): add partial typing for preset

* fix(useSelectAssistant): pass correct model name by using template as preset

* WIP: assistant presets

* refactor(ToolService): add native solution for `TavilySearchResults` and log tool output errors

* refactor: organize imports and use native TavilySearchResults

* fix(TavilySearchResults): stringify result

* fix(ToolCall): show tool call outputs when not an action

* chore: rename Prompt Prefix to custom instructions (in user facing text only)

* refactor(EditPresetDialog): Optimize setting title by debouncing, reset preset on dialog close to avoid state mixture

* feat: add `presetOverride` to overwrite active conversation settings when saving a Preset (relevant for client side updates only)

* feat: Assistant preset settings (client-side)

* fix(Switcher): only set assistant_id and model if current endpoint is Assistants

* feat: use `useDebouncedInput` for updating conversation settings, starting with EditPresetDialog title setting and Assistant instructions setting

* feat(Assistants): add instructions field to settings

* feat(chat/assistants): pass conversation settings to run body

* wip: begin localization and only allow actions if the assistant is created

* refactor(AssistantsPanel): knowledge localization, allow tools on creation

* feat: experimental: allow 'priming' values before assistant is created, that would normally require an assistant_id to be defined

* chore: trim console logs and make more meaningful

* chore: toast messages

* fix(ci): date test

* feat: create file when uploading Assistant Avatar

* feat: file upload rate limiting from custom config with dynamic file route initialization

* refactor: use file upload limiters on post routes only

* refactor(fileConfig): add endpoints field for endpoint specific fileconfigs, add mergeConfig function, add tests

* refactor: fileConfig route, dynamic multer instances used on all '/' and '/images' POST routes, data service and query hook

* feat: supportedMimeTypesSchema, test for array of regex

* feat: configurable file config limits

* chore: clarify assistants file knowledge prereq.

* chore(useTextarea): default to localized 'Assistant' if assistant name is empty

* feat: configurable file limits and toggle file upload per endpoint

* fix(useUploadFileMutation): prevent updating assistant.files cache if file upload is a message_file attachment

* fix(AssistantSelect): set last selected assistant only when timeout successfully runs

* refactor(queries): disable assistant queries if assistants endpoint is not enabled

* chore(Switcher): add localization

* chore: pluralize `assistant` for `EModelEndpoint key and value

* feat: show/hide assistant UI components based on endpoint availability; librechat.yaml config for disabling builder section and setting polling/timeout intervals

* fix(compactEndpointSchemas): use EModelEndpoint for schema access

* feat(runAssistant): use configured values from `librechat.yaml` for `pollIntervalMs` and `timeout`

* fix: naming issue

* wip: revert landing

* 🎉 happy birthday LibreChat (#1768)

* happy birthday LibreChat

* Refactor endpoint condition in Landing component

* Update birthday message in Eng.tsx

* fix(/config): avoid nesting ternaries

* refactor(/config): check birthday

---------

Co-authored-by: Danny Avila <messagedaniel@protonmail.com>

* fix: landing

* fix: landing

* fix(useMessageHelpers): hardcoded check to use EModelEndpoint instead

* fix(ci): convo test revert to main

* fix(assistants/chat): fix issue where assistant_id was being saved as model for convo

* chore: added logging, promises racing to prevent longer timeouts, explicit setting of maxRetries and timeouts, robust catching of invalid abortRun params

* refactor: use recoil state for `showStopButton` and only show for assistants endpoint after syncing conversation data

* refactor: optimize abortRun strategy using localStorage, refactor `abortConversation` to use async/await and await the result, refactor how the abortKey cache is set for runs

* fix(checkMessageGaps): assign `assistant_id` to synced messages if defined; prevents UI from showing blank assistant for cancelled messages

* refactor: re-order sequence of chat route, only allow aborting messages after run is created, cancel abortRun if there was a cancelling error (likely due already cancelled in chat route), and add extra logging

* chore(typedefs): add httpAgent type to OpenAIClient

* refactor: use custom implementation of retrieving run with axios to allow for timing out run query

* fix(waitForRun): handle timed out run retrieval query

* refactor: update preset conditions:
- presets will retain settings when a different endpoint is selected; for existing convos, either when modular or is assistant switch
- no longer use `navigateToConvo` on preset select

* fix: temporary calculator hack as expects string input when invoked

* fix: cancel abortRun only when cancelling error is a result of the run already being cancelled

* chore: remove use of `fileMaxSizeMB` and total counterpart (redundant)

* docs: custom config documentation update

* docs: assistants api setup and dotenv, new custom config fields

* refactor(Switcher): make Assistant switcher sticky in SidePanel

* chore(useSSE): remove console log of data and message index

* refactor(AssistantPanel): button styling and add secondary select button to bottom of panel

* refactor(OpenAIClient): allow passing conversationId to RunManager through titleConvo and initializeLLM to properly record title context tokens used in cases where conversationId was not defined by the client

* feat(assistants): token tracking for assistant runs

* chore(spendTokens): improve logging

* feat: support/exclude specific assistant Ids

* chore: add update `librechat.example.yaml`, optimize `AppService` handling, new tests for `AppService`, optimize missing/outdate config logging

* chore: mount docker logs to root of project

* chore: condense axios errors

* chore: bump vite

* chore: vite hot reload fix using latest version

* chore(getOpenAIModels): sort instruct models to the end of models list

* fix(assistants): user provided key

* fix(assistants): user provided key, invalidate more queries on revoke

---------

Co-authored-by: Marco Beretta <81851188+Berry-13@users.noreply.github.com>
2024-02-13 20:42:27 -05:00

686 lines
22 KiB
JavaScript

const crypto = require('crypto');
const { supportsBalanceCheck, Constants } = require('librechat-data-provider');
const { getConvo, getMessages, saveMessage, updateMessage, saveConvo } = require('~/models');
const { addSpaceIfNeeded, isEnabled } = require('~/server/utils');
const checkBalance = require('~/models/checkBalance');
const TextStream = require('./TextStream');
const { logger } = require('~/config');
class BaseClient {
constructor(apiKey, options = {}) {
this.apiKey = apiKey;
this.sender = options.sender ?? 'AI';
this.contextStrategy = null;
this.currentDateString = new Date().toLocaleDateString('en-us', {
year: 'numeric',
month: 'long',
day: 'numeric',
});
}
setOptions() {
throw new Error('Method \'setOptions\' must be implemented.');
}
getCompletion() {
throw new Error('Method \'getCompletion\' must be implemented.');
}
async sendCompletion() {
throw new Error('Method \'sendCompletion\' must be implemented.');
}
getSaveOptions() {
throw new Error('Subclasses must implement getSaveOptions');
}
async buildMessages() {
throw new Error('Subclasses must implement buildMessages');
}
async summarizeMessages() {
throw new Error('Subclasses attempted to call summarizeMessages without implementing it');
}
async getTokenCountForResponse(response) {
logger.debug('`[BaseClient] recordTokenUsage` not implemented.', response);
}
async addPreviousAttachments(messages) {
return messages;
}
async recordTokenUsage({ promptTokens, completionTokens }) {
logger.debug('`[BaseClient] recordTokenUsage` not implemented.', {
promptTokens,
completionTokens,
});
}
getBuildMessagesOptions() {
throw new Error('Subclasses must implement getBuildMessagesOptions');
}
async generateTextStream(text, onProgress, options = {}) {
const stream = new TextStream(text, options);
await stream.processTextStream(onProgress);
}
async setMessageOptions(opts = {}) {
if (opts && opts.replaceOptions) {
this.setOptions(opts);
}
const { isEdited, isContinued } = opts;
const user = opts.user ?? null;
this.user = user;
const saveOptions = this.getSaveOptions();
this.abortController = opts.abortController ?? new AbortController();
const conversationId = opts.conversationId ?? crypto.randomUUID();
const parentMessageId = opts.parentMessageId ?? Constants.NO_PARENT;
const userMessageId = opts.overrideParentMessageId ?? crypto.randomUUID();
let responseMessageId = opts.responseMessageId ?? crypto.randomUUID();
let head = isEdited ? responseMessageId : parentMessageId;
this.currentMessages = (await this.loadHistory(conversationId, head)) ?? [];
this.conversationId = conversationId;
if (isEdited && !isContinued) {
responseMessageId = crypto.randomUUID();
head = responseMessageId;
this.currentMessages[this.currentMessages.length - 1].messageId = head;
}
return {
...opts,
user,
head,
conversationId,
parentMessageId,
userMessageId,
responseMessageId,
saveOptions,
};
}
createUserMessage({ messageId, parentMessageId, conversationId, text }) {
return {
messageId,
parentMessageId,
conversationId,
sender: 'User',
text,
isCreatedByUser: true,
};
}
async handleStartMethods(message, opts) {
const {
user,
head,
conversationId,
parentMessageId,
userMessageId,
responseMessageId,
saveOptions,
} = await this.setMessageOptions(opts);
const userMessage = opts.isEdited
? this.currentMessages[this.currentMessages.length - 2]
: this.createUserMessage({
messageId: userMessageId,
parentMessageId,
conversationId,
text: message,
});
if (typeof opts?.getReqData === 'function') {
opts.getReqData({
userMessage,
conversationId,
responseMessageId,
});
}
if (typeof opts?.onStart === 'function') {
opts.onStart(userMessage);
}
return {
...opts,
user,
head,
conversationId,
responseMessageId,
saveOptions,
userMessage,
};
}
/**
* Adds instructions to the messages array. If the instructions object is empty or undefined,
* the original messages array is returned. Otherwise, the instructions are added to the messages
* array, preserving the last message at the end.
*
* @param {Array} messages - An array of messages.
* @param {Object} instructions - An object containing instructions to be added to the messages.
* @returns {Array} An array containing messages and instructions, or the original messages if instructions are empty.
*/
addInstructions(messages, instructions) {
const payload = [];
if (!instructions || Object.keys(instructions).length === 0) {
return messages;
}
if (messages.length > 1) {
payload.push(...messages.slice(0, -1));
}
payload.push(instructions);
if (messages.length > 0) {
payload.push(messages[messages.length - 1]);
}
return payload;
}
async handleTokenCountMap(tokenCountMap) {
if (this.currentMessages.length === 0) {
return;
}
for (let i = 0; i < this.currentMessages.length; i++) {
// Skip the last message, which is the user message.
if (i === this.currentMessages.length - 1) {
break;
}
const message = this.currentMessages[i];
const { messageId } = message;
const update = {};
if (messageId === tokenCountMap.summaryMessage?.messageId) {
logger.debug(`[BaseClient] Adding summary props to ${messageId}.`);
update.summary = tokenCountMap.summaryMessage.content;
update.summaryTokenCount = tokenCountMap.summaryMessage.tokenCount;
}
if (message.tokenCount && !update.summaryTokenCount) {
logger.debug(`[BaseClient] Skipping ${messageId}: already had a token count.`);
continue;
}
const tokenCount = tokenCountMap[messageId];
if (tokenCount) {
message.tokenCount = tokenCount;
update.tokenCount = tokenCount;
await this.updateMessageInDatabase({ messageId, ...update });
}
}
}
concatenateMessages(messages) {
return messages.reduce((acc, message) => {
const nameOrRole = message.name ?? message.role;
return acc + `${nameOrRole}:\n${message.content}\n\n`;
}, '');
}
/**
* This method processes an array of messages and returns a context of messages that fit within a specified token limit.
* It iterates over the messages from newest to oldest, adding them to the context until the token limit is reached.
* If the token limit would be exceeded by adding a message, that message is not added to the context and remains in the original array.
* The method uses `push` and `pop` operations for efficient array manipulation, and reverses the context array at the end to maintain the original order of the messages.
*
* @param {Array} _messages - An array of messages, each with a `tokenCount` property. The messages should be ordered from oldest to newest.
* @param {number} [maxContextTokens] - The max number of tokens allowed in the context. If not provided, defaults to `this.maxContextTokens`.
* @returns {Object} An object with four properties: `context`, `summaryIndex`, `remainingContextTokens`, and `messagesToRefine`.
* `context` is an array of messages that fit within the token limit.
* `summaryIndex` is the index of the first message in the `messagesToRefine` array.
* `remainingContextTokens` is the number of tokens remaining within the limit after adding the messages to the context.
* `messagesToRefine` is an array of messages that were not added to the context because they would have exceeded the token limit.
*/
async getMessagesWithinTokenLimit(_messages, maxContextTokens) {
// Every reply is primed with <|start|>assistant<|message|>, so we
// start with 3 tokens for the label after all messages have been counted.
let currentTokenCount = 3;
let summaryIndex = -1;
let remainingContextTokens = maxContextTokens ?? this.maxContextTokens;
const messages = [..._messages];
const context = [];
if (currentTokenCount < remainingContextTokens) {
while (messages.length > 0 && currentTokenCount < remainingContextTokens) {
const poppedMessage = messages.pop();
const { tokenCount } = poppedMessage;
if (poppedMessage && currentTokenCount + tokenCount <= remainingContextTokens) {
context.push(poppedMessage);
currentTokenCount += tokenCount;
} else {
messages.push(poppedMessage);
break;
}
}
}
const prunedMemory = messages;
summaryIndex = prunedMemory.length - 1;
remainingContextTokens -= currentTokenCount;
return {
context: context.reverse(),
remainingContextTokens,
messagesToRefine: prunedMemory,
summaryIndex,
};
}
async handleContextStrategy({ instructions, orderedMessages, formattedMessages }) {
let _instructions;
let tokenCount;
if (instructions) {
({ tokenCount, ..._instructions } = instructions);
}
_instructions && logger.debug('[BaseClient] instructions tokenCount: ' + tokenCount);
let payload = this.addInstructions(formattedMessages, _instructions);
let orderedWithInstructions = this.addInstructions(orderedMessages, instructions);
let { context, remainingContextTokens, messagesToRefine, summaryIndex } =
await this.getMessagesWithinTokenLimit(orderedWithInstructions);
logger.debug('[BaseClient] Context Count (1/2)', {
remainingContextTokens,
maxContextTokens: this.maxContextTokens,
});
let summaryMessage;
let summaryTokenCount;
let { shouldSummarize } = this;
// Calculate the difference in length to determine how many messages were discarded if any
const { length } = payload;
const diff = length - context.length;
const firstMessage = orderedWithInstructions[0];
const usePrevSummary =
shouldSummarize &&
diff === 1 &&
firstMessage?.summary &&
this.previous_summary.messageId === firstMessage.messageId;
if (diff > 0) {
payload = payload.slice(diff);
logger.debug(
`[BaseClient] Difference between original payload (${length}) and context (${context.length}): ${diff}`,
);
}
const latestMessage = orderedWithInstructions[orderedWithInstructions.length - 1];
if (payload.length === 0 && !shouldSummarize && latestMessage) {
throw new Error(
`Prompt token count of ${latestMessage.tokenCount} exceeds max token count of ${this.maxContextTokens}.`,
);
}
if (usePrevSummary) {
summaryMessage = { role: 'system', content: firstMessage.summary };
summaryTokenCount = firstMessage.summaryTokenCount;
payload.unshift(summaryMessage);
remainingContextTokens -= summaryTokenCount;
} else if (shouldSummarize && messagesToRefine.length > 0) {
({ summaryMessage, summaryTokenCount } = await this.summarizeMessages({
messagesToRefine,
remainingContextTokens,
}));
summaryMessage && payload.unshift(summaryMessage);
remainingContextTokens -= summaryTokenCount;
}
// Make sure to only continue summarization logic if the summary message was generated
shouldSummarize = summaryMessage && shouldSummarize;
logger.debug('[BaseClient] Context Count (2/2)', {
remainingContextTokens,
maxContextTokens: this.maxContextTokens,
});
let tokenCountMap = orderedWithInstructions.reduce((map, message, index) => {
const { messageId } = message;
if (!messageId) {
return map;
}
if (shouldSummarize && index === summaryIndex && !usePrevSummary) {
map.summaryMessage = { ...summaryMessage, messageId, tokenCount: summaryTokenCount };
}
map[messageId] = orderedWithInstructions[index].tokenCount;
return map;
}, {});
const promptTokens = this.maxContextTokens - remainingContextTokens;
logger.debug('[BaseClient] tokenCountMap:', tokenCountMap);
logger.debug('[BaseClient]', {
promptTokens,
remainingContextTokens,
payloadSize: payload.length,
maxContextTokens: this.maxContextTokens,
});
return { payload, tokenCountMap, promptTokens, messages: orderedWithInstructions };
}
async sendMessage(message, opts = {}) {
const { user, head, isEdited, conversationId, responseMessageId, saveOptions, userMessage } =
await this.handleStartMethods(message, opts);
const { generation = '' } = opts;
// It's not necessary to push to currentMessages
// depending on subclass implementation of handling messages
// When this is an edit, all messages are already in currentMessages, both user and response
if (isEdited) {
let latestMessage = this.currentMessages[this.currentMessages.length - 1];
if (!latestMessage) {
latestMessage = {
messageId: responseMessageId,
conversationId,
parentMessageId: userMessage.messageId,
isCreatedByUser: false,
model: this.modelOptions.model,
sender: this.sender,
text: generation,
};
this.currentMessages.push(userMessage, latestMessage);
} else {
latestMessage.text = generation;
}
} else {
this.currentMessages.push(userMessage);
}
let {
prompt: payload,
tokenCountMap,
promptTokens,
} = await this.buildMessages(
this.currentMessages,
// When the userMessage is pushed to currentMessages, the parentMessage is the userMessageId.
// this only matters when buildMessages is utilizing the parentMessageId, and may vary on implementation
isEdited ? head : userMessage.messageId,
this.getBuildMessagesOptions(opts),
opts,
);
if (tokenCountMap) {
logger.debug('[BaseClient] tokenCountMap', tokenCountMap);
if (tokenCountMap[userMessage.messageId]) {
userMessage.tokenCount = tokenCountMap[userMessage.messageId];
logger.debug('[BaseClient] userMessage', userMessage);
}
this.handleTokenCountMap(tokenCountMap);
}
if (!isEdited) {
await this.saveMessageToDatabase(userMessage, saveOptions, user);
}
if (
isEnabled(process.env.CHECK_BALANCE) &&
supportsBalanceCheck[this.options.endpointType ?? this.options.endpoint]
) {
await checkBalance({
req: this.options.req,
res: this.options.res,
txData: {
user: this.user,
tokenType: 'prompt',
amount: promptTokens,
model: this.modelOptions.model,
endpoint: this.options.endpoint,
endpointTokenConfig: this.options.endpointTokenConfig,
},
});
}
const completion = await this.sendCompletion(payload, opts);
const responseMessage = {
messageId: responseMessageId,
conversationId,
parentMessageId: userMessage.messageId,
isCreatedByUser: false,
isEdited,
model: this.modelOptions.model,
sender: this.sender,
text: addSpaceIfNeeded(generation) + completion,
promptTokens,
};
if (
tokenCountMap &&
this.recordTokenUsage &&
this.getTokenCountForResponse &&
this.getTokenCount
) {
responseMessage.tokenCount = this.getTokenCountForResponse(responseMessage);
const completionTokens = this.getTokenCount(completion);
await this.recordTokenUsage({ promptTokens, completionTokens });
}
await this.saveMessageToDatabase(responseMessage, saveOptions, user);
delete responseMessage.tokenCount;
return responseMessage;
}
async getConversation(conversationId, user = null) {
return await getConvo(user, conversationId);
}
async loadHistory(conversationId, parentMessageId = null) {
logger.debug('[BaseClient] Loading history:', { conversationId, parentMessageId });
const messages = (await getMessages({ conversationId })) ?? [];
if (messages.length === 0) {
return [];
}
let mapMethod = null;
if (this.getMessageMapMethod) {
mapMethod = this.getMessageMapMethod();
}
let _messages = this.constructor.getMessagesForConversation({
messages,
parentMessageId,
mapMethod,
});
_messages = await this.addPreviousAttachments(_messages);
if (!this.shouldSummarize) {
return _messages;
}
// Find the latest message with a 'summary' property
for (let i = _messages.length - 1; i >= 0; i--) {
if (_messages[i]?.summary) {
this.previous_summary = _messages[i];
break;
}
}
if (this.previous_summary) {
const { messageId, summary, tokenCount, summaryTokenCount } = this.previous_summary;
logger.debug('[BaseClient] Previous summary:', {
messageId,
summary,
tokenCount,
summaryTokenCount,
});
}
return _messages;
}
async saveMessageToDatabase(message, endpointOptions, user = null) {
await saveMessage({ ...message, endpoint: this.options.endpoint, user, unfinished: false });
await saveConvo(user, {
conversationId: message.conversationId,
endpoint: this.options.endpoint,
endpointType: this.options.endpointType,
...endpointOptions,
});
}
async updateMessageInDatabase(message) {
await updateMessage(message);
}
/**
* Iterate through messages, building an array based on the parentMessageId.
*
* This function constructs a conversation thread by traversing messages from a given parentMessageId up to the root message.
* It handles cyclic references by ensuring that a message is not processed more than once.
* If the 'summary' option is set to true and a message has a 'summary' property:
* - The message's 'role' is set to 'system'.
* - The message's 'text' is set to its 'summary'.
* - If the message has a 'summaryTokenCount', the message's 'tokenCount' is set to 'summaryTokenCount'.
* The traversal stops at the message with the 'summary' property.
*
* Each message object should have an 'id' or 'messageId' property and may have a 'parentMessageId' property.
* The 'parentMessageId' is the ID of the message that the current message is a reply to.
* If 'parentMessageId' is not present, null, or is Constants.NO_PARENT,
* the message is considered a root message.
*
* @param {Object} options - The options for the function.
* @param {Array} options.messages - An array of message objects. Each object should have either an 'id' or 'messageId' property, and may have a 'parentMessageId' property.
* @param {string} options.parentMessageId - The ID of the parent message to start the traversal from.
* @param {Function} [options.mapMethod] - An optional function to map over the ordered messages. If provided, it will be applied to each message in the resulting array.
* @param {boolean} [options.summary=false] - If set to true, the traversal modifies messages with 'summary' and 'summaryTokenCount' properties and stops at the message with a 'summary' property.
* @returns {Array} An array containing the messages in the order they should be displayed, starting with the most recent message with a 'summary' property if the 'summary' option is true, and ending with the message identified by 'parentMessageId'.
*/
static getMessagesForConversation({
messages,
parentMessageId,
mapMethod = null,
summary = false,
}) {
if (!messages || messages.length === 0) {
return [];
}
const orderedMessages = [];
let currentMessageId = parentMessageId;
const visitedMessageIds = new Set();
while (currentMessageId) {
if (visitedMessageIds.has(currentMessageId)) {
break;
}
const message = messages.find((msg) => {
const messageId = msg.messageId ?? msg.id;
return messageId === currentMessageId;
});
visitedMessageIds.add(currentMessageId);
if (!message) {
break;
}
if (summary && message.summary) {
message.role = 'system';
message.text = message.summary;
}
if (summary && message.summaryTokenCount) {
message.tokenCount = message.summaryTokenCount;
}
orderedMessages.push(message);
if (summary && message.summary) {
break;
}
currentMessageId =
message.parentMessageId === Constants.NO_PARENT ? null : message.parentMessageId;
}
orderedMessages.reverse();
if (mapMethod) {
return orderedMessages.map(mapMethod);
}
return orderedMessages;
}
/**
* Algorithm adapted from "6. Counting tokens for chat API calls" of
* https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
*
* An additional 3 tokens need to be added for assistant label priming after all messages have been counted.
* In our implementation, this is accounted for in the getMessagesWithinTokenLimit method.
*
* The content parts example was adapted from the following example:
* https://github.com/openai/openai-cookbook/pull/881/files
*
* Note: image token calculation is to be done elsewhere where we have access to the image metadata
*
* @param {Object} message
*/
getTokenCountForMessage(message) {
// Note: gpt-3.5-turbo and gpt-4 may update over time. Use default for these as well as for unknown models
let tokensPerMessage = 3;
let tokensPerName = 1;
if (this.modelOptions.model === 'gpt-3.5-turbo-0301') {
tokensPerMessage = 4;
tokensPerName = -1;
}
const processValue = (value) => {
if (Array.isArray(value)) {
for (let item of value) {
if (!item || !item.type || item.type === 'image_url') {
continue;
}
const nestedValue = item[item.type];
if (!nestedValue) {
continue;
}
processValue(nestedValue);
}
} else {
numTokens += this.getTokenCount(value);
}
};
let numTokens = tokensPerMessage;
for (let [key, value] of Object.entries(message)) {
processValue(value);
if (key === 'name') {
numTokens += tokensPerName;
}
}
return numTokens;
}
async sendPayload(payload, opts = {}) {
if (opts && typeof opts === 'object') {
this.setOptions(opts);
}
return await this.sendCompletion(payload, opts);
}
}
module.exports = BaseClient;