LibreChat/api/app/clients/GoogleClient.js
Danny Avila ecd63eb9f1
feat: Assistants API, General File Support, Side Panel, File Explorer (#1696)
* feat: assistant name/icon in Landing & Header

* feat: assistname in textarea placeholder, and use `Assistant` as default name

* feat: display non-image files in user messages

* fix: only render files if files.length is > 0

* refactor(config -> file-config): move file related configuration values to separate module, add excel types

* chore: spreadsheet file rendering

* fix(Landing): dark mode style for Assistant Name

* refactor: move progress incrementing to own hook, start smaller, cap near limit \(1\)

* refactor(useContentHandler): add empty Text part if last part was completed tool or image

* chore: add accordion trigger border styling for dark mode

* feat: Assistant Builder model selection

* chore: use Spinner when Assistant is mutating

* fix(get/assistants): return correct response object `AssistantListResponse`

* refactor(Spinner): pass size as prop

* refactor: make assistant crud mutations optimistic, add types for options

* chore: remove assistants route and view

* chore: move assistant builder components to separate directory

* feat(ContextButton): delete Assistant via context button/dialog, add localization

* refactor: conditionally show use and context menu buttons, add localization for create assistant

* feat: save side panel states to localStorage

* style(SidePanel): improve avatar menu and assistant select styling for dark mode

* refactor: make NavToggle reusable for either side (left or right), add SidePanel Toggle with ability to close it completely

* fix: resize handle and navToggle behavior

* fix(/avatar/:assistant_id): await `deleteFile` and assign unique name to uploaded image

* WIP: file UI components from PR #576

* refactor(OpenAIMinimalIcon): pass className

* feat: formatDate helper fn

* feat: DataTableColumnHeader

* feat: add row selection, formatted row values, number of rows selected

* WIP: add files to Side panel temporarily

* feat: `LB_QueueAsyncCall`: Leaky Bucket queue for external APIs, use in `processDeleteRequest`

* fix(TFile): correct `source` type with `FileSources`

* fix(useFileHandling): use `continue` instead of return when iterating multiple files, add file type to extendedFile

* chore: add generic setter type

* refactor(processDeleteRequest): settle promises to prevent rejections from processing deletions, log errors

* feat: `useFileDeletion` to reuse file deletion logic

* refactor(useFileDeletion): make `setFiles` an optional param and use object as param

* feat: useDeleteFilesFromTable

* feat: use real `files` data and add deletion action to data table

* fix(Table): make headers sticky

* feat: add dynamic filtering for columns; only show to user Host or OpenAI storage type

* style(DropdownMenu): replace `slate` with `gray`

* style(DataTable): apply dark mode themes and other misc styling

* style(Columns): add color to OpenAI Storage option

* refactor(FileContainer): make file preview reusable

* refactor(Images): make image preview reusable

* refactor(FilePreview): make file prop optional for FileIcon and FilePreview, fix relative style

* feat(Columns): add file/image previews, set a minimum size to show for file size in bytes

* WIP: File Panel with real files and formatted

* feat: open files dialog from panel

* style: file data table mobile and general column styling fixes

* refactor(api/files): return files sorted by the most recently updated

* refactor: provide fileMap through context to prevent re-selecting files to map in different areas; remove unused imports commented out in PanelColumns

* refactor(ExtendFile): make File type optional, add `attached` to prevent attached files from being deleted on remove, make Message.files a partial TFile type

* feat: attach files through file panel

* refactor(useFileHandling): move files to the start of cache list when uploaded

* refactor(useDeleteFilesMutation): delete files from cache when successfully deleted from server

* fix(FileRow): handle possible edge case of duplication due to attaching recently uploaded file

* style(SidePanel): make resize grip border transparent, remove unnecessary styling on close sidepanel button

* feat: action utilities and tests

* refactor(actions): add `ValidationResult` type and change wording for no server URL found

* refactor(actions): check for empty server URL

* fix(data-provider): revert tsconfig to fix type issue resolution

* feat(client): first pass of actions input for assistants

* refactor(FunctionSignature): change method to output object instead of string

* refactor(models/Assistant): add actions field to schema, use searchParams object for methods, and add `getAssistant`

* feat: post actions input first pass
- create new Action document
- add actions to Assistant DB document
- create /action/:assistant_id POST route
- pass more props down from PanelSwitcher, derive assistant_id from switcher
- move privacy policy to ActionInput
- reset data on input change/validation
- add `useUpdateAction`
- conform FunctionSignature type to FunctionTool
- add action, assistant doc, update hook related types

* refactor: optimize assistant/actions relationship
- past domain in metadata as hostname and not a URL
- include domain in tool name
- add `getActions` for actions retrieval by user
- add `getAssistants` for assistant docs retrieval by user
- add `assistant_id` to Action schema
- move actions to own module as a subroute to `api/assistants`
- add `useGetActionsQuery` and `useGetAssistantDocsQuery` hooks
- fix Action type def

* feat: show assistant actions in assistant builder

* feat: switch to actions on action click, editing action styling

* fix: add Assistant state for builder panel to allow immediate selection of newly created assistants as well as retaining the current assistant when switching to a different panel within the builder

* refactor(SidePanel/NavToggle): offset less from right when SidePanel is completely collapsed

* chore: rename `processActions` -> `processRequiredActions`

* chore: rename Assistant API Action to RequiredAction

* refactor(actions): avoid nesting actual API params under generic `requestBody` to optimize LLM token usage

* fix(handleTools): avoid calling `validTool` if not defined, add optional param to skip the loading of specs, which throws an error in the context of assistants

* WIP: working first pass of toolCalls generated from openapi specs

* WIP: first pass ToolCall styling

* feat: programmatic iv encryption/decryption helpers

* fix: correct ActionAuth types/enums, and define type for AuthForm

* feat: encryption/decryption helpers for Action AuthMetadata

* refactor(getActions): remove sensitive fields from query response

* refactor(POST/actions): encrypt and remove sensitive fields from mutation response

* fix(ActionService): change ESM import to CJS

* feat: frontend auth handling for actions + optimistic update on action update/creation

* refactor(actions): use the correct variables and types for setAuth method

* refactor: POST /:assistant_id action can now handle updating an existing action, add `saved_auth_fields` to determine when user explicitly saves new auth creds. only send auth metadata if user explicitly saved fields

* refactor(createActionTool): catch errors and send back meaningful error message, add flag to `getActions` to determine whether to retrieve sensitive values or not

* refactor(ToolService): add `action` property to ToolCall PartMetadata to determine if the tool call was an action, fix parsing function name issue with actionDelimiter

* fix(ActionRequest): use URL class to correctly join endpoint parts for `execute` call

* feat: delete assistant actions

* refactor: conditionally show Available actions

* refactor: show `retrieval` and `code_interpreter` as Capabilities, swap `Switch` for `Checkbox`

* chore: remove shadow-stroke from messages

* WIP: first pass of Assistants Knowledge attachments

* refactor: remove AssistantsProvider in favor of FormProvider, fix selectedAssistant re-render bug, map Assistant file_ids to files via fileMap, initialize Knowledge component with mapped files if any exist

* fix: prevent deleting files on assistant file upload

* chore: remove console.log

* refactor(useUploadFileMutation): update files and assistants cache on upload

* chore: disable oauth option as not supported yet

* feat: cancel assistant runs

* refactor: initialize OpenAI client with helper function, resolve all related circular dependencies

* fix(DALL-E): initialization

* fix(process): openai client initialization

* fix: select an existing Assistant when the active one is deleted

* chore: allow attaching files for assistant endpoint, send back relevant OpenAI error message when uploading, deconstruct openAI initialization correctly, add `message_file` to formData when a file is attached to the message but not the assistant

* fix: add assistant_id on newConvo

* fix(initializeClient): import fix

* chore: swap setAssistant for setOption in useEffect

* fix(DALL-E): add processFileURL to loadTools call

* chore: add customConfig to debug logs

* feat: delete threads on convo delete

* chore: replace Assistants icon

* chore: remove console.dir() in `abortRun`

* feat(AssistantService): accumulate text values from run in openai.responseText

* feat: titling for assistants endpoint

* chore: move panel file components to appropriate directory, add file checks for attaching files, change icon for Attach Files

* refactor: add localizations to tools, plugins, add condition for adding/remove user plugins so tool selections don't affect this value

* chore: disable `import from url` action for now

* chore: remove textMimeTypes from default fileConfig for now

* fix: catch tool errors and send as outputs with error messages

* fix: React warning about button as descendant of button

* style: retrieval and cancelled icon

* WIP: pass isSubmitting to Parts, use InProgressCall to display cancelled tool calls correctly, show domain/function name

* fix(meilisearch): fix `postSaveHook` issue where indexing expects a mongo document, and join all text content parts for meili indexing

* ci: fix dall-e tests

* ci: fix client tests

* fix: button types in actions panel

* fix: plugin auth form persisting across tool selections

* fix(ci): update AppService spec with `loadAndFormatTools`

* fix(clearConvos): add id check earlier on

* refactor(AssistantAvatar): set previewURL dynamically when emtadata.avatar changes

* feat(assistants): addTitle cache setting

* fix(useSSE): resolve rebase conflicts

* fix: delete mutation

* style(SidePanel): make grip visible on active and hover, invisible otherwise

* ci: add data-provider tests to workflow, also update eslint/tsconfig to recognize specs, and add `text/csv` to fileConfig

* fix: handle edge case where auth object is undefined, and log errors

* refactor(actions): resolve  schemas, add tests for resolving refs, import specs from separate file for tests

* chore: remove comment

* fix(ActionsInput): re-render bug when initializing states with action fields

* fix(patch/assistant): filter undefined tools

* chore: add logging for errors in assistants routes

* fix(updateAssistant): map actions to functions to avoid overwriting

* fix(actions): properly handle GET paths

* fix(convos): unhandled delete thread exception

* refactor(AssistantService): pass both thread_id and conversationId when sending intermediate assistant messages, remove `mapMessagesToSteps` from AssistantService

* refactor(useSSE): replace all messages with runMessages and pass latestMessageId to abortRun; fix(checkMessageGaps): include tool calls when  syncing messages

* refactor(assistants/chat): invoke `createOnTextProgress` after thread creation

* chore: add typing

* style: sidepanel styling

* style: action tool call domain styling

* feat(assistants): default models, limit retrieval to certain models, add env variables to to env.example

* feat: assistants api key in EndpointService

* refactor: set assistant model to conversation on assistant switch

* refactor: set assistant model to conversation on assistant select from panel

* fix(retrieveAndProcessFile): catch attempt to download file with `assistant` purpose which is not allowed; add logging

* feat: retrieval styling, handling, and logging

* chore: rename ASSISTANTS_REVERSE_PROXY to ASSISTANTS_BASE_URL

* feat: FileContext for file metadata

* feat: context file mgmt and filtering

* style(Select): hover/rounded changes

* refactor: explicit conversation switch, endpoint dependent, through `useSelectAssistant`, which does not create new chat if current endpoint is assistant endpoint

* fix(AssistantAvatar): make empty previewURL if no avatar present

* refactor: side panel mobile styling

* style: merge tool and action section, optimize mobile styling for action/tool buttons

* fix: localStorage issues

* fix(useSelectAssistant): invoke react query hook directly in select hook as Map was not being updated in time

* style: light mode fixes

* fix: prevent sidepanel nav styling from shifting layout up

* refactor: change default layout (collapsed by default)

* style: mobile optimization of DataTable

* style: datatable

* feat: client-side hide right-side panel

* chore(useNewConvo): add partial typing for preset

* fix(useSelectAssistant): pass correct model name by using template as preset

* WIP: assistant presets

* refactor(ToolService): add native solution for `TavilySearchResults` and log tool output errors

* refactor: organize imports and use native TavilySearchResults

* fix(TavilySearchResults): stringify result

* fix(ToolCall): show tool call outputs when not an action

* chore: rename Prompt Prefix to custom instructions (in user facing text only)

* refactor(EditPresetDialog): Optimize setting title by debouncing, reset preset on dialog close to avoid state mixture

* feat: add `presetOverride` to overwrite active conversation settings when saving a Preset (relevant for client side updates only)

* feat: Assistant preset settings (client-side)

* fix(Switcher): only set assistant_id and model if current endpoint is Assistants

* feat: use `useDebouncedInput` for updating conversation settings, starting with EditPresetDialog title setting and Assistant instructions setting

* feat(Assistants): add instructions field to settings

* feat(chat/assistants): pass conversation settings to run body

* wip: begin localization and only allow actions if the assistant is created

* refactor(AssistantsPanel): knowledge localization, allow tools on creation

* feat: experimental: allow 'priming' values before assistant is created, that would normally require an assistant_id to be defined

* chore: trim console logs and make more meaningful

* chore: toast messages

* fix(ci): date test

* feat: create file when uploading Assistant Avatar

* feat: file upload rate limiting from custom config with dynamic file route initialization

* refactor: use file upload limiters on post routes only

* refactor(fileConfig): add endpoints field for endpoint specific fileconfigs, add mergeConfig function, add tests

* refactor: fileConfig route, dynamic multer instances used on all '/' and '/images' POST routes, data service and query hook

* feat: supportedMimeTypesSchema, test for array of regex

* feat: configurable file config limits

* chore: clarify assistants file knowledge prereq.

* chore(useTextarea): default to localized 'Assistant' if assistant name is empty

* feat: configurable file limits and toggle file upload per endpoint

* fix(useUploadFileMutation): prevent updating assistant.files cache if file upload is a message_file attachment

* fix(AssistantSelect): set last selected assistant only when timeout successfully runs

* refactor(queries): disable assistant queries if assistants endpoint is not enabled

* chore(Switcher): add localization

* chore: pluralize `assistant` for `EModelEndpoint key and value

* feat: show/hide assistant UI components based on endpoint availability; librechat.yaml config for disabling builder section and setting polling/timeout intervals

* fix(compactEndpointSchemas): use EModelEndpoint for schema access

* feat(runAssistant): use configured values from `librechat.yaml` for `pollIntervalMs` and `timeout`

* fix: naming issue

* wip: revert landing

* 🎉 happy birthday LibreChat (#1768)

* happy birthday LibreChat

* Refactor endpoint condition in Landing component

* Update birthday message in Eng.tsx

* fix(/config): avoid nesting ternaries

* refactor(/config): check birthday

---------

Co-authored-by: Danny Avila <messagedaniel@protonmail.com>

* fix: landing

* fix: landing

* fix(useMessageHelpers): hardcoded check to use EModelEndpoint instead

* fix(ci): convo test revert to main

* fix(assistants/chat): fix issue where assistant_id was being saved as model for convo

* chore: added logging, promises racing to prevent longer timeouts, explicit setting of maxRetries and timeouts, robust catching of invalid abortRun params

* refactor: use recoil state for `showStopButton` and only show for assistants endpoint after syncing conversation data

* refactor: optimize abortRun strategy using localStorage, refactor `abortConversation` to use async/await and await the result, refactor how the abortKey cache is set for runs

* fix(checkMessageGaps): assign `assistant_id` to synced messages if defined; prevents UI from showing blank assistant for cancelled messages

* refactor: re-order sequence of chat route, only allow aborting messages after run is created, cancel abortRun if there was a cancelling error (likely due already cancelled in chat route), and add extra logging

* chore(typedefs): add httpAgent type to OpenAIClient

* refactor: use custom implementation of retrieving run with axios to allow for timing out run query

* fix(waitForRun): handle timed out run retrieval query

* refactor: update preset conditions:
- presets will retain settings when a different endpoint is selected; for existing convos, either when modular or is assistant switch
- no longer use `navigateToConvo` on preset select

* fix: temporary calculator hack as expects string input when invoked

* fix: cancel abortRun only when cancelling error is a result of the run already being cancelled

* chore: remove use of `fileMaxSizeMB` and total counterpart (redundant)

* docs: custom config documentation update

* docs: assistants api setup and dotenv, new custom config fields

* refactor(Switcher): make Assistant switcher sticky in SidePanel

* chore(useSSE): remove console log of data and message index

* refactor(AssistantPanel): button styling and add secondary select button to bottom of panel

* refactor(OpenAIClient): allow passing conversationId to RunManager through titleConvo and initializeLLM to properly record title context tokens used in cases where conversationId was not defined by the client

* feat(assistants): token tracking for assistant runs

* chore(spendTokens): improve logging

* feat: support/exclude specific assistant Ids

* chore: add update `librechat.example.yaml`, optimize `AppService` handling, new tests for `AppService`, optimize missing/outdate config logging

* chore: mount docker logs to root of project

* chore: condense axios errors

* chore: bump vite

* chore: vite hot reload fix using latest version

* chore(getOpenAIModels): sort instruct models to the end of models list

* fix(assistants): user provided key

* fix(assistants): user provided key, invalidate more queries on revoke

---------

Co-authored-by: Marco Beretta <81851188+Berry-13@users.noreply.github.com>
2024-02-13 20:42:27 -05:00

597 lines
19 KiB
JavaScript

const { google } = require('googleapis');
const { Agent, ProxyAgent } = require('undici');
const { GoogleVertexAI } = require('langchain/llms/googlevertexai');
const { ChatGoogleGenerativeAI } = require('@langchain/google-genai');
const { ChatGoogleVertexAI } = require('langchain/chat_models/googlevertexai');
const { AIMessage, HumanMessage, SystemMessage } = require('langchain/schema');
const { encodeAndFormat } = require('~/server/services/Files/images');
const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
const {
validateVisionModel,
getResponseSender,
endpointSettings,
EModelEndpoint,
AuthKeys,
} = require('librechat-data-provider');
const { getModelMaxTokens } = require('~/utils');
const { formatMessage } = require('./prompts');
const BaseClient = require('./BaseClient');
const { logger } = require('~/config');
const loc = 'us-central1';
const publisher = 'google';
const endpointPrefix = `https://${loc}-aiplatform.googleapis.com`;
// const apiEndpoint = loc + '-aiplatform.googleapis.com';
const tokenizersCache = {};
const settings = endpointSettings[EModelEndpoint.google];
class GoogleClient extends BaseClient {
constructor(credentials, options = {}) {
super('apiKey', options);
let creds = {};
if (typeof credentials === 'string') {
creds = JSON.parse(credentials);
} else if (credentials) {
creds = credentials;
}
const serviceKey = creds[AuthKeys.GOOGLE_SERVICE_KEY] ?? {};
this.serviceKey =
serviceKey && typeof serviceKey === 'string' ? JSON.parse(serviceKey) : serviceKey ?? {};
this.client_email = this.serviceKey.client_email;
this.private_key = this.serviceKey.private_key;
this.project_id = this.serviceKey.project_id;
this.access_token = null;
this.apiKey = creds[AuthKeys.GOOGLE_API_KEY];
if (options.skipSetOptions) {
return;
}
this.setOptions(options);
}
/* Google specific methods */
constructUrl() {
return `${endpointPrefix}/v1/projects/${this.project_id}/locations/${loc}/publishers/${publisher}/models/${this.modelOptions.model}:serverStreamingPredict`;
}
async getClient() {
const scopes = ['https://www.googleapis.com/auth/cloud-platform'];
const jwtClient = new google.auth.JWT(this.client_email, null, this.private_key, scopes);
jwtClient.authorize((err) => {
if (err) {
logger.error('jwtClient failed to authorize', err);
throw err;
}
});
return jwtClient;
}
async getAccessToken() {
const scopes = ['https://www.googleapis.com/auth/cloud-platform'];
const jwtClient = new google.auth.JWT(this.client_email, null, this.private_key, scopes);
return new Promise((resolve, reject) => {
jwtClient.authorize((err, tokens) => {
if (err) {
logger.error('jwtClient failed to authorize', err);
reject(err);
} else {
resolve(tokens.access_token);
}
});
});
}
/* Required Client methods */
setOptions(options) {
if (this.options && !this.options.replaceOptions) {
// nested options aren't spread properly, so we need to do this manually
this.options.modelOptions = {
...this.options.modelOptions,
...options.modelOptions,
};
delete options.modelOptions;
// now we can merge options
this.options = {
...this.options,
...options,
};
} else {
this.options = options;
}
this.options.examples = (this.options.examples ?? [])
.filter((ex) => ex)
.filter((obj) => obj.input.content !== '' && obj.output.content !== '');
const modelOptions = this.options.modelOptions || {};
this.modelOptions = {
...modelOptions,
// set some good defaults (check for undefined in some cases because they may be 0)
model: modelOptions.model || settings.model.default,
temperature:
typeof modelOptions.temperature === 'undefined'
? settings.temperature.default
: modelOptions.temperature,
topP: typeof modelOptions.topP === 'undefined' ? settings.topP.default : modelOptions.topP,
topK: typeof modelOptions.topK === 'undefined' ? settings.topK.default : modelOptions.topK,
// stop: modelOptions.stop // no stop method for now
};
if (this.options.attachments) {
this.modelOptions.model = 'gemini-pro-vision';
}
// TODO: as of 12/14/23, only gemini models are "Generative AI" models provided by Google
this.isGenerativeModel = this.modelOptions.model.includes('gemini');
this.isVisionModel = validateVisionModel(this.modelOptions.model);
const { isGenerativeModel } = this;
if (this.isVisionModel && !this.options.attachments) {
this.modelOptions.model = 'gemini-pro';
this.isVisionModel = false;
}
this.isChatModel = !isGenerativeModel && this.modelOptions.model.includes('chat');
const { isChatModel } = this;
this.isTextModel =
!isGenerativeModel && !isChatModel && /code|text/.test(this.modelOptions.model);
const { isTextModel } = this;
this.maxContextTokens = getModelMaxTokens(this.modelOptions.model, EModelEndpoint.google);
// The max prompt tokens is determined by the max context tokens minus the max response tokens.
// Earlier messages will be dropped until the prompt is within the limit.
this.maxResponseTokens = this.modelOptions.maxOutputTokens || settings.maxOutputTokens.default;
if (this.maxContextTokens > 32000) {
this.maxContextTokens = this.maxContextTokens - this.maxResponseTokens;
}
this.maxPromptTokens =
this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens;
if (this.maxPromptTokens + this.maxResponseTokens > this.maxContextTokens) {
throw new Error(
`maxPromptTokens + maxOutputTokens (${this.maxPromptTokens} + ${this.maxResponseTokens} = ${
this.maxPromptTokens + this.maxResponseTokens
}) must be less than or equal to maxContextTokens (${this.maxContextTokens})`,
);
}
this.sender =
this.options.sender ??
getResponseSender({
model: this.modelOptions.model,
endpoint: EModelEndpoint.google,
modelLabel: this.options.modelLabel,
});
this.userLabel = this.options.userLabel || 'User';
this.modelLabel = this.options.modelLabel || 'Assistant';
if (isChatModel || isGenerativeModel) {
// Use these faux tokens to help the AI understand the context since we are building the chat log ourselves.
// Trying to use "<|im_start|>" causes the AI to still generate "<" or "<|" at the end sometimes for some reason,
// without tripping the stop sequences, so I'm using "||>" instead.
this.startToken = '||>';
this.endToken = '';
this.gptEncoder = this.constructor.getTokenizer('cl100k_base');
} else if (isTextModel) {
this.startToken = '||>';
this.endToken = '';
this.gptEncoder = this.constructor.getTokenizer('text-davinci-003', true, {
'<|im_start|>': 100264,
'<|im_end|>': 100265,
});
} else {
// Previously I was trying to use "<|endoftext|>" but there seems to be some bug with OpenAI's token counting
// system that causes only the first "<|endoftext|>" to be counted as 1 token, and the rest are not treated
// as a single token. So we're using this instead.
this.startToken = '||>';
this.endToken = '';
try {
this.gptEncoder = this.constructor.getTokenizer(this.modelOptions.model, true);
} catch {
this.gptEncoder = this.constructor.getTokenizer('text-davinci-003', true);
}
}
if (!this.modelOptions.stop) {
const stopTokens = [this.startToken];
if (this.endToken && this.endToken !== this.startToken) {
stopTokens.push(this.endToken);
}
stopTokens.push(`\n${this.userLabel}:`);
stopTokens.push('<|diff_marker|>');
// I chose not to do one for `modelLabel` because I've never seen it happen
this.modelOptions.stop = stopTokens;
}
if (this.options.reverseProxyUrl) {
this.completionsUrl = this.options.reverseProxyUrl;
} else {
this.completionsUrl = this.constructUrl();
}
return this;
}
formatMessages() {
return ((message) => ({
author: message?.author ?? (message.isCreatedByUser ? this.userLabel : this.modelLabel),
content: message?.content ?? message.text,
})).bind(this);
}
async buildVisionMessages(messages = [], parentMessageId) {
const { prompt } = await this.buildMessagesPrompt(messages, parentMessageId);
const attachments = await this.options.attachments;
const { files, image_urls } = await encodeAndFormat(
this.options.req,
attachments.filter((file) => file.type.includes('image')),
EModelEndpoint.google,
);
const latestMessage = { ...messages[messages.length - 1] };
latestMessage.image_urls = image_urls;
this.options.attachments = files;
latestMessage.text = prompt;
const payload = {
instances: [
{
messages: [new HumanMessage(formatMessage({ message: latestMessage }))],
},
],
parameters: this.modelOptions,
};
return { prompt: payload };
}
async buildMessages(messages = [], parentMessageId) {
if (!this.isGenerativeModel && !this.project_id) {
throw new Error(
'[GoogleClient] a Service Account JSON Key is required for PaLM 2 and Codey models (Vertex AI)',
);
} else if (this.isGenerativeModel && (!this.apiKey || this.apiKey === 'user_provided')) {
throw new Error(
'[GoogleClient] an API Key is required for Gemini models (Generative Language API)',
);
}
if (this.options.attachments) {
return this.buildVisionMessages(messages, parentMessageId);
}
if (this.isTextModel) {
return this.buildMessagesPrompt(messages, parentMessageId);
}
let payload = {
instances: [
{
messages: messages
.map(this.formatMessages())
.map((msg) => ({ ...msg, role: msg.author === 'User' ? 'user' : 'assistant' }))
.map((message) => formatMessage({ message, langChain: true })),
},
],
parameters: this.modelOptions,
};
if (this.options.promptPrefix) {
payload.instances[0].context = this.options.promptPrefix;
}
if (this.options.examples.length > 0) {
payload.instances[0].examples = this.options.examples;
}
logger.debug('[GoogleClient] buildMessages', payload);
return { prompt: payload };
}
async buildMessagesPrompt(messages, parentMessageId) {
const orderedMessages = this.constructor.getMessagesForConversation({
messages,
parentMessageId,
});
logger.debug('[GoogleClient]', {
orderedMessages,
parentMessageId,
});
const formattedMessages = orderedMessages.map((message) => ({
author: message.isCreatedByUser ? this.userLabel : this.modelLabel,
content: message?.content ?? message.text,
}));
let lastAuthor = '';
let groupedMessages = [];
for (let message of formattedMessages) {
// If last author is not same as current author, add to new group
if (lastAuthor !== message.author) {
groupedMessages.push({
author: message.author,
content: [message.content],
});
lastAuthor = message.author;
// If same author, append content to the last group
} else {
groupedMessages[groupedMessages.length - 1].content.push(message.content);
}
}
let identityPrefix = '';
if (this.options.userLabel) {
identityPrefix = `\nHuman's name: ${this.options.userLabel}`;
}
if (this.options.modelLabel) {
identityPrefix = `${identityPrefix}\nYou are ${this.options.modelLabel}`;
}
let promptPrefix = (this.options.promptPrefix || '').trim();
if (promptPrefix) {
// If the prompt prefix doesn't end with the end token, add it.
if (!promptPrefix.endsWith(`${this.endToken}`)) {
promptPrefix = `${promptPrefix.trim()}${this.endToken}\n\n`;
}
promptPrefix = `\nContext:\n${promptPrefix}`;
}
if (identityPrefix) {
promptPrefix = `${identityPrefix}${promptPrefix}`;
}
// Prompt AI to respond, empty if last message was from AI
let isEdited = lastAuthor === this.modelLabel;
const promptSuffix = isEdited ? '' : `${promptPrefix}\n\n${this.modelLabel}:\n`;
let currentTokenCount = isEdited
? this.getTokenCount(promptPrefix)
: this.getTokenCount(promptSuffix);
let promptBody = '';
const maxTokenCount = this.maxPromptTokens;
const context = [];
// Iterate backwards through the messages, adding them to the prompt until we reach the max token count.
// Do this within a recursive async function so that it doesn't block the event loop for too long.
// Also, remove the next message when the message that puts us over the token limit is created by the user.
// Otherwise, remove only the exceeding message. This is due to Anthropic's strict payload rule to start with "Human:".
const nextMessage = {
remove: false,
tokenCount: 0,
messageString: '',
};
const buildPromptBody = async () => {
if (currentTokenCount < maxTokenCount && groupedMessages.length > 0) {
const message = groupedMessages.pop();
const isCreatedByUser = message.author === this.userLabel;
// Use promptPrefix if message is edited assistant'
const messagePrefix =
isCreatedByUser || !isEdited
? `\n\n${message.author}:`
: `${promptPrefix}\n\n${message.author}:`;
const messageString = `${messagePrefix}\n${message.content}${this.endToken}\n`;
let newPromptBody = `${messageString}${promptBody}`;
context.unshift(message);
const tokenCountForMessage = this.getTokenCount(messageString);
const newTokenCount = currentTokenCount + tokenCountForMessage;
if (!isCreatedByUser) {
nextMessage.messageString = messageString;
nextMessage.tokenCount = tokenCountForMessage;
}
if (newTokenCount > maxTokenCount) {
if (!promptBody) {
// This is the first message, so we can't add it. Just throw an error.
throw new Error(
`Prompt is too long. Max token count is ${maxTokenCount}, but prompt is ${newTokenCount} tokens long.`,
);
}
// Otherwise, ths message would put us over the token limit, so don't add it.
// if created by user, remove next message, otherwise remove only this message
if (isCreatedByUser) {
nextMessage.remove = true;
}
return false;
}
promptBody = newPromptBody;
currentTokenCount = newTokenCount;
// Switch off isEdited after using it for the first time
if (isEdited) {
isEdited = false;
}
// wait for next tick to avoid blocking the event loop
await new Promise((resolve) => setImmediate(resolve));
return buildPromptBody();
}
return true;
};
await buildPromptBody();
if (nextMessage.remove) {
promptBody = promptBody.replace(nextMessage.messageString, '');
currentTokenCount -= nextMessage.tokenCount;
context.shift();
}
let prompt = `${promptBody}${promptSuffix}`.trim();
// Add 2 tokens for metadata after all messages have been counted.
currentTokenCount += 2;
// Use up to `this.maxContextTokens` tokens (prompt + response), but try to leave `this.maxTokens` tokens for the response.
this.modelOptions.maxOutputTokens = Math.min(
this.maxContextTokens - currentTokenCount,
this.maxResponseTokens,
);
return { prompt, context };
}
async _getCompletion(payload, abortController = null) {
if (!abortController) {
abortController = new AbortController();
}
const { debug } = this.options;
const url = this.completionsUrl;
if (debug) {
logger.debug('GoogleClient _getCompletion', { url, payload });
}
const opts = {
method: 'POST',
agent: new Agent({
bodyTimeout: 0,
headersTimeout: 0,
}),
signal: abortController.signal,
};
if (this.options.proxy) {
opts.agent = new ProxyAgent(this.options.proxy);
}
const client = await this.getClient();
const res = await client.request({ url, method: 'POST', data: payload });
logger.debug('GoogleClient _getCompletion', { res });
return res.data;
}
createLLM(clientOptions) {
if (this.isGenerativeModel) {
return new ChatGoogleGenerativeAI({ ...clientOptions, apiKey: this.apiKey });
}
return this.isTextModel
? new GoogleVertexAI(clientOptions)
: new ChatGoogleVertexAI(clientOptions);
}
async getCompletion(_payload, options = {}) {
const { onProgress, abortController } = options;
const { parameters, instances } = _payload;
const { messages: _messages, context, examples: _examples } = instances?.[0] ?? {};
let examples;
let clientOptions = { ...parameters, maxRetries: 2 };
if (!this.isGenerativeModel) {
clientOptions['authOptions'] = {
credentials: {
...this.serviceKey,
},
projectId: this.project_id,
};
}
if (!parameters) {
clientOptions = { ...clientOptions, ...this.modelOptions };
}
if (this.isGenerativeModel) {
clientOptions.modelName = clientOptions.model;
delete clientOptions.model;
}
if (_examples && _examples.length) {
examples = _examples
.map((ex) => {
const { input, output } = ex;
if (!input || !output) {
return undefined;
}
return {
input: new HumanMessage(input.content),
output: new AIMessage(output.content),
};
})
.filter((ex) => ex);
clientOptions.examples = examples;
}
const model = this.createLLM(clientOptions);
let reply = '';
const messages = this.isTextModel ? _payload.trim() : _messages;
if (!this.isVisionModel && context && messages?.length > 0) {
messages.unshift(new SystemMessage(context));
}
const stream = await model.stream(messages, {
signal: abortController.signal,
timeout: 7000,
});
for await (const chunk of stream) {
await this.generateTextStream(chunk?.content ?? chunk, onProgress, {
delay: this.isGenerativeModel ? 12 : 8,
});
reply += chunk?.content ?? chunk;
}
return reply;
}
getSaveOptions() {
return {
promptPrefix: this.options.promptPrefix,
modelLabel: this.options.modelLabel,
...this.modelOptions,
};
}
getBuildMessagesOptions() {
// logger.debug('GoogleClient doesn\'t use getBuildMessagesOptions');
}
async sendCompletion(payload, opts = {}) {
let reply = '';
reply = await this.getCompletion(payload, opts);
return reply.trim();
}
/* TO-DO: Handle tokens with Google tokenization NOTE: these are required */
static getTokenizer(encoding, isModelName = false, extendSpecialTokens = {}) {
if (tokenizersCache[encoding]) {
return tokenizersCache[encoding];
}
let tokenizer;
if (isModelName) {
tokenizer = encodingForModel(encoding, extendSpecialTokens);
} else {
tokenizer = getEncoding(encoding, extendSpecialTokens);
}
tokenizersCache[encoding] = tokenizer;
return tokenizer;
}
getTokenCount(text) {
return this.gptEncoder.encode(text, 'all').length;
}
}
module.exports = GoogleClient;