📸 feat: Gemini vision, Improved Logs and Multi-modal Handling (#1368)

* feat: add GOOGLE_MODELS env var
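
  A minimal sketch of how a model-list override like this is typically consumed. The variable name comes from the commit; the default list and the loader below are assumptions, not the actual LibreChat code:

  ```js
  // Hypothetical loader for the GOOGLE_MODELS override.
  // .env example: GOOGLE_MODELS=gemini-pro,gemini-pro-vision
  const DEFAULT_GOOGLE_MODELS = ['gemini-pro', 'gemini-pro-vision'];

  function getGoogleModels() {
    const raw = process.env.GOOGLE_MODELS;
    if (!raw || !raw.trim()) {
      return DEFAULT_GOOGLE_MODELS; // fall back when unset or empty
    }
    return raw
      .split(',')
      .map((model) => model.trim())
      .filter(Boolean);
  }

  module.exports = { getGoogleModels };
  ```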

* feat: add gemini vision support
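
  For context, Google's generative AI SDK takes image input as inline data alongside text parts. A hedged sketch of building such a payload (the helper name is hypothetical; the `{ text }` / `{ inlineData }` part shapes follow the SDK):

  ```js
  // Hypothetical helper: build the `parts` array for a gemini-pro-vision request.
  function buildVisionParts(text, imageBase64, mimeType = 'image/jpeg') {
    return [
      { text },
      { inlineData: { mimeType, data: imageBase64 } },
    ];
  }
  ```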

* refactor(GoogleClient): adjust clientOptions handling depending on model
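
  An illustrative sketch of what "depending on model" can look like; the dropped fields are assumptions about gemini-pro-vision's single-turn constraint, not GoogleClient's actual logic:

  ```js
  // Illustrative only: treat vision requests as single-turn, stripping
  // conversation-shaping options for gemini-pro-vision (assumed constraint).
  function adjustClientOptions(model, options) {
    if (model.includes('gemini-pro-vision')) {
      const { examples, context, ...rest } = options;
      return rest;
    }
    return options;
  }
  ```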

* fix(logger): fix redact logic and redact errors only
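
  A sketch of the "redact errors only" idea, assuming a winston-style logger; the key pattern and replacement text are illustrative:

  ```js
  const winston = require('winston');

  // Apply redaction only to error-level entries, leaving debug output intact.
  // Usage: winston.format.combine(redactErrors(), winston.format.json())
  const redactErrors = winston.format((info) => {
    if (info.level !== 'error') {
      return info;
    }
    if (typeof info.message === 'string') {
      // Illustrative pattern: mask anything that looks like a credential.
      info.message = info.message.replace(
        /((?:api[-_]?key|token|secret|password)[=:]\s*)\S+/gi,
        '$1[REDACTED]',
      );
    }
    return info;
  });
  ```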

* fix(GoogleClient): do not allow non-multimodal messages when gemini-pro-vision is selected
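
  A minimal sketch of the guard, assuming attachments are what mark a message as multimodal; the error text is illustrative:

  ```js
  // Hypothetical guard: gemini-pro-vision only accepts multimodal input.
  function assertMultimodal(model, attachments) {
    if (model === 'gemini-pro-vision' && (!attachments || attachments.length === 0)) {
      throw new Error(
        'gemini-pro-vision requires an image attachment; use gemini-pro for text-only messages.',
      );
    }
  }
  ```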

* refactor(OpenAIClient): use `isVisionModel` client property to avoid calling validateVisionModel multiple times

* refactor: better debug logging by correctly traversing objects, redacting sensitive info, and logging condensed versions of long values
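
  A sketch of the traversal-and-condensing idea; the length threshold and key pattern are assumptions:

  ```js
  const MAX_VALUE_LENGTH = 100; // assumed threshold

  // Walk a value recursively: redact sensitive-looking keys, truncate long
  // strings, and guard against cycles so debug logs stay readable.
  function condenseForLogging(value, seen = new WeakSet()) {
    if (typeof value === 'string') {
      return value.length > MAX_VALUE_LENGTH
        ? `${value.slice(0, MAX_VALUE_LENGTH)}... [truncated ${value.length} chars]`
        : value;
    }
    if (value === null || typeof value !== 'object') {
      return value;
    }
    if (seen.has(value)) {
      return '[Circular]'; // avoid infinite recursion on cyclic structures
    }
    seen.add(value);
    const result = Array.isArray(value) ? [] : {};
    for (const [key, val] of Object.entries(value)) {
      result[key] = /key|token|secret|password/i.test(key)
        ? '[REDACTED]'
        : condenseForLogging(val, seen);
    }
    return result;
  }
  ```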

* refactor(GoogleClient): allow response errors to be thrown/caught above client handling so the user receives a meaningful error message; debug orderedMessages, parentMessageId, and the buildMessages result
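
  A sketch of the resulting control flow with hypothetical names (`getClient` is illustrative): the client rethrows provider errors so the route handler can surface them:

  ```js
  const express = require('express');
  const app = express();
  app.use(express.json());

  app.post('/api/ask/google', async (req, res) => {
    try {
      const client = getClient(req); // hypothetical client factory
      const response = await client.sendMessage(req.body.text);
      res.json(response);
    } catch (error) {
      // Because the client rethrows instead of swallowing, the provider's own
      // error text (e.g. a safety-block reason) reaches the user.
      res.status(500).json({ error: error.message });
    }
  });
  ```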

* refactor(AskController): use the model from client.modelOptions.model when saving intermediate messages, which requires the progress callback to be initialized after the client
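
  The ordering matters because client initialization can auto-switch the model. A sketch with hypothetical names (`initializeClient`, `saveMessage`):

  ```js
  // Hypothetical names throughout; the point is the initialization order.
  async function handleAsk(req, endpointOption, saveMessage) {
    // 1) Initialize the client first; it may auto-switch the model
    //    (e.g. to gpt-4-vision-preview when attachments are present).
    const client = await initializeClient(req, endpointOption);

    // 2) Only then create the progress callback, so intermediate saves record
    //    the model the client actually uses.
    const onProgress = (partialText) =>
      saveMessage({ text: partialText, model: client.modelOptions.model });

    return client.sendMessage(req.body.text, { onProgress });
  }
  ```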

* feat(useSSE): revert to the previous model if it was auto-switched by the backend due to message attachments
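
  A sketch of the revert, assuming the final SSE payload reports the model the backend actually used; the names are illustrative, not the real useSSE internals:

  ```js
  // Illustrative finalize handler for an SSE stream.
  function makeFinalHandler(selectedModel, setConversation) {
    return (data) => {
      const respondedModel = data?.responseMessage?.model;
      if (respondedModel && respondedModel !== selectedModel) {
        // The backend switched models for the attachment; restore the user's
        // selection so their next message uses it again.
        setConversation((prev) => ({ ...prev, model: selectedModel }));
      }
    };
  }
  ```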

* docs: update Google docs with notes about Gemini Pro Vision

* fix: do not initialize Redis unless USE_REDIS is set, and increase max listeners to 20
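
  A sketch of the fix, assuming an ioredis-style client:

  ```js
  const Redis = require('ioredis');

  let redisClient = null;
  if (process.env.USE_REDIS === 'true') {
    redisClient = new Redis(process.env.REDIS_URI);
    // Each cache consumer adds listeners; the default cap of 10 can trigger
    // MaxListenersExceededWarning, so raise it to 20.
    redisClient.setMaxListeners(20);
  }

  module.exports = redisClient;
  ```
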
Danny Avila, 2023-12-16 20:45:27 -05:00 (committed by GitHub)
commit 0c326797dd (parent 676f133545)
21 changed files with 356 additions and 210 deletions

api/app/clients/OpenAIClient.js

@@ -1,7 +1,7 @@
 const OpenAI = require('openai');
 const { HttpsProxyAgent } = require('https-proxy-agent');
-const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
 const { getResponseSender, EModelEndpoint } = require('librechat-data-provider');
+const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
 const { encodeAndFormat, validateVisionModel } = require('~/server/services/Files/images');
 const { getModelMaxTokens, genAzureChatCompletion, extractBaseURL } = require('~/utils');
 const { truncateText, formatMessage, CUT_OFF_PROMPT } = require('./prompts');
@@ -76,11 +76,14 @@ class OpenAIClient extends BaseClient {
       };
     }

-    if (this.options.attachments && !validateVisionModel(this.modelOptions.model)) {
+    this.isVisionModel = validateVisionModel(this.modelOptions.model);
+
+    if (this.options.attachments && !this.isVisionModel) {
       this.modelOptions.model = 'gpt-4-vision-preview';
+      this.isVisionModel = true;
     }

-    if (validateVisionModel(this.modelOptions.model)) {
+    if (this.isVisionModel) {
       delete this.modelOptions.stop;
     }
@@ -152,7 +155,7 @@ class OpenAIClient extends BaseClient {
     this.setupTokens();

-    if (!this.modelOptions.stop && !validateVisionModel(this.modelOptions.model)) {
+    if (!this.modelOptions.stop && !this.isVisionModel) {
       const stopTokens = [this.startToken];
       if (this.endToken && this.endToken !== this.startToken) {
         stopTokens.push(this.endToken);
@@ -689,7 +692,7 @@ ${convo}
   }

   async recordTokenUsage({ promptTokens, completionTokens }) {
-    logger.debug('[OpenAIClient]', { promptTokens, completionTokens });
+    logger.debug('[OpenAIClient] recordTokenUsage:', { promptTokens, completionTokens });
     await spendTokens(
       {
         user: this.user,
@@ -757,7 +760,7 @@ ${convo}
       opts.httpAgent = new HttpsProxyAgent(this.options.proxy);
     }

-    if (validateVisionModel(modelOptions.model)) {
+    if (this.isVisionModel) {
       modelOptions.max_tokens = 4000;
     }
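
Note: the cached `this.isVisionModel` flag replaces the repeated `validateVisionModel` calls shown above. The explicit `max_tokens = 4000` is presumably set because `gpt-4-vision-preview` was known to return very short completions when `max_tokens` was omitted.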