From ec922986a937d7d3d389466aee5a14280c2dfccd Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Mon, 21 Oct 2024 09:41:04 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20fix:=20Address=20Minor=20Agent?= =?UTF-8?q?=20Issues=20(#4483)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(Agents): remove test code in openAI/llm.js * refactor: add use of enums in encodeAndFormat * fix: image attachment payload formatting for agents * chore: imports --- api/server/controllers/agents/client.js | 2 ++ .../services/Endpoints/openAI/initialize.js | 2 +- api/server/services/Endpoints/openAI/llm.js | 1 - api/server/services/Files/images/encode.js | 20 +++++++++++++++---- packages/data-provider/src/config.ts | 1 + 5 files changed, 20 insertions(+), 6 deletions(-) diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js index b33e144f4a..f035d546f2 100644 --- a/api/server/controllers/agents/client.js +++ b/api/server/controllers/agents/client.js @@ -10,6 +10,7 @@ const { Callback, createMetadataAggregator } = require('@librechat/agents'); const { Constants, + VisionModes, openAISchema, EModelEndpoint, anthropicSchema, @@ -196,6 +197,7 @@ class AgentClient extends BaseClient { this.options.req, attachments, this.options.agent.provider, + VisionModes.agents, ); message.image_urls = image_urls.length ? image_urls : undefined; return files; diff --git a/api/server/services/Endpoints/openAI/initialize.js b/api/server/services/Endpoints/openAI/initialize.js index b72b3d32c4..215b943730 100644 --- a/api/server/services/Endpoints/openAI/initialize.js +++ b/api/server/services/Endpoints/openAI/initialize.js @@ -130,7 +130,7 @@ const initializeClient = async ({ if (optionsOnly) { const requestOptions = Object.assign( { - modelOptions: endpointOption.modelOptions, + modelOptions: endpointOption.model_parameters, }, clientOptions, ); diff --git a/api/server/services/Endpoints/openAI/llm.js b/api/server/services/Endpoints/openAI/llm.js index 3817224a4b..bd51679e1b 100644 --- a/api/server/services/Endpoints/openAI/llm.js +++ b/api/server/services/Endpoints/openAI/llm.js @@ -38,7 +38,6 @@ function getLLMConfig(apiKey, options = {}) { } = options; let llmConfig = { - model: 'gpt-4o-mini', streaming, }; diff --git a/api/server/services/Files/images/encode.js b/api/server/services/Files/images/encode.js index 05c9fc1d33..f457927019 100644 --- a/api/server/services/Files/images/encode.js +++ b/api/server/services/Files/images/encode.js @@ -1,6 +1,12 @@ const axios = require('axios'); -const { EModelEndpoint, FileSources, VisionModes } = require('librechat-data-provider'); -const { getStrategyFunctions } = require('../strategies'); +const { + FileSources, + VisionModes, + ImageDetail, + ContentTypes, + EModelEndpoint, +} = require('librechat-data-provider'); +const { getStrategyFunctions } = require('~/server/services/Files/strategies'); const { logger } = require('~/config'); /** @@ -79,7 +85,7 @@ async function encodeAndFormat(req, files, endpoint, mode) { promises.push(preparePayload(req, file)); } - const detail = req.body.imageDetail ?? 'auto'; + const detail = req.body.imageDetail ?? ImageDetail.auto; /** @type {Array<[MongoFile, string]>} */ const formattedImages = await Promise.all(promises); @@ -104,7 +110,7 @@ async function encodeAndFormat(req, files, endpoint, mode) { } const imagePart = { - type: 'image_url', + type: ContentTypes.IMAGE_URL, image_url: { url: imageContent.startsWith('http') ? imageContent @@ -113,6 +119,12 @@ async function encodeAndFormat(req, files, endpoint, mode) { }, }; + if (mode === VisionModes.agents) { + result.image_urls.push(imagePart); + result.files.push(fileMetadata); + continue; + } + if (endpoint && endpoint === EModelEndpoint.google && mode === VisionModes.generative) { delete imagePart.image_url; imagePart.inlineData = { diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index b95e0f138e..6ff6a46926 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -752,6 +752,7 @@ export const visionModels = [ ]; export enum VisionModes { generative = 'generative', + agents = 'agents', } export function validateVisionModel({