diff --git a/.env.example b/.env.example index ac387b8c68..e4aa8a46f0 100644 --- a/.env.example +++ b/.env.example @@ -140,7 +140,7 @@ GOOGLE_KEY=user_provided # GOOGLE_REVERSE_PROXY= # Gemini API (AI Studio) -# GOOGLE_MODELS=gemini-1.5-flash-latest,gemini-1.0-pro,gemini-1.0-pro-001,gemini-1.0-pro-latest,gemini-1.0-pro-vision-latest,gemini-1.5-pro-latest,gemini-pro,gemini-pro-vision +# GOOGLE_MODELS=gemini-exp-1121,gemini-exp-1114,gemini-1.5-flash-latest,gemini-1.0-pro,gemini-1.0-pro-001,gemini-1.0-pro-latest,gemini-1.0-pro-vision-latest,gemini-1.5-pro-latest,gemini-pro,gemini-pro-vision # Vertex AI # GOOGLE_MODELS=gemini-1.5-flash-preview-0514,gemini-1.5-pro-preview-0514,gemini-1.0-pro-vision-001,gemini-1.0-pro-002,gemini-1.0-pro-001,gemini-pro-vision,gemini-1.0-pro diff --git a/api/app/clients/GoogleClient.js b/api/app/clients/GoogleClient.js index 325d13d9fd..ab1094b776 100644 --- a/api/app/clients/GoogleClient.js +++ b/api/app/clients/GoogleClient.js @@ -35,6 +35,7 @@ const endpointPrefix = `https://${loc}-aiplatform.googleapis.com`; const tokenizersCache = {}; const settings = endpointSettings[EModelEndpoint.google]; +const EXCLUDED_GENAI_MODELS = /gemini-(?:1\.0|1-0|pro)/; class GoogleClient extends BaseClient { constructor(credentials, options = {}) { @@ -366,7 +367,7 @@ class GoogleClient extends BaseClient { ); } - if (!this.project_id && this.modelOptions.model.includes('1.5')) { + if (!this.project_id && !EXCLUDED_GENAI_MODELS.test(this.modelOptions.model)) { return await this.buildGenerativeMessages(messages); } @@ -604,15 +605,12 @@ class GoogleClient extends BaseClient { } else if (this.project_id) { logger.debug('Creating VertexAI client'); return new ChatVertexAI(clientOptions); - } else if (model.includes('1.5')) { + } else if (!EXCLUDED_GENAI_MODELS.test(model)) { logger.debug('Creating GenAI client'); - return new GenAI(this.apiKey).getGenerativeModel( - { - ...clientOptions, - model, - }, - { apiVersion: 'v1beta' }, - ); + return new GenAI(this.apiKey).getGenerativeModel({ + ...clientOptions, + model, + }); } logger.debug('Creating Chat Google Generative AI client'); @@ -674,7 +672,7 @@ class GoogleClient extends BaseClient { } const modelName = clientOptions.modelName ?? clientOptions.model ?? ''; - if (modelName?.includes('1.5') && !this.project_id) { + if (!EXCLUDED_GENAI_MODELS.test(modelName) && !this.project_id) { const client = model; const requestOptions = { contents: _payload, @@ -697,7 +695,7 @@ class GoogleClient extends BaseClient { requestOptions.safetySettings = _payload.safetySettings; - const delay = modelName.includes('flash') ? 8 : 14; + const delay = modelName.includes('flash') ? 8 : 15; const result = await client.generateContentStream(requestOptions); for await (const chunk of result.stream) { const chunkText = chunk.text(); @@ -712,7 +710,6 @@ class GoogleClient extends BaseClient { const stream = await model.stream(messages, { signal: abortController.signal, - timeout: 7000, safetySettings: _payload.safetySettings, }); @@ -720,7 +717,7 @@ class GoogleClient extends BaseClient { if (!this.options.streamRate) { if (this.isGenerativeModel) { - delay = 12; + delay = 15; } if (modelName.includes('flash')) { delay = 5; @@ -774,8 +771,8 @@ class GoogleClient extends BaseClient { const messages = this.isTextModel ? _payload.trim() : _messages; const modelName = clientOptions.modelName ?? clientOptions.model ?? ''; - if (modelName?.includes('1.5') && !this.project_id) { - logger.debug('Identified titling model as 1.5 version'); + if (!EXCLUDED_GENAI_MODELS.test(modelName) && !this.project_id) { + logger.debug('Identified titling model as GenAI version'); /** @type {GenerativeModel} */ const client = model; const requestOptions = { diff --git a/api/package.json b/api/package.json index d992b916c0..1b9b480e96 100644 --- a/api/package.json +++ b/api/package.json @@ -41,7 +41,7 @@ "@keyv/redis": "^2.8.1", "@langchain/community": "^0.3.13", "@langchain/core": "^0.3.17", - "@langchain/google-genai": "^0.1.3", + "@langchain/google-genai": "^0.1.4", "@langchain/google-vertexai": "^0.1.2", "@langchain/textsplitters": "^0.1.0", "@librechat/agents": "^1.7.7", diff --git a/api/utils/tokens.js b/api/utils/tokens.js index 4bf66508f8..b7ede61a47 100644 --- a/api/utils/tokens.js +++ b/api/utils/tokens.js @@ -49,6 +49,7 @@ const googleModels = { /* Max I/O is combined so we subtract the amount from max response tokens for actual total */ gemini: 30720, // -2048 from max 'gemini-pro-vision': 12288, // -4096 from max + 'gemini-exp': 8000, 'gemini-1.5': 1048576, // -8192 from max 'text-bison-32k': 32758, // -10 from max 'chat-bison-32k': 32758, // -10 from max diff --git a/package-lock.json b/package-lock.json index fbe1e0f4a7..8a1e25021f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -50,7 +50,7 @@ "@keyv/redis": "^2.8.1", "@langchain/community": "^0.3.13", "@langchain/core": "^0.3.17", - "@langchain/google-genai": "^0.1.3", + "@langchain/google-genai": "^0.1.4", "@langchain/google-vertexai": "^0.1.2", "@langchain/textsplitters": "^0.1.0", "@librechat/agents": "^1.7.7", @@ -218,10 +218,9 @@ } }, "api/node_modules/@langchain/google-genai": { - "version": "0.1.3", - "resolved": "https://registry.npmjs.org/@langchain/google-genai/-/google-genai-0.1.3.tgz", - "integrity": "sha512-GHZV4qEMoi+rnqSM5I+ADXwUSBRSD0hsmlS1lTQEGW9HmvzPu3zryvYjuRAoelZSENTmZmBatdM+kgiV8H2+JA==", - "license": "MIT", + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/@langchain/google-genai/-/google-genai-0.1.4.tgz", + "integrity": "sha512-b8qrqnHYbNseaAikrWyxuDTww6CUIse82F5/BmF2GtWVR25yJrNUWETfTp7o7iIMxhFR0PuQag4gEZOL74F5Tw==", "dependencies": { "@google/generative-ai": "^0.21.0", "zod-to-json-schema": "^3.22.4"