🎯 fix: Google AI Client Stability; feat: gemini-exp models (#4781)

* fix: Google timing out and issuing AbortError, bump package, and use `@google/generative-ai` explicitly for latest models

* feat: gemini-exp models
This commit is contained in:
Danny Avila 2024-11-22 19:08:14 -05:00 committed by GitHub
parent 56b60cf863
commit 2a77c98f51
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 19 additions and 22 deletions

View file

@@ -140,7 +140,7 @@ GOOGLE_KEY=user_provided
# GOOGLE_REVERSE_PROXY= # GOOGLE_REVERSE_PROXY=
# Gemini API (AI Studio) # Gemini API (AI Studio)
# GOOGLE_MODELS=gemini-1.5-flash-latest,gemini-1.0-pro,gemini-1.0-pro-001,gemini-1.0-pro-latest,gemini-1.0-pro-vision-latest,gemini-1.5-pro-latest,gemini-pro,gemini-pro-vision # GOOGLE_MODELS=gemini-exp-1121,gemini-exp-1114,gemini-1.5-flash-latest,gemini-1.0-pro,gemini-1.0-pro-001,gemini-1.0-pro-latest,gemini-1.0-pro-vision-latest,gemini-1.5-pro-latest,gemini-pro,gemini-pro-vision
# Vertex AI # Vertex AI
# GOOGLE_MODELS=gemini-1.5-flash-preview-0514,gemini-1.5-pro-preview-0514,gemini-1.0-pro-vision-001,gemini-1.0-pro-002,gemini-1.0-pro-001,gemini-pro-vision,gemini-1.0-pro # GOOGLE_MODELS=gemini-1.5-flash-preview-0514,gemini-1.5-pro-preview-0514,gemini-1.0-pro-vision-001,gemini-1.0-pro-002,gemini-1.0-pro-001,gemini-pro-vision,gemini-1.0-pro

View file

@@ -35,6 +35,7 @@ const endpointPrefix = `https://${loc}-aiplatform.googleapis.com`;
const tokenizersCache = {}; const tokenizersCache = {};
const settings = endpointSettings[EModelEndpoint.google]; const settings = endpointSettings[EModelEndpoint.google];
const EXCLUDED_GENAI_MODELS = /gemini-(?:1\.0|1-0|pro)/;
class GoogleClient extends BaseClient { class GoogleClient extends BaseClient {
constructor(credentials, options = {}) { constructor(credentials, options = {}) {
@@ -366,7 +367,7 @@ class GoogleClient extends BaseClient {
); );
} }
if (!this.project_id && this.modelOptions.model.includes('1.5')) { if (!this.project_id && !EXCLUDED_GENAI_MODELS.test(this.modelOptions.model)) {
return await this.buildGenerativeMessages(messages); return await this.buildGenerativeMessages(messages);
} }
@@ -604,15 +605,12 @@ class GoogleClient extends BaseClient {
} else if (this.project_id) { } else if (this.project_id) {
logger.debug('Creating VertexAI client'); logger.debug('Creating VertexAI client');
return new ChatVertexAI(clientOptions); return new ChatVertexAI(clientOptions);
} else if (model.includes('1.5')) { } else if (!EXCLUDED_GENAI_MODELS.test(model)) {
logger.debug('Creating GenAI client'); logger.debug('Creating GenAI client');
return new GenAI(this.apiKey).getGenerativeModel( return new GenAI(this.apiKey).getGenerativeModel({
{
...clientOptions, ...clientOptions,
model, model,
}, });
{ apiVersion: 'v1beta' },
);
} }
logger.debug('Creating Chat Google Generative AI client'); logger.debug('Creating Chat Google Generative AI client');
@@ -674,7 +672,7 @@ class GoogleClient extends BaseClient {
} }
const modelName = clientOptions.modelName ?? clientOptions.model ?? ''; const modelName = clientOptions.modelName ?? clientOptions.model ?? '';
if (modelName?.includes('1.5') && !this.project_id) { if (!EXCLUDED_GENAI_MODELS.test(modelName) && !this.project_id) {
const client = model; const client = model;
const requestOptions = { const requestOptions = {
contents: _payload, contents: _payload,
@@ -697,7 +695,7 @@ class GoogleClient extends BaseClient {
requestOptions.safetySettings = _payload.safetySettings; requestOptions.safetySettings = _payload.safetySettings;
const delay = modelName.includes('flash') ? 8 : 14; const delay = modelName.includes('flash') ? 8 : 15;
const result = await client.generateContentStream(requestOptions); const result = await client.generateContentStream(requestOptions);
for await (const chunk of result.stream) { for await (const chunk of result.stream) {
const chunkText = chunk.text(); const chunkText = chunk.text();
@@ -712,7 +710,6 @@ class GoogleClient extends BaseClient {
const stream = await model.stream(messages, { const stream = await model.stream(messages, {
signal: abortController.signal, signal: abortController.signal,
timeout: 7000,
safetySettings: _payload.safetySettings, safetySettings: _payload.safetySettings,
}); });
@@ -720,7 +717,7 @@ class GoogleClient extends BaseClient {
if (!this.options.streamRate) { if (!this.options.streamRate) {
if (this.isGenerativeModel) { if (this.isGenerativeModel) {
delay = 12; delay = 15;
} }
if (modelName.includes('flash')) { if (modelName.includes('flash')) {
delay = 5; delay = 5;
@@ -774,8 +771,8 @@ class GoogleClient extends BaseClient {
const messages = this.isTextModel ? _payload.trim() : _messages; const messages = this.isTextModel ? _payload.trim() : _messages;
const modelName = clientOptions.modelName ?? clientOptions.model ?? ''; const modelName = clientOptions.modelName ?? clientOptions.model ?? '';
if (modelName?.includes('1.5') && !this.project_id) { if (!EXCLUDED_GENAI_MODELS.test(modelName) && !this.project_id) {
logger.debug('Identified titling model as 1.5 version'); logger.debug('Identified titling model as GenAI version');
/** @type {GenerativeModel} */ /** @type {GenerativeModel} */
const client = model; const client = model;
const requestOptions = { const requestOptions = {

View file

@@ -41,7 +41,7 @@
"@keyv/redis": "^2.8.1", "@keyv/redis": "^2.8.1",
"@langchain/community": "^0.3.13", "@langchain/community": "^0.3.13",
"@langchain/core": "^0.3.17", "@langchain/core": "^0.3.17",
"@langchain/google-genai": "^0.1.3", "@langchain/google-genai": "^0.1.4",
"@langchain/google-vertexai": "^0.1.2", "@langchain/google-vertexai": "^0.1.2",
"@langchain/textsplitters": "^0.1.0", "@langchain/textsplitters": "^0.1.0",
"@librechat/agents": "^1.7.7", "@librechat/agents": "^1.7.7",

View file

@@ -49,6 +49,7 @@ const googleModels = {
/* Max I/O is combined so we subtract the amount from max response tokens for actual total */ /* Max I/O is combined so we subtract the amount from max response tokens for actual total */
gemini: 30720, // -2048 from max gemini: 30720, // -2048 from max
'gemini-pro-vision': 12288, // -4096 from max 'gemini-pro-vision': 12288, // -4096 from max
'gemini-exp': 8000,
'gemini-1.5': 1048576, // -8192 from max 'gemini-1.5': 1048576, // -8192 from max
'text-bison-32k': 32758, // -10 from max 'text-bison-32k': 32758, // -10 from max
'chat-bison-32k': 32758, // -10 from max 'chat-bison-32k': 32758, // -10 from max

9
package-lock.json generated
View file

@@ -50,7 +50,7 @@
"@keyv/redis": "^2.8.1", "@keyv/redis": "^2.8.1",
"@langchain/community": "^0.3.13", "@langchain/community": "^0.3.13",
"@langchain/core": "^0.3.17", "@langchain/core": "^0.3.17",
"@langchain/google-genai": "^0.1.3", "@langchain/google-genai": "^0.1.4",
"@langchain/google-vertexai": "^0.1.2", "@langchain/google-vertexai": "^0.1.2",
"@langchain/textsplitters": "^0.1.0", "@langchain/textsplitters": "^0.1.0",
"@librechat/agents": "^1.7.7", "@librechat/agents": "^1.7.7",
@@ -218,10 +218,9 @@
} }
}, },
"api/node_modules/@langchain/google-genai": { "api/node_modules/@langchain/google-genai": {
"version": "0.1.3", "version": "0.1.4",
"resolved": "https://registry.npmjs.org/@langchain/google-genai/-/google-genai-0.1.3.tgz", "resolved": "https://registry.npmjs.org/@langchain/google-genai/-/google-genai-0.1.4.tgz",
"integrity": "sha512-GHZV4qEMoi+rnqSM5I+ADXwUSBRSD0hsmlS1lTQEGW9HmvzPu3zryvYjuRAoelZSENTmZmBatdM+kgiV8H2+JA==", "integrity": "sha512-b8qrqnHYbNseaAikrWyxuDTww6CUIse82F5/BmF2GtWVR25yJrNUWETfTp7o7iIMxhFR0PuQag4gEZOL74F5Tw==",
"license": "MIT",
"dependencies": { "dependencies": {
"@google/generative-ai": "^0.21.0", "@google/generative-ai": "^0.21.0",
"zod-to-json-schema": "^3.22.4" "zod-to-json-schema": "^3.22.4"