diff --git a/client/src/locales/en/translation.json b/client/src/locales/en/translation.json
index fd20176632..8fdbc05544 100644
--- a/client/src/locales/en/translation.json
+++ b/client/src/locales/en/translation.json
@@ -275,8 +275,9 @@
   "com_endpoint_google_custom_name_placeholder": "Set a custom name for Google",
   "com_endpoint_google_maxoutputtokens": "Maximum number of tokens that can be generated in the response. Specify a lower value for shorter responses and a higher value for longer responses. Note: models may stop before reaching this maximum.",
   "com_endpoint_google_temp": "Higher values = more random, while lower values = more focused and deterministic. We recommend altering this or Top P but not both.",
-  "com_endpoint_google_thinking": "Enables or disables reasoning. This setting is only supported by certain models (2.5 series). For older models, this setting may have no effect.",
-  "com_endpoint_google_thinking_budget": "Guides the number of thinking tokens the model uses. The actual amount may exceed or fall below this value depending on the prompt.\n\nThis setting is only supported by certain models (2.5 series). Gemini 2.5 Pro supports 128-32,768 tokens. Gemini 2.5 Flash supports 0-24,576 tokens. Gemini 2.5 Flash Lite supports 512-24,576 tokens.\n\nLeave blank or set to \"-1\" to let the model automatically decide when and how much to think. By default, Gemini 2.5 Flash Lite does not think.",
+  "com_endpoint_google_thinking": "Enables or disables reasoning. Supported by Gemini 2.5 and 3 series. Note: Gemini 3 Pro cannot fully disable thinking.",
+  "com_endpoint_google_thinking_budget": "Guides the number of thinking tokens the model uses. The actual amount may exceed or fall below this value depending on the prompt.\n\nThis setting only applies to Gemini 2.5 and older models. For Gemini 3 and later, use the Thinking Level setting instead.\n\nGemini 2.5 Pro supports 128-32,768 tokens. Gemini 2.5 Flash supports 0-24,576 tokens. Gemini 2.5 Flash Lite supports 512-24,576 tokens.\n\nLeave blank or set to \"-1\" to let the model automatically decide when and how much to think. By default, Gemini 2.5 Flash Lite does not think.",
+  "com_endpoint_google_thinking_level": "Controls the depth of reasoning for Gemini 3 and later models. Has no effect on Gemini 2.5 and older — use Thinking Budget for those.\n\nLeave on Auto to use the model default.",
   "com_endpoint_google_topk": "Top-k changes how the model selects tokens for output. A top-k of 1 means the selected token is the most probable among all tokens in the model's vocabulary (also called greedy decoding), while a top-k of 3 means that the next token is selected from among the 3 most probable tokens (using temperature).",
   "com_endpoint_google_topp": "Top-p changes how the model selects tokens for output. Tokens are selected from most K (see topK parameter) probable to least until the sum of their probabilities equals the top-p value.",
   "com_endpoint_google_use_search_grounding": "Use Google's search grounding feature to enhance responses with real-time web search results. This enables models to access current information and provide more accurate, up-to-date answers.",
@@ -346,6 +347,7 @@
   "com_endpoint_temperature": "Temperature",
   "com_endpoint_thinking": "Thinking",
   "com_endpoint_thinking_budget": "Thinking Budget",
+  "com_endpoint_thinking_level": "Thinking Level",
   "com_endpoint_top_k": "Top K",
   "com_endpoint_top_p": "Top P",
   "com_endpoint_use_active_assistant": "Use Active Assistant",
diff --git a/packages/api/src/endpoints/google/llm.spec.ts b/packages/api/src/endpoints/google/llm.spec.ts
index d9aa1a702a..6e2a8ddb25 100644
--- a/packages/api/src/endpoints/google/llm.spec.ts
+++ b/packages/api/src/endpoints/google/llm.spec.ts
@@ -1,5 +1,5 @@
 import { Providers } from '@librechat/agents';
-import { AuthKeys } from 'librechat-data-provider';
+import { AuthKeys, ThinkingLevel } from 'librechat-data-provider';
 import type * as t from '~/types';
 import { getGoogleConfig, getSafetySettings, knownGoogleParams } from './llm';
 
@@ -367,6 +367,191 @@ describe('getGoogleConfig', () => {
     });
   });
 
+  describe('Gemini 3 Thinking Level', () => {
+    it('should use thinkingLevel for Gemini 3 models with Google provider', () => {
+      const credentials = {
+        [AuthKeys.GOOGLE_API_KEY]: 'test-api-key',
+      };
+
+      const result = getGoogleConfig(credentials, {
+        modelOptions: {
+          model: 'gemini-3-pro-preview',
+          thinking: true,
+          thinkingLevel: ThinkingLevel.high,
+        },
+      });
+
+      expect(result.llmConfig).toHaveProperty('thinkingConfig');
+      expect((result.llmConfig as Record<string, unknown>).thinkingConfig).toMatchObject({
+        includeThoughts: true,
+        thinkingLevel: ThinkingLevel.high,
+      });
+      expect((result.llmConfig as Record<string, unknown>).thinkingConfig).not.toHaveProperty(
+        'thinkingBudget',
+      );
+    });
+
+    it('should use thinkingLevel for Gemini 3.1 models', () => {
+      const credentials = {
+        [AuthKeys.GOOGLE_API_KEY]: 'test-api-key',
+      };
+
+      const result = getGoogleConfig(credentials, {
+        modelOptions: {
+          model: 'gemini-3.1-pro-preview',
+          thinking: true,
+          thinkingLevel: ThinkingLevel.medium,
+        },
+      });
+
+      expect((result.llmConfig as Record<string, unknown>).thinkingConfig).toMatchObject({
+        includeThoughts: true,
+        thinkingLevel: ThinkingLevel.medium,
+      });
+    });
+
+    it('should omit thinkingLevel when unset (empty string) for Gemini 3', () => {
+      const credentials = {
+        [AuthKeys.GOOGLE_API_KEY]: 'test-api-key',
+      };
+
+      const result = getGoogleConfig(credentials, {
+        modelOptions: {
+          model: 'gemini-3-flash-preview',
+          thinking: true,
+          thinkingLevel: ThinkingLevel.unset,
+        },
+      });
+
+      expect(result.llmConfig).toHaveProperty('thinkingConfig');
+      expect((result.llmConfig as Record<string, unknown>).thinkingConfig).toMatchObject({
+        includeThoughts: true,
+      });
+      expect((result.llmConfig as Record<string, unknown>).thinkingConfig).not.toHaveProperty(
+        'thinkingLevel',
+      );
+    });
+
+    it('should not set thinkingConfig when thinking is false for Gemini 3', () => {
+      const credentials = {
+        [AuthKeys.GOOGLE_API_KEY]: 'test-api-key',
+      };
+
+      const result = getGoogleConfig(credentials, {
+        modelOptions: {
+          model: 'gemini-3-pro-preview',
+          thinking: false,
+          thinkingLevel: ThinkingLevel.high,
+        },
+      });
+
+      expect(result.llmConfig).not.toHaveProperty('thinkingConfig');
+    });
+
+    it('should use thinkingLevel for Gemini 3 with Vertex AI provider', () => {
+      const credentials = {
+        [AuthKeys.GOOGLE_SERVICE_KEY]: {
+          project_id: 'test-project',
+        },
+      };
+
+      const result = getGoogleConfig(credentials, {
+        modelOptions: {
+          model: 'gemini-3-pro-preview',
+          thinking: true,
+          thinkingLevel: ThinkingLevel.low,
+        },
+      });
+
+      expect(result.provider).toBe(Providers.VERTEXAI);
+      expect((result.llmConfig as Record<string, unknown>).thinkingConfig).toMatchObject({
+        includeThoughts: true,
+        thinkingLevel: ThinkingLevel.low,
+      });
+      expect(result.llmConfig).toHaveProperty('includeThoughts', true);
+    });
+
+    it('should send thinkingConfig by default for Gemini 3 (no thinking options set)', () => {
+      const credentials = {
+        [AuthKeys.GOOGLE_API_KEY]: 'test-api-key',
+      };
+
+      const result = getGoogleConfig(credentials, {
+        modelOptions: {
+          model: 'gemini-3-pro-preview',
+        },
+      });
+
+      expect(result.llmConfig).toHaveProperty('thinkingConfig');
+      const config = (result.llmConfig as Record<string, unknown>).thinkingConfig;
+      expect(config).toMatchObject({ includeThoughts: true });
+      expect(config).not.toHaveProperty('thinkingLevel');
+    });
+
+    it('should ignore thinkingBudget for Gemini 3+ models', () => {
+      const credentials = {
+        [AuthKeys.GOOGLE_API_KEY]: 'test-api-key',
+      };
+
+      const result = getGoogleConfig(credentials, {
+        modelOptions: {
+          model: 'gemini-3-pro-preview',
+          thinking: true,
+          thinkingBudget: 5000,
+        },
+      });
+
+      const config = (result.llmConfig as Record<string, unknown>).thinkingConfig;
+      expect(config).not.toHaveProperty('thinkingBudget');
+      expect(config).toMatchObject({ includeThoughts: true });
+    });
+
+    it('should NOT classify gemini-2.9-flash as Gemini 3+', () => {
+      const credentials = {
+        [AuthKeys.GOOGLE_API_KEY]: 'test-api-key',
+      };
+
+      const result = getGoogleConfig(credentials, {
+        modelOptions: {
+          model: 'gemini-2.9-flash',
+          thinking: true,
+          thinkingBudget: 5000,
+        },
+      });
+
+      expect((result.llmConfig as Record<string, unknown>).thinkingConfig).toMatchObject({
+        thinkingBudget: 5000,
+        includeThoughts: true,
+      });
+      expect((result.llmConfig as Record<string, unknown>).thinkingConfig).not.toHaveProperty(
+        'thinkingLevel',
+      );
+    });
+
+    it('should use thinkingBudget (not thinkingLevel) for Gemini 2.5 models', () => {
+      const credentials = {
+        [AuthKeys.GOOGLE_API_KEY]: 'test-api-key',
+      };
+
+      const result = getGoogleConfig(credentials, {
+        modelOptions: {
+          model: 'gemini-2.5-flash',
+          thinking: true,
+          thinkingBudget: 5000,
+          thinkingLevel: ThinkingLevel.high,
+        },
+      });
+
+      expect((result.llmConfig as Record<string, unknown>).thinkingConfig).toMatchObject({
+        thinkingBudget: 5000,
+        includeThoughts: true,
+      });
+      expect((result.llmConfig as Record<string, unknown>).thinkingConfig).not.toHaveProperty(
+        'thinkingLevel',
+      );
+    });
+  });
+
   describe('Web Search Functionality', () => {
     it('should enable web search when web_search is true', () => {
       const credentials = {
diff --git a/packages/api/src/endpoints/google/llm.ts b/packages/api/src/endpoints/google/llm.ts
index 289bc0e952..83951f9e0c 100644
--- a/packages/api/src/endpoints/google/llm.ts
+++ b/packages/api/src/endpoints/google/llm.ts
@@ -150,6 +150,7 @@ export function getGoogleConfig(
 
   const {
     web_search,
+    thinkingLevel,
     thinking = googleSettings.thinking.default,
     thinkingBudget = googleSettings.thinkingBudget.default,
     ...modelOptions
@@ -196,19 +197,50 @@ export function getGoogleConfig(
     );
   }
 
-  const shouldEnableThinking =
-    thinking && thinkingBudget != null && (thinkingBudget > 0 || thinkingBudget === -1);
+  const modelName = (modelOptions?.model ?? '') as string;
 
-  if (shouldEnableThinking && provider === Providers.GOOGLE) {
-    (llmConfig as GoogleClientOptions).thinkingConfig = {
-      thinkingBudget: thinking ? thinkingBudget : googleSettings.thinkingBudget.default,
-      includeThoughts: Boolean(thinking),
+  /**
+   * Gemini 3+ uses a qualitative `thinkingLevel` ('minimal'|'low'|'medium'|'high')
+   * instead of the numeric `thinkingBudget` used by Gemini 2.5 and earlier.
+   * When `thinking` is enabled (default: true), we always send `thinkingConfig`
+   * with `includeThoughts: true`. The `thinkingBudget` param is ignored for Gemini 3+.
+   *
+   * For Vertex AI, top-level `includeThoughts` is still required because
+   * `@langchain/google-common`'s `formatGenerationConfig` reads it separately
+   * from `thinkingConfig` — they serve different purposes in the request pipeline.
+   *
+   * The pattern accepts a single-digit major version of 3-9 or any multi-digit
+   * major version, so e.g. `gemini-2.9-flash` stays on the `thinkingBudget` path.
+   */
+  const isGemini3Plus = /gemini-([3-9]|\d{2,})/i.test(modelName);
+
+  if (isGemini3Plus && thinking) {
+    const thinkingConfig: { includeThoughts: boolean; thinkingLevel?: string } = {
+      includeThoughts: true,
     };
-  } else if (shouldEnableThinking && provider === Providers.VERTEXAI) {
-    (llmConfig as VertexAIClientOptions).thinkingBudget = thinking
-      ? thinkingBudget
-      : googleSettings.thinkingBudget.default;
-    (llmConfig as VertexAIClientOptions).includeThoughts = Boolean(thinking);
+    if (thinkingLevel) {
+      thinkingConfig.thinkingLevel = thinkingLevel as string;
+    }
+    if (provider === Providers.GOOGLE) {
+      (llmConfig as GoogleClientOptions).thinkingConfig = thinkingConfig;
+    } else if (provider === Providers.VERTEXAI) {
+      (llmConfig as Record<string, unknown>).thinkingConfig = thinkingConfig;
+      (llmConfig as VertexAIClientOptions).includeThoughts = true;
+    }
+  } else if (!isGemini3Plus) {
+    const shouldEnableThinking =
+      thinking && thinkingBudget != null && (thinkingBudget > 0 || thinkingBudget === -1);
+
+    /** `shouldEnableThinking` implies `thinking`, so the budget needs no fallback */
+    if (shouldEnableThinking && provider === Providers.GOOGLE) {
+      (llmConfig as GoogleClientOptions).thinkingConfig = {
+        thinkingBudget,
+        includeThoughts: true,
+      };
+    } else if (shouldEnableThinking && provider === Providers.VERTEXAI) {
+      (llmConfig as VertexAIClientOptions).thinkingBudget = thinkingBudget;
+      (llmConfig as VertexAIClientOptions).includeThoughts = true;
+    }
   }
 
   /*
diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts
index 64fc99b0eb..4a51844fb8 100644
--- a/packages/data-provider/src/config.ts
+++ b/packages/data-provider/src/config.ts
@@ -1196,6 +1196,9 @@ export const defaultModels = {
     // Gemini 3.1 Models
     'gemini-3.1-pro-preview',
     'gemini-3.1-pro-preview-customtools',
+    // Gemini 3 Models
+    'gemini-3-pro-preview',
+    'gemini-3-flash-preview',
     // Gemini 2.5 Models
     'gemini-2.5-pro',
     'gemini-2.5-flash',
diff --git a/packages/data-provider/src/parameterSettings.ts b/packages/data-provider/src/parameterSettings.ts
index 229f970c7d..d0cfdf210f 100644
--- a/packages/data-provider/src/parameterSettings.ts
+++ b/packages/data-provider/src/parameterSettings.ts
@@ -1,6 +1,7 @@
 import {
   Verbosity,
   ImageDetail,
+  ThinkingLevel,
   EModelEndpoint,
   openAISettings,
   googleSettings,
@@ -672,6 +673,32 @@ const google: Record = {
     optionType: 'conversation',
     columnSpan: 2,
   },
+  thinkingLevel: {
+    key: 'thinkingLevel',
+    label: 'com_endpoint_thinking_level',
+    labelCode: true,
+    description: 'com_endpoint_google_thinking_level',
+    descriptionCode: true,
+    type: 'enum',
+    default: ThinkingLevel.unset,
+    component: 'slider',
+    options: [
+      ThinkingLevel.unset,
+      ThinkingLevel.minimal,
+      ThinkingLevel.low,
+      ThinkingLevel.medium,
+      ThinkingLevel.high,
+    ],
+    enumMappings: {
+      [ThinkingLevel.unset]: 'com_ui_auto',
+      [ThinkingLevel.minimal]: 'com_ui_minimal',
+      [ThinkingLevel.low]: 'com_ui_low',
+      [ThinkingLevel.medium]: 'com_ui_medium',
+      [ThinkingLevel.high]: 'com_ui_high',
+    },
+    optionType: 'conversation',
+    columnSpan: 4,
+  },
   web_search: {
     key: 'web_search',
     label: 'com_endpoint_use_search_grounding',
@@ -698,6 +725,7 @@ const googleConfig: SettingsConfiguration = [
   librechat.resendFiles,
   google.thinking,
   google.thinkingBudget,
+  google.thinkingLevel,
   google.web_search,
   librechat.fileTokenLimit,
 ];
@@ -717,6 +745,7 @@ const googleCol2: SettingsConfiguration = [
   librechat.resendFiles,
   google.thinking,
   google.thinkingBudget,
+  google.thinkingLevel,
   google.web_search,
   librechat.fileTokenLimit,
 ];
diff --git a/packages/data-provider/src/schemas.ts b/packages/data-provider/src/schemas.ts
index 02096cb0cf..63a7ed574e 100644
--- a/packages/data-provider/src/schemas.ts
+++ b/packages/data-provider/src/schemas.ts
@@ -205,6 +205,18 @@ export enum Verbosity {
   high = 'high',
 }
 
+/**
+ * Qualitative reasoning depth for Gemini 3+ models.
+ * `unset` (empty string) is shown as "Auto": no level is sent and the model default applies.
+ */
+export enum ThinkingLevel {
+  unset = '',
+  minimal = 'minimal',
+  low = 'low',
+  medium = 'medium',
+  high = 'high',
+}
+
 export const imageDetailNumeric = {
   [ImageDetail.low]: 0,
   [ImageDetail.auto]: 1,
@@ -222,6 +234,7 @@ export const eReasoningEffortSchema = z.nativeEnum(ReasoningEffort);
 export const eAnthropicEffortSchema = z.nativeEnum(AnthropicEffort);
 export const eReasoningSummarySchema = z.nativeEnum(ReasoningSummary);
 export const eVerbositySchema = z.nativeEnum(Verbosity);
+export const eThinkingLevelSchema = z.nativeEnum(ThinkingLevel);
 
 export const defaultAssistantFormValues = {
   assistant: '',
@@ -366,6 +379,9 @@ export const googleSettings = {
      */
     default: -1 as const,
   },
+  thinkingLevel: {
+    default: ThinkingLevel.unset as const,
+  },
 };
 
 const ANTHROPIC_MAX_OUTPUT = 128000 as const;
@@ -722,6 +738,7 @@ export const tConversationSchema = z.object({
   system: z.string().optional(),
   thinking: z.boolean().optional(),
   thinkingBudget: coerceNumber.optional(),
+  thinkingLevel: eThinkingLevelSchema.optional(),
   stream: z.boolean().optional(),
   /* artifacts */
   artifacts: z.string().optional(),
@@ -868,6 +885,7 @@ export const tQueryParamsSchema = tConversationSchema
     promptCache: true,
     thinking: true,
     thinkingBudget: true,
+    thinkingLevel: true,
     effort: true,
     /** @endpoints bedrock */
     region: true,
@@ -943,6 +961,7 @@ export const googleBaseSchema = tConversationSchema.pick({
   topK: true,
   thinking: true,
   thinkingBudget: true,
+  thinkingLevel: true,
   web_search: true,
   fileTokenLimit: true,
   iconURL: true,
@@ -974,6 +993,7 @@ export const googleGenConfigSchema = z
       .object({
         includeThoughts: z.boolean().optional(),
         thinkingBudget: coerceNumber.optional(),
+        thinkingLevel: z.string().optional(),
       })
       .optional(),
     web_search: z.boolean().optional(),
diff --git a/packages/data-provider/src/types.ts b/packages/data-provider/src/types.ts
index a7782a3bc6..3b04c40f45 100644
--- a/packages/data-provider/src/types.ts
+++ b/packages/data-provider/src/types.ts
@@ -52,6 +52,7 @@ export type TEndpointOption = Pick<
   | 'promptCache'
   | 'thinking'
   | 'thinkingBudget'
+  | 'thinkingLevel'
   | 'effort'
   // Assistant/Agent fields
   | 'assistant_id'
diff --git a/packages/data-schemas/src/schema/defaults.ts b/packages/data-schemas/src/schema/defaults.ts
index 33af668384..9b50bceb1d 100644
--- a/packages/data-schemas/src/schema/defaults.ts
+++ b/packages/data-schemas/src/schema/defaults.ts
@@ -83,6 +83,9 @@ export const conversationPreset = {
   thinkingBudget: {
     type: Number,
   },
+  thinkingLevel: {
+    type: String,
+  },
   effort: {
     type: String,
   },