🧠 feat: Thinking Budget, Include Thoughts, and Dynamic Thinking for Gemini 2.5 (#8055)

* feat: support thinking budget parameter for Gemini 2.5 series (#6949, #7542)

https://ai.google.dev/gemini-api/docs/thinking#set-budget

* refactor: update thinking budget minimum value to -1 for dynamic thinking

- see: https://ai.google.dev/gemini-api/docs/thinking#set-budget
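
A minimal sketch of the three budget modes this enables (field names match the GoogleClient diff below; the variable names and values are illustrative only):

    // -1 lets the model decide how many thinking tokens to spend (dynamic thinking)
    const dynamicBudget = { thinkingConfig: { thinkingBudget: -1 } };
    // 0 disables thinking entirely
    const noThinking = { thinkingConfig: { thinkingBudget: 0 } };
    // any positive value caps thinking at that token budget
    const fixedBudget = { thinkingConfig: { thinkingBudget: 1024 } };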

* chore: bump @librechat/agents to v2.4.43

* refactor: rename LLMConfigOptions to OpenAIConfigOptions for clarity and consistency

- Updated type definitions and references in initialize.ts, llm.ts, and openai.ts to reflect the new naming convention.
- Ensured that the OpenAI configuration options are consistently used across the relevant files.

* refactor: port Google LLM methods to TypeScript Package

* chore: update @librechat/agents version to 2.4.43 in package-lock.json and package.json

* refactor: update thinking budget description for clarity and adjust placeholder in parameter settings

* refactor: enhance googleSettings default value for thinking budget to support dynamic adjustment

* chore: update @librechat/agents to v2.4.44 for Vertex Dynamic Thinking workaround

* refactor: rename google config function, update `createRun` types, use `reasoning` as `reasoningKey` for Google

* refactor: simplify placeholder handling in DynamicInput component

* refactor: clarify thinking budget description; allow the model to decide the budget automatically when set to "-1"

* refactor: update text styling in OptionHover component for improved readability

* chore: update @librechat/agents dependency to v2.4.46 in package.json and package-lock.json

* chore: update @librechat/api version to 1.2.5 in package.json and package-lock.json

* refactor: enhance `clientOptions` handling by filtering out keys in `omitTitleOptions`, add `json` field for Google models

---------

Co-authored-by: ciffelia <15273128+ciffelia@users.noreply.github.com>

@@ -1,7 +1,7 @@
 const { google } = require('googleapis');
-const { Tokenizer } = require('@librechat/api');
 const { concat } = require('@langchain/core/utils/stream');
 const { ChatVertexAI } = require('@langchain/google-vertexai');
+const { Tokenizer, getSafetySettings } = require('@librechat/api');
 const { ChatGoogleGenerativeAI } = require('@langchain/google-genai');
 const { GoogleGenerativeAI: GenAI } = require('@google/generative-ai');
 const { HumanMessage, SystemMessage } = require('@langchain/core/messages');
@@ -12,13 +12,13 @@ const {
   endpointSettings,
   parseTextParts,
   EModelEndpoint,
+  googleSettings,
   ContentTypes,
   VisionModes,
   ErrorTypes,
   Constants,
   AuthKeys,
 } = require('librechat-data-provider');
-const { getSafetySettings } = require('~/server/services/Endpoints/google/llm');
 const { encodeAndFormat } = require('~/server/services/Files/images');
 const { spendTokens } = require('~/models/spendTokens');
 const { getModelMaxTokens } = require('~/utils');
@@ -166,6 +166,16 @@ class GoogleClient extends BaseClient {
       );
     }
 
+    // Add thinking configuration
+    this.modelOptions.thinkingConfig = {
+      thinkingBudget:
+        (this.modelOptions.thinking ?? googleSettings.thinking.default)
+          ? this.modelOptions.thinkingBudget
+          : 0,
+    };
+    delete this.modelOptions.thinking;
+    delete this.modelOptions.thinkingBudget;
+
     this.sender =
       this.options.sender ??
       getResponseSender({
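
A minimal sketch of how the added constructor logic resolves the two user-facing options into Gemini's single field (the option combinations are hypothetical):

    // thinking: true,  thinkingBudget: -1   -> thinkingConfig: { thinkingBudget: -1 }   (dynamic)
    // thinking: true,  thinkingBudget: 2048 -> thinkingConfig: { thinkingBudget: 2048 }
    // thinking: false                       -> thinkingConfig: { thinkingBudget: 0 }    (disabled)
    // The raw `thinking`/`thinkingBudget` options are deleted afterward so only
    // `thinkingConfig` is passed through to the underlying SDK.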

@@ -48,7 +48,7 @@
     "@langchain/google-genai": "^0.2.13",
     "@langchain/google-vertexai": "^0.2.13",
     "@langchain/textsplitters": "^0.1.0",
-    "@librechat/agents": "^2.4.42",
+    "@librechat/agents": "^2.4.46",
     "@librechat/api": "*",
    "@librechat/data-schemas": "*",
    "@node-saml/passport-saml": "^5.0.0",

@@ -44,6 +44,17 @@ const BaseClient = require('~/app/clients/BaseClient');
 const { loadAgent } = require('~/models/Agent');
 const { getMCPManager } = require('~/config');
 
+const omitTitleOptions = new Set([
+  'stream',
+  'thinking',
+  'streaming',
+  'clientOptions',
+  'thinkingConfig',
+  'thinkingBudget',
+  'includeThoughts',
+  'maxOutputTokens',
+]);
+
 /**
  * @param {ServerRequest} req
  * @param {Agent} agent
@@ -1038,6 +1049,16 @@ class AgentClient extends BaseClient {
       delete clientOptions.maxTokens;
     }
 
+    clientOptions = Object.assign(
+      Object.fromEntries(
+        Object.entries(clientOptions).filter(([key]) => !omitTitleOptions.has(key)),
+      ),
+    );
+
+    if (provider === Providers.GOOGLE) {
+      clientOptions.json = true;
+    }
+
     try {
       const titleResult = await this.run.generateTitle({
         provider,
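
In effect, title generation receives a stripped-down copy of `clientOptions`. A hedged sketch of the filter's outcome (the option values are hypothetical):

    const clientOptions = {
      model: 'gemini-2.5-flash',
      thinking: true,
      thinkingBudget: -1,
      maxOutputTokens: 8192,
    };
    // Only keys absent from omitTitleOptions survive the filter:
    //   { model: 'gemini-2.5-flash' }
    // and for Google providers `json: true` is added, presumably so the title
    // call requests structured JSON output.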

@@ -1,7 +1,6 @@
+const { getGoogleConfig, isEnabled } = require('@librechat/api');
 const { EModelEndpoint, AuthKeys } = require('librechat-data-provider');
 const { getUserKey, checkUserKeyExpiry } = require('~/server/services/UserService');
-const { getLLMConfig } = require('~/server/services/Endpoints/google/llm');
-const { isEnabled } = require('~/server/utils');
 const { GoogleClient } = require('~/app');
 
 const initializeClient = async ({ req, res, endpointOption, overrideModel, optionsOnly }) => {
@@ -65,7 +64,7 @@ const initializeClient = async ({ req, res, endpointOption, overrideModel, optionsOnly }) => {
     if (overrideModel) {
       clientOptions.modelOptions.model = overrideModel;
     }
-    return getLLMConfig(credentials, clientOptions);
+    return getGoogleConfig(credentials, clientOptions);
   }
 
   const client = new GoogleClient(credentials, clientOptions);
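
For context, the `optionsOnly` branch above now resolves to the ported helper; a hedged usage sketch (return shape per the deleted llm.js below):

    const { provider, llmConfig } = await initializeClient({
      req,
      res,
      endpointOption,
      optionsOnly: true,
    });
    // provider: Providers.GOOGLE or Providers.VERTEXAI
    // llmConfig: the final options handed to the underlying chat model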

@@ -1,176 +0,0 @@
-const { Providers } = require('@librechat/agents');
-const { AuthKeys } = require('librechat-data-provider');
-const { isEnabled } = require('~/server/utils');
-
-function getThresholdMapping(model) {
-  const gemini1Pattern = /gemini-(1\.0|1\.5|pro$|1\.0-pro|1\.5-pro|1\.5-flash-001)/;
-  const restrictedPattern = /(gemini-(1\.5-flash-8b|2\.0|exp)|learnlm)/;
-
-  if (gemini1Pattern.test(model)) {
-    return (value) => {
-      if (value === 'OFF') {
-        return 'BLOCK_NONE';
-      }
-      return value;
-    };
-  }
-
-  if (restrictedPattern.test(model)) {
-    return (value) => {
-      if (value === 'OFF' || value === 'HARM_BLOCK_THRESHOLD_UNSPECIFIED') {
-        return 'BLOCK_NONE';
-      }
-      return value;
-    };
-  }
-
-  return (value) => value;
-}
-
-/**
- *
- * @param {string} model
- * @returns {Array<{category: string, threshold: string}> | undefined}
- */
-function getSafetySettings(model) {
-  if (isEnabled(process.env.GOOGLE_EXCLUDE_SAFETY_SETTINGS)) {
-    return undefined;
-  }
-  const mapThreshold = getThresholdMapping(model);
-
-  return [
-    {
-      category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT',
-      threshold: mapThreshold(
-        process.env.GOOGLE_SAFETY_SEXUALLY_EXPLICIT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
-      ),
-    },
-    {
-      category: 'HARM_CATEGORY_HATE_SPEECH',
-      threshold: mapThreshold(
-        process.env.GOOGLE_SAFETY_HATE_SPEECH || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
-      ),
-    },
-    {
-      category: 'HARM_CATEGORY_HARASSMENT',
-      threshold: mapThreshold(
-        process.env.GOOGLE_SAFETY_HARASSMENT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
-      ),
-    },
-    {
-      category: 'HARM_CATEGORY_DANGEROUS_CONTENT',
-      threshold: mapThreshold(
-        process.env.GOOGLE_SAFETY_DANGEROUS_CONTENT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
-      ),
-    },
-    {
-      category: 'HARM_CATEGORY_CIVIC_INTEGRITY',
-      threshold: mapThreshold(process.env.GOOGLE_SAFETY_CIVIC_INTEGRITY || 'BLOCK_NONE'),
-    },
-  ];
-}
-
-/**
- * Replicates core logic from GoogleClient's constructor and setOptions, plus client determination.
- * Returns an object with the provider label and the final options that would be passed to createLLM.
- *
- * @param {string | object} credentials - Either a JSON string or an object containing Google keys
- * @param {object} [options={}] - The same shape as the "GoogleClient" constructor options
- */
-function getLLMConfig(credentials, options = {}) {
-  // 1. Parse credentials
-  let creds = {};
-  if (typeof credentials === 'string') {
-    try {
-      creds = JSON.parse(credentials);
-    } catch (err) {
-      throw new Error(`Error parsing string credentials: ${err.message}`);
-    }
-  } else if (credentials && typeof credentials === 'object') {
-    creds = credentials;
-  }
-
-  // Extract from credentials
-  const serviceKeyRaw = creds[AuthKeys.GOOGLE_SERVICE_KEY] ?? {};
-  const serviceKey =
-    typeof serviceKeyRaw === 'string' ? JSON.parse(serviceKeyRaw) : (serviceKeyRaw ?? {});
-  const project_id = serviceKey?.project_id ?? null;
-  const apiKey = creds[AuthKeys.GOOGLE_API_KEY] ?? null;
-
-  const reverseProxyUrl = options.reverseProxyUrl;
-  const authHeader = options.authHeader;
-
-  /** @type {GoogleClientOptions | VertexAIClientOptions} */
-  let llmConfig = {
-    ...(options.modelOptions || {}),
-    maxRetries: 2,
-  };
-
-  /** Used only for Safety Settings */
-  llmConfig.safetySettings = getSafetySettings(llmConfig.model);
-
-  let provider;
-
-  if (project_id) {
-    provider = Providers.VERTEXAI;
-  } else {
-    provider = Providers.GOOGLE;
-  }
-
-  // If we have a GCP project => Vertex AI
-  if (project_id && provider === Providers.VERTEXAI) {
-    /** @type {VertexAIClientOptions['authOptions']} */
-    llmConfig.authOptions = {
-      credentials: { ...serviceKey },
-      projectId: project_id,
-    };
-    llmConfig.location = process.env.GOOGLE_LOC || 'us-central1';
-  } else if (apiKey && provider === Providers.GOOGLE) {
-    llmConfig.apiKey = apiKey;
-  }
-
-  /*
-  let legacyOptions = {};
-  // Filter out any "examples" that are empty
-  legacyOptions.examples = (legacyOptions.examples ?? [])
-    .filter(Boolean)
-    .filter((obj) => obj?.input?.content !== '' && obj?.output?.content !== '');
-
-  // If user has "examples" from legacyOptions, push them onto llmConfig
-  if (legacyOptions.examples?.length) {
-    llmConfig.examples = legacyOptions.examples.map((ex) => {
-      const { input, output } = ex;
-      if (!input?.content || !output?.content) {return undefined;}
-      return {
-        input: new HumanMessage(input.content),
-        output: new AIMessage(output.content),
-      };
-    }).filter(Boolean);
-  }
-  */
-
-  if (reverseProxyUrl) {
-    llmConfig.baseUrl = reverseProxyUrl;
-  }
-
-  if (authHeader) {
-    llmConfig.customHeaders = {
-      Authorization: `Bearer ${apiKey}`,
-    };
-  }
-
-  // Return the final shape
-  return {
-    /** @type {Providers.GOOGLE | Providers.VERTEXAI} */
-    provider,
-    /** @type {GoogleClientOptions | VertexAIClientOptions} */
-    llmConfig,
-  };
-}
-
-module.exports = {
-  getLLMConfig,
-  getSafetySettings,
-};
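
With this module removed in favor of the @librechat/api port (getGoogleConfig), here is a brief sketch of the behavior being ported, useful for parity checks (credential keys via AuthKeys; the API key value is a placeholder):

    const { Providers } = require('@librechat/agents');
    const { AuthKeys } = require('librechat-data-provider');

    // API-key credentials (no GCP project_id) select the Gemini API provider:
    const { provider, llmConfig } = getLLMConfig(
      { [AuthKeys.GOOGLE_API_KEY]: 'AIza...' },
      { modelOptions: { model: 'gemini-2.5-flash' } },
    );
    // provider === Providers.GOOGLE; llmConfig.apiKey is set, and
    // llmConfig.safetySettings is built from the GOOGLE_SAFETY_* env vars.
    // Service-account credentials containing a project_id would instead yield
    // Providers.VERTEXAI with authOptions and a location (GOOGLE_LOC, default 'us-central1').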