🧠 feat: Thinking Budget, Include Thoughts, and Dynamic Thinking for Gemini 2.5 (#8055)

* feat: support thinking budget parameter for Gemini 2.5 series (#6949, #7542)

https://ai.google.dev/gemini-api/docs/thinking#set-budget

* refactor: update thinking budget minimum value to -1 for dynamic thinking

- see: https://ai.google.dev/gemini-api/docs/thinking#set-budget

* chore: bump @librechat/agents to v2.4.43

* refactor: rename LLMConfigOptions to OpenAIConfigOptions for clarity and consistency

- Updated type definitions and references in initialize.ts, llm.ts, and openai.ts to reflect the new naming convention.
- Ensured that the OpenAI configuration options are consistently used across the relevant files.

* refactor: port Google LLM methods to TypeScript Package

* chore: update @librechat/agents version to 2.4.43 in package-lock.json and package.json

* refactor: update thinking budget description for clarity and adjust placeholder in parameter settings

* refactor: enhance googleSettings default value for thinking budget to support dynamic adjustment

* chore: update @librechat/agents to v2.4.44 for Vertex Dynamic Thinking workaround

* refactor: rename google config function, update `createRun` types, use `reasoning` as `reasoningKey` for Google

* refactor: simplify placeholder handling in DynamicInput component

* refactor: enhance thinking budget description for clarity and allow automatic decision by setting to "-1"

* refactor: update text styling in OptionHover component for improved readability

* chore: update @librechat/agents dependency to v2.4.46 in package.json and package-lock.json

* chore: update @librechat/api version to 1.2.5 in package.json and package-lock.json

* refactor: enhance `clientOptions` handling by filtering `omitTitleOptions`, add `json` field for Google models

---------

Co-authored-by: ciffelia <15273128+ciffelia@users.noreply.github.com>
This commit is contained in:
Danny Avila 2025-06-25 15:14:33 -04:00 committed by GitHub
parent b169306096
commit c87422a1e0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 212 additions and 108 deletions

View file

@ -46,6 +46,10 @@ function DynamicInput({
setInputValue(e, !isNaN(Number(e.target.value)));
};
const placeholderText = placeholderCode
? localize(placeholder as TranslationKeys) || placeholder
: placeholder;
return (
<div
className={`flex flex-col items-center justify-start gap-6 ${
@ -76,11 +80,7 @@ function DynamicInput({
disabled={readonly}
value={inputValue ?? defaultValue ?? ''}
onChange={handleInputChange}
placeholder={
placeholderCode
? localize(placeholder as TranslationKeys) || placeholder
: placeholder
}
placeholder={placeholderText}
className={cn(
'flex h-10 max-h-10 w-full resize-none border-none bg-surface-secondary px-3 py-2',
)}

View file

@ -29,7 +29,7 @@ function OptionHover({
<HoverCardPortal>
<HoverCardContent side={side} className={`z-[999] w-80 ${className}`} sideOffset={sideOffset}>
<div className="space-y-2">
<p className="text-sm text-gray-600 dark:text-gray-300">{text}</p>
<p className="whitespace-pre-wrap text-sm text-text-secondary">{text}</p>
</div>
</HoverCardContent>
</HoverCardPortal>

View file

@ -207,6 +207,8 @@
"com_endpoint_google_temp": "Higher values = more random, while lower values = more focused and deterministic. We recommend altering this or Top P but not both.",
"com_endpoint_google_topk": "Top-k changes how the model selects tokens for output. A top-k of 1 means the selected token is the most probable among all tokens in the model's vocabulary (also called greedy decoding), while a top-k of 3 means that the next token is selected from among the 3 most probable tokens (using temperature).",
"com_endpoint_google_topp": "Top-p changes how the model selects tokens for output. Tokens are selected from most K (see topK parameter) probable to least until the sum of their probabilities equals the top-p value.",
"com_endpoint_google_thinking": "Enables or disables reasoning. This setting is only supported by certain models (2.5 series). For older models, this setting may have no effect.",
"com_endpoint_google_thinking_budget": "Guides the number of thinking tokens the model uses. The actual amount may exceed or fall below this value depending on the prompt.\n\nThis setting is only supported by certain models (2.5 series). Gemini 2.5 Pro supports 128-32,768 tokens. Gemini 2.5 Flash supports 0-24,576 tokens. Gemini 2.5 Flash Lite supports 512-24,576 tokens.\n\nLeave blank or set to \"-1\" to let the model automatically decide when and how much to think. By default, Gemini 2.5 Flash Lite does not think.",
"com_endpoint_instructions_assistants": "Override Instructions",
"com_endpoint_instructions_assistants_placeholder": "Overrides the instructions of the assistant. This is useful for modifying the behavior on a per-run basis.",
"com_endpoint_max_output_tokens": "Max Output Tokens",
@ -582,6 +584,7 @@
"com_ui_auth_url": "Authorization URL",
"com_ui_authentication": "Authentication",
"com_ui_authentication_type": "Authentication Type",
"com_ui_auto": "Auto",
"com_ui_available_tools": "Available Tools",
"com_ui_avatar": "Avatar",
"com_ui_azure": "Azure",