Mirror of https://github.com/danny-avila/LibreChat.git (synced 2025-09-22 06:00:56 +02:00)
🚀 feat: o1 Tool Calling & reasoning_effort (#5553)
* fix: Update @librechat/agents to version 1.9.98
* feat: o1 tool calling
* fix: Improve error logging in RouteErrorBoundary
* refactor: Move extractContent function to utils and clean up Artifact component
* refactor: optimize reasoning UI post-streaming and deprecate plugins rendering
* feat: reasoning_effort support
* fix: update request content type handling in openapiToFunction to remove default 'application/x-www-form-urlencoded'
* chore: bump v0.7.696 data-provider
This commit is contained in:
parent 591a019766
commit 587d46a20b
17 changed files with 752 additions and 685 deletions
@@ -1286,6 +1286,8 @@ ${convo}
     ) {
       delete modelOptions.stream;
       delete modelOptions.stop;
+    } else if (!this.isO1Model && modelOptions.reasoning_effort != null) {
+      delete modelOptions.reasoning_effort;
     }

     let reasoningKey = 'reasoning_content';
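The hunk above gates request options by model family: o1 models accept neither `stream` nor `stop`, and `reasoning_effort` is only sent to o1 models. A minimal TypeScript sketch of that sanitization, assuming a simplified options shape (the interface and function names are illustrative, not the client's actual code):

// Minimal sketch of the option sanitization; `ModelOptions` and
// `sanitizeModelOptions` are illustrative names, not the client's actual code.
interface ModelOptions {
  model: string;
  stream?: boolean;
  stop?: string[];
  reasoning_effort?: 'low' | 'medium' | 'high';
}

function sanitizeModelOptions(isO1Model: boolean, modelOptions: ModelOptions): ModelOptions {
  if (isO1Model) {
    // o1 models reject `stream` and `stop`, so both are stripped from the payload
    delete modelOptions.stream;
    delete modelOptions.stop;
  } else if (modelOptions.reasoning_effort != null) {
    // `reasoning_effort` is only valid for o1 models; drop it everywhere else
    delete modelOptions.reasoning_effort;
  }
  return modelOptions;
}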
@@ -44,7 +44,7 @@
     "@langchain/google-genai": "^0.1.7",
     "@langchain/google-vertexai": "^0.1.8",
     "@langchain/textsplitters": "^0.1.0",
-    "@librechat/agents": "^1.9.97",
+    "@librechat/agents": "^1.9.98",
     "@waylaidwanderer/fetch-event-source": "^3.0.1",
     "axios": "^1.7.7",
     "bcryptjs": "^2.4.3",
@@ -3,6 +3,7 @@ const {
   EnvVar,
   Providers,
   GraphEvents,
+  getMessageId,
   ToolEndHandler,
   handleToolCalls,
   ChatModelStreamHandler,
@@ -46,7 +47,7 @@ class ModelEndHandler {
     }

     try {
-      if (metadata.provider === Providers.GOOGLE) {
+      if (metadata.provider === Providers.GOOGLE || graph.clientOptions?.disableStreaming) {
         handleToolCalls(data?.output?.tool_calls, metadata, graph);
       }

@@ -59,6 +60,38 @@ class ModelEndHandler {
       }

       this.collectedUsage.push(usage);
+      if (!graph.clientOptions?.disableStreaming) {
+        return;
+      }
+      if (!data.output.content) {
+        return;
+      }
+      const stepKey = graph.getStepKey(metadata);
+      const message_id = getMessageId(stepKey, graph) ?? '';
+      if (message_id) {
+        graph.dispatchRunStep(stepKey, {
+          type: StepTypes.MESSAGE_CREATION,
+          message_creation: {
+            message_id,
+          },
+        });
+      }
+      const stepId = graph.getStepIdByKey(stepKey);
+      const content = data.output.content;
+      if (typeof content === 'string') {
+        graph.dispatchMessageDelta(stepId, {
+          content: [
+            {
+              type: 'text',
+              text: content,
+            },
+          ],
+        });
+      } else if (content.every((c) => c.type?.startsWith('text'))) {
+        graph.dispatchMessageDelta(stepId, {
+          content,
+        });
+      }
     } catch (error) {
       logger.error('Error handling model end event:', error);
     }
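With `disableStreaming` set, no token deltas are emitted during the run, so the handler above replays the final output as one message-creation step followed by a single aggregated delta. A standalone sketch of just the content-normalization step, with a simplified part type (the `MessagePart` name is assumed):

// Sketch of the content normalization performed before dispatching the delta;
// `MessagePart` is a simplified stand-in for the library's content-part type.
type MessagePart = { type?: string; text?: string };

function toDeltaContent(content: string | MessagePart[]): MessagePart[] | null {
  if (typeof content === 'string') {
    // A plain string becomes a single text part
    return [{ type: 'text', text: content }];
  }
  if (content.every((part) => part.type?.startsWith('text'))) {
    // Arrays are forwarded only when every part is text-like
    return content;
  }
  // Mixed or non-text parts are not replayed as a single delta
  return null;
}

console.log(toDeltaContent('done')); // [{ type: 'text', text: 'done' }]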
@@ -41,6 +41,11 @@ async function createRun({
     agent.model_parameters,
   );

+  if (/o1(?!-(?:mini|preview)).*$/.test(llmConfig.model)) {
+    llmConfig.streaming = false;
+    llmConfig.disableStreaming = true;
+  }
+
   /** @type {StandardGraphConfig} */
   const graphConfig = {
     signal,
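The regex `/o1(?!-(?:mini|preview)).*$/` disables streaming only for full o1 models: the negative lookahead exempts the `-mini` and `-preview` variants, which still support streaming. A quick check of the pattern's behavior:

// Quick check of the o1 streaming guard from the hunk above
const isNonStreamingO1 = (model: string): boolean => /o1(?!-(?:mini|preview)).*$/.test(model);

console.log(isNonStreamingO1('o1'));            // true
console.log(isNonStreamingO1('o1-2024-12-17')); // true: dated snapshots still match
console.log(isNonStreamingO1('o1-mini'));       // false: excluded by the lookahead
console.log(isNonStreamingO1('o1-preview'));    // false: excluded by the lookahead
console.log(isNonStreamingO1('gpt-4o'));        // false: no "o1" substring
// Caveat: the pattern matches "o1" anywhere, so a name like 'pro1' would also match.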
@@ -6,8 +6,8 @@ import type { Pluggable } from 'unified';
 import type { Artifact } from '~/common';
 import { useMessageContext, useArtifactContext } from '~/Providers';
 import { artifactsState } from '~/store/artifacts';
+import { logger, extractContent } from '~/utils';
 import ArtifactButton from './ArtifactButton';
-import { logger } from '~/utils';

 export const artifactPlugin: Pluggable = () => {
   return (tree) => {
@@ -22,21 +22,6 @@ export const artifactPlugin: Pluggable = () => {
   };
 };

-const extractContent = (
-  children: React.ReactNode | { props: { children: React.ReactNode } } | string,
-): string => {
-  if (typeof children === 'string') {
-    return children;
-  }
-  if (React.isValidElement(children)) {
-    return extractContent((children.props as { children?: React.ReactNode }).children);
-  }
-  if (Array.isArray(children)) {
-    return children.map(extractContent).join('');
-  }
-  return '';
-};
-
 export function Artifact({
   // eslint-disable-next-line @typescript-eslint/no-unused-vars
   node,
@@ -3,6 +3,7 @@ import { useRecoilValue } from 'recoil';
 import { Atom, ChevronDown } from 'lucide-react';
 import type { MouseEvent, FC } from 'react';
 import { useLocalize } from '~/hooks';
+import { cn } from '~/utils';
 import store from '~/store';

 const BUTTON_STYLES = {
@@ -63,19 +64,21 @@ const Thinking: React.ElementType = memo(({ children }: { children: React.ReactN
   }

   return (
-    <div className="mb-3">
-      <ThinkingButton isExpanded={isExpanded} onClick={handleClick} label={label} />
+    <>
+      <div className="mb-5">
+        <ThinkingButton isExpanded={isExpanded} onClick={handleClick} label={label} />
+      </div>
       <div
-        className="grid transition-all duration-300 ease-out"
+        className={cn('grid transition-all duration-300 ease-out', isExpanded && 'mb-8')}
         style={{
           gridTemplateRows: isExpanded ? '1fr' : '0fr',
         }}
       >
         <div className="overflow-hidden">
-          <ThinkingContent>{children}</ThinkingContent>
+          <ThinkingContent isPart={true}>{children}</ThinkingContent>
         </div>
       </div>
-    </div>
+    </>
   );
 });

@@ -17,7 +17,6 @@ import {
 import { Artifact, artifactPlugin } from '~/components/Artifacts/Artifact';
 import { langSubset, preprocessLaTeX, handleDoubleClick } from '~/utils';
 import CodeBlock from '~/components/Messages/Content/CodeBlock';
-import Thinking from '~/components/Artifacts/Thinking';
 import { useFileDownload } from '~/data-provider';
 import useLocalize from '~/hooks/useLocalize';
 import store from '~/store';
@@ -223,7 +222,6 @@ const Markdown = memo(({ content = '', showCursor, isLatestMessage }: TContentPr
           a,
           p,
           artifact: Artifact,
-          thinking: Thinking,
         } as {
           [nodeType: string]: React.ElementType;
         }
@@ -1,9 +1,9 @@
-import { Fragment, Suspense, useMemo } from 'react';
+import { memo, Suspense, useMemo } from 'react';
 import { useRecoilValue } from 'recoil';
-import type { TMessage, TResPlugin } from 'librechat-data-provider';
+import type { TMessage } from 'librechat-data-provider';
 import type { TMessageContentProps, TDisplayProps } from '~/common';
-import Plugin from '~/components/Messages/Content/Plugin';
 import Error from '~/components/Messages/Content/Error';
+import Thinking from '~/components/Artifacts/Thinking';
 import { DelayedRender } from '~/components/ui';
 import { useChatContext } from '~/Providers';
 import MarkdownLite from './MarkdownLite';
@@ -117,7 +117,6 @@ export const UnfinishedMessage = ({ message }: { message: TMessage }) => (
   />
 );

-// Content Component
 const MessageContent = ({
   text,
   edit,
@@ -127,72 +126,49 @@
   isLast,
   ...props
 }: TMessageContentProps) => {
+  const { message } = props;
+  const { messageId } = message;
+
+  const { thinkingContent, regularContent } = useMemo(() => {
+    const thinkingMatch = text.match(/:::thinking([\s\S]*?):::/);
+    return {
+      thinkingContent: thinkingMatch ? thinkingMatch[1].trim() : '',
+      regularContent: thinkingMatch ? text.replace(/:::thinking[\s\S]*?:::/, '').trim() : text,
+    };
+  }, [text]);
+
+  const showRegularCursor = useMemo(() => isLast && isSubmitting, [isLast, isSubmitting]);
+
+  const unfinishedMessage = useMemo(
+    () =>
+      !isSubmitting && unfinished ? (
+        <Suspense>
+          <DelayedRender delay={250}>
+            <UnfinishedMessage message={message} />
+          </DelayedRender>
+        </Suspense>
+      ) : null,
+    [isSubmitting, unfinished, message],
+  );
+
   if (error) {
     return <ErrorMessage message={props.message} text={text} />;
   } else if (edit) {
     return <EditMessage text={text} isSubmitting={isSubmitting} {...props} />;
-  } else {
-    const marker = ':::plugin:::\n';
-    const splitText = text.split(marker);
-    const { message } = props;
-    const { plugins, messageId } = message;
-    const displayedIndices = new Set<number>();
-    // Function to get the next non-empty text index
-    const getNextNonEmptyTextIndex = (currentIndex: number) => {
-      for (let i = currentIndex + 1; i < splitText.length; i++) {
-        // Allow the last index to be last in case it has text
-        // this may need to change if I add back streaming
-        if (i === splitText.length - 1) {
-          return currentIndex;
-        }
-
-        if (splitText[i].trim() !== '' && !displayedIndices.has(i)) {
-          return i;
-        }
-      }
-      return currentIndex; // If no non-empty text is found, return the current index
-    };
-
-    return splitText.map((text, idx) => {
-      let currentText = text.trim();
-      let plugin: TResPlugin | null = null;
-
-      if (plugins) {
-        plugin = plugins[idx];
-      }
-
-      // If the current text is empty, get the next non-empty text index
-      const displayTextIndex = currentText === '' ? getNextNonEmptyTextIndex(idx) : idx;
-      currentText = splitText[displayTextIndex];
-      const isLastIndex = displayTextIndex === splitText.length - 1;
-      const isEmpty = currentText.trim() === '';
-      const showText =
-        (currentText && !isEmpty && !displayedIndices.has(displayTextIndex)) ||
-        (isEmpty && isLastIndex);
-      displayedIndices.add(displayTextIndex);
-
-      return (
-        <Fragment key={idx}>
-          {plugin && <Plugin key={`plugin-${messageId}-${idx}`} plugin={plugin} />}
-          {showText ? (
-            <DisplayMessage
-              key={`display-${messageId}-${idx}`}
-              showCursor={isLastIndex && isLast && isSubmitting}
-              text={currentText}
-              {...props}
-            />
-          ) : null}
-          {!isSubmitting && unfinished && (
-            <Suspense>
-              <DelayedRender delay={250}>
-                <UnfinishedMessage message={message} key={`unfinished-${messageId}-${idx}`} />
-              </DelayedRender>
-            </Suspense>
-          )}
-        </Fragment>
-      );
-    });
-  }
+  }

+  return (
+    <>
+      {thinkingContent && <Thinking key={`thinking-${messageId}`}>{thinkingContent}</Thinking>}
+      <DisplayMessage
+        key={`display-${messageId}`}
+        showCursor={showRegularCursor}
+        text={regularContent}
+        {...props}
+      />
+      {unfinishedMessage}
+    </>
+  );
 };

-export default MessageContent;
+export default memo(MessageContent);
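The rewritten component no longer splits on `:::plugin:::` markers; instead it lifts an optional `:::thinking ... :::` directive out of the raw text and renders it through the dedicated Thinking component. The extraction logic from the `useMemo` above, restated as a standalone function for illustration:

// Standalone version of the extraction in the useMemo above
function splitThinking(text: string): { thinkingContent: string; regularContent: string } {
  const thinkingMatch = text.match(/:::thinking([\s\S]*?):::/);
  return {
    thinkingContent: thinkingMatch ? thinkingMatch[1].trim() : '',
    regularContent: thinkingMatch ? text.replace(/:::thinking[\s\S]*?:::/, '').trim() : text,
  };
}

console.log(splitThinking(':::thinking Weighing the options... ::: The answer is 42.'));
// -> { thinkingContent: 'Weighing the options...', regularContent: 'The answer is 42.' }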
@@ -18,7 +18,7 @@ const Reasoning = memo(({ reasoning }: ReasoningProps) => {
     <div
       className={cn(
         'grid transition-all duration-300 ease-out',
-        nextType !== ContentTypes.THINK && isExpanded && 'mb-10',
+        nextType !== ContentTypes.THINK && isExpanded && 'mb-8',
       )}
       style={{
         gridTemplateRows: isExpanded ? '1fr' : '0fr',
@@ -3,6 +3,7 @@ import {
   EModelEndpoint,
   openAISettings,
   googleSettings,
+  ReasoningEffort,
   BedrockProviders,
   anthropicSettings,
 } from 'librechat-data-provider';
@@ -203,6 +204,19 @@ const openAIParams: Record<string, SettingDefinition> = {
     optionType: 'model',
     columnSpan: 2,
   },
+  reasoning_effort: {
+    key: 'reasoning_effort',
+    label: 'com_endpoint_reasoning_effort',
+    labelCode: true,
+    description: 'com_endpoint_openai_reasoning_effort',
+    descriptionCode: true,
+    type: 'enum',
+    default: ReasoningEffort.medium,
+    component: 'slider',
+    options: [ReasoningEffort.low, ReasoningEffort.medium, ReasoningEffort.high],
+    optionType: 'model',
+    columnSpan: 4,
+  },
 };

 const anthropic: Record<string, SettingDefinition> = {
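The setting surfaces in the UI as a three-stop slider over the enum options; on the wire, the selected value travels as the `reasoning_effort` field of the request body. An illustrative sketch of the resulting chat-completions payload (the model name and message content here are made up):

// Illustrative chat-completions payload once a slider value is applied;
// the model name and message content are made up for this example.
const payload = {
  model: 'o1',
  reasoning_effort: 'high',
  messages: [{ role: 'user', content: 'Prove there are infinitely many primes.' }],
};

console.log(JSON.stringify(payload, null, 2));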
@@ -446,6 +460,7 @@ const openAI: SettingsConfiguration = [
   baseDefinitions.stop,
   librechat.resendFiles,
   baseDefinitions.imageDetail,
+  openAIParams.reasoning_effort,
 ];

 const openAICol1: SettingsConfiguration = [
@@ -453,6 +468,7 @@ const openAICol1: SettingsConfiguration = [
   openAIParams.chatGptLabel,
   librechat.promptPrefix,
   librechat.maxContextTokens,
+  openAIParams.reasoning_effort,
 ];

 const openAICol2: SettingsConfiguration = [
@@ -578,6 +578,7 @@ export default {
   com_endpoint_top_k: 'Top K',
   com_endpoint_max_output_tokens: 'Max Output Tokens',
   com_endpoint_stop: 'Stop Sequences',
+  com_endpoint_reasoning_effort: 'Reasoning Effort',
   com_endpoint_stop_placeholder: 'Separate values by pressing `Enter`',
   com_endpoint_openai_max_tokens: `Optional \`max_tokens\` field, representing the maximum number of tokens that can be generated in the chat completion.
@@ -596,6 +597,8 @@ export default {
     'Resend all previously attached images. Note: this can significantly increase token cost and you may experience errors with many image attachments.',
   com_endpoint_openai_resend_files:
     'Resend all previously attached files. Note: this will increase token cost and you may experience errors with many attachments.',
+  com_endpoint_openai_reasoning_effort:
+    'o1 models only: constrains effort on reasoning for reasoning models. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.',
   com_endpoint_openai_detail:
     'The resolution for Vision requests. "Low" is cheaper and faster, "High" is more detailed and expensive, and "Auto" will automatically choose between the two based on the image resolution.',
   com_endpoint_openai_stop: 'Up to 4 sequences where the API will stop generating further tokens.',
@@ -1,5 +1,6 @@
 import { useRouteError } from 'react-router-dom';
 import { Button } from '~/components/ui';
+import logger from '~/utils/logger';

 interface UserAgentData {
   getHighEntropyValues(hints: string[]): Promise<{ platform: string; platformVersion: string }>;
@@ -31,7 +32,8 @@ const getPlatformInfo = async (): Promise<PlatformInfo> => {
       version: highEntropyValues.platformVersion,
     };
   } catch (e) {
-    console.warn('Failed to get high entropy values:', e);
+    logger.warn('Failed to get high entropy values');
+    logger.error(e);
   }
 }

@@ -85,28 +87,33 @@ export default function RouteErrorBoundary() {
   };

   const handleDownloadLogs = async () => {
-    const browser = await getBrowserInfo();
-    const errorLog = {
-      timestamp: new Date().toISOString(),
-      browser,
-      error: {
-        ...errorDetails,
-        stack:
-          errorDetails.stack != null && errorDetails.stack.trim() !== ''
-            ? formatStackTrace(errorDetails.stack)
-            : undefined,
-      },
-    };
+    try {
+      const browser = await getBrowserInfo();
+      const errorLog = {
+        timestamp: new Date().toISOString(),
+        browser,
+        error: {
+          ...errorDetails,
+          stack:
+            errorDetails.stack != null && errorDetails.stack.trim() !== ''
+              ? formatStackTrace(errorDetails.stack)
+              : undefined,
+        },
+      };

-    const blob = new Blob([JSON.stringify(errorLog, null, 2)], { type: 'application/json' });
-    const url = URL.createObjectURL(blob);
-    const a = document.createElement('a');
-    a.href = url;
-    a.download = `error-log-${new Date().toISOString()}.json`;
-    document.body.appendChild(a);
-    a.click();
-    document.body.removeChild(a);
-    URL.revokeObjectURL(url);
+      const blob = new Blob([JSON.stringify(errorLog, null, 2)], { type: 'application/json' });
+      const url = URL.createObjectURL(blob);
+      const a = document.createElement('a');
+      a.href = url;
+      a.download = `error-log-${new Date().toISOString()}.json`;
+      document.body.appendChild(a);
+      a.click();
+      document.body.removeChild(a);
+      URL.revokeObjectURL(url);
+    } catch (e) {
+      logger.warn('Failed to download error logs:');
+      logger.error(e);
+    }
   };

   const handleCopyStack = async () => {
@@ -1,3 +1,5 @@
+import React from 'react';
+
 export * from './map';
 export * from './json';
 export * from './files';
@@ -82,3 +84,18 @@ export const handleDoubleClick: React.MouseEventHandler<HTMLElement> = (event) =
   selection.removeAllRanges();
   selection.addRange(range);
 };
+
+export const extractContent = (
+  children: React.ReactNode | { props: { children: React.ReactNode } } | string,
+): string => {
+  if (typeof children === 'string') {
+    return children;
+  }
+  if (React.isValidElement(children)) {
+    return extractContent((children.props as { children?: React.ReactNode }).children);
+  }
+  if (Array.isArray(children)) {
+    return children.map(extractContent).join('');
+  }
+  return '';
+};
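With `extractContent` now exported from `~/utils`, any component can flatten nested React children into a plain string, recursing through elements and arrays until only text remains. A hypothetical usage (the element tree here is made up):

// Hypothetical usage: flattening a nested element tree into its raw text
import React from 'react';
import { extractContent } from '~/utils';

const children = React.createElement('span', null, [
  'const x = ',
  React.createElement('em', { key: 'v' }, '42'),
  ';',
]);

console.log(extractContent(children)); // "const x = 42;"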
package-lock.json (generated, 1148 lines changed): diff suppressed because it is too large.
@@ -1,6 +1,6 @@
 {
   "name": "librechat-data-provider",
-  "version": "0.7.695",
+  "version": "0.7.696",
   "description": "data services for librechat apps",
   "main": "dist/index.js",
   "module": "dist/index.es.js",
@@ -427,8 +427,8 @@ export function openapiToFunction(
     path,
     method,
     operationId,
-    !!(operationObj['x-openai-isConsequential'] ?? false), // Custom extension for consequential actions
-    operationObj.requestBody ? 'application/json' : 'application/x-www-form-urlencoded',
+    !!(operationObj['x-openai-isConsequential'] ?? false),
+    operationObj.requestBody ? 'application/json' : '',
   );

   requestBuilders[operationId] = actionRequest;
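Previously, operations without a `requestBody` were still tagged `application/x-www-form-urlencoded`; after this change they carry an empty content type, so no body encoding is implied for body-less actions. The changed expression in isolation:

// The changed expression in isolation: requestBody presence now decides the content type
const contentType = (hasRequestBody: boolean): string =>
  hasRequestBody ? 'application/json' : '';

console.log(contentType(true));  // 'application/json'
console.log(contentType(false)); // '' (was 'application/x-www-form-urlencoded' before this commit)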
@@ -110,6 +110,12 @@ export enum ImageDetail {
   high = 'high',
 }

+export enum ReasoningEffort {
+  low = 'low',
+  medium = 'medium',
+  high = 'high',
+}
+
 export const imageDetailNumeric = {
   [ImageDetail.low]: 0,
   [ImageDetail.auto]: 1,
@@ -123,6 +129,7 @@ export const imageDetailValue = {
 };

 export const eImageDetailSchema = z.nativeEnum(ImageDetail);
+export const eReasoningEffortSchema = z.nativeEnum(ReasoningEffort);

 export const defaultAssistantFormValues = {
   assistant: '',
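`z.nativeEnum` derives a runtime validator from the TypeScript enum, so conversation payloads are checked against exactly these three values. A self-contained sketch of the behavior (enum and schema redeclared here for illustration):

// Self-contained sketch of zod's nativeEnum validation for the new enum
import { z } from 'zod';

enum ReasoningEffort {
  low = 'low',
  medium = 'medium',
  high = 'high',
}

const eReasoningEffortSchema = z.nativeEnum(ReasoningEffort);

console.log(eReasoningEffortSchema.parse('medium'));          // 'medium'
console.log(eReasoningEffortSchema.safeParse('max').success); // false: not a valid effort level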
@@ -564,6 +571,8 @@ export const tConversationSchema = z.object({
   file_ids: z.array(z.string()).optional(),
   /* vision */
   imageDetail: eImageDetailSchema.optional(),
+  /* OpenAI: o1 only */
+  reasoning_effort: eReasoningEffortSchema.optional(),
   /* assistant */
   assistant_id: z.string().optional(),
   /* agents */
@@ -1055,6 +1064,7 @@ export const openAISchema = tConversationSchema
     spec: true,
     maxContextTokens: true,
     max_tokens: true,
+    reasoning_effort: true,
   })
   .transform((obj: Partial<TConversation>) => removeNullishValues(obj))
   .catch(() => ({}));