📑 feat: Anthropic Direct Provider Upload (#9072)

* feat: implement Anthropic native PDF support with document preservation

- Add comprehensive debug logging throughout PDF processing pipeline
- Refactor attachment processing to separate image and document handling
- Create distinct addImageURLs(), addDocuments(), and processAttachments() methods
- Fix critical bugs in stream handling and parameter passing
- Add streamToBuffer utility for proper stream-to-buffer conversion
- Remove api/agents submodule from repository

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* chore: remove out of scope formatting changes

* fix: stop duplication of file in chat on end of response stream

* chore: bring back file search and ocr options

* chore: localize upload to provider string in file menu

* refactor: change createMenuItems args to fit new pattern introduced by anthropic-native-pdf-support

* feat: add cache point for PDFs processed by the Anthropic endpoint, since they are unlikely to change and should benefit from caching

* feat: combine Upload Image into Upload to Provider, since both perform a direct upload, and change the provider upload icon to reflect multimodal upload

* feat: add citations support according to docs

* refactor: remove redundant 'document' check since documents are handled properly by formatMessage in the agents repo now

* refactor: change upload logic so the Anthropic endpoint isn't exempted from the normal upload path, using Agents for consistency with the rest of the upload logic

* fix: include width and height in return from uploadLocalFile so images are correctly identified when going through an AgentUpload in addImageURLs

* chore: remove client specific handling since the direct provider stuff is handled by the agent client

* feat: handle documents in AgentClient so no need for change to agents repo

* chore: remove unused changes

* chore: remove auto generated comments from OG commit

* feat: add logic for agents to use direct to provider uploads if supported (currently just anthropic)

* fix: reintroduce role check to fix render error because of undefined value for Content Part

* fix: actually fix render bug by using proper isCreatedByUser check and making sure our mutation of formattedMessage.content is consistent

---------

Co-authored-by: Andres Restrepo <andres@thelinuxkid.com>
Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Dustin Healy 2025-08-15 19:46:00 -07:00 committed by Dustin Healy
parent 48f6f8f2f8
commit 89843262b2
14 changed files with 398 additions and 14 deletions

View file

@ -21,6 +21,7 @@ export type TAgentCapabilities = {
[AgentCapabilities.execute_code]: boolean;
[AgentCapabilities.end_after_tools]?: boolean;
[AgentCapabilities.hide_sequential_outputs]?: boolean;
[AgentCapabilities.direct_upload]?: boolean;
};
export type AgentForm = {

View file

@ -36,6 +36,7 @@ function AttachFileChat({ disableInputs }: { disableInputs: boolean }) {
disabled={disableInputs}
conversationId={conversationId}
endpointFileConfig={endpointFileConfig}
endpoint={endpoint}
/>
);
}

View file

@ -1,7 +1,13 @@
import React, { useRef, useState, useMemo } from 'react';
import * as Ariakit from '@ariakit/react';
import { useSetRecoilState } from 'recoil';
import { FileSearch, ImageUpIcon, TerminalSquareIcon, FileType2Icon } from 'lucide-react';
import {
FileSearch,
ImageUpIcon,
TerminalSquareIcon,
FileType2Icon,
FileImageIcon,
} from 'lucide-react';
import { EToolResources, EModelEndpoint, defaultAgentCapabilities } from 'librechat-data-provider';
import {
FileUpload,
@ -14,8 +20,9 @@ import type { EndpointFileConfig } from 'librechat-data-provider';
import { useLocalize, useGetAgentsConfig, useFileHandling, useAgentCapabilities } from '~/hooks';
import useSharePointFileHandling from '~/hooks/Files/useSharePointFileHandling';
import { SharePointPickerDialog } from '~/components/SharePoint';
import { useGetStartupConfig } from '~/data-provider';
import { useGetStartupConfig, useGetAgentByIdQuery } from '~/data-provider';
import { ephemeralAgentByConvoId } from '~/store';
import { useChatContext } from '~/Providers/ChatContext';
import { MenuItemProps } from '~/common';
import { cn } from '~/utils';
@ -23,9 +30,15 @@ interface AttachFileMenuProps {
conversationId: string;
disabled?: boolean | null;
endpointFileConfig?: EndpointFileConfig;
endpoint?: string | null;
}
const AttachFileMenu = ({ disabled, conversationId, endpointFileConfig }: AttachFileMenuProps) => {
const AttachFileMenu = ({
disabled,
conversationId,
endpointFileConfig,
endpoint,
}: AttachFileMenuProps) => {
const localize = useLocalize();
const isUploadDisabled = disabled ?? false;
const inputRef = useRef<HTMLInputElement>(null);
@ -46,34 +59,68 @@ const AttachFileMenu = ({ disabled, conversationId, endpointFileConfig }: Attach
const [isSharePointDialogOpen, setIsSharePointDialogOpen] = useState(false);
const { agentsConfig } = useGetAgentsConfig();
const { conversation } = useChatContext();
// Get agent details if using an agent
const { data: agent } = useGetAgentByIdQuery(conversation?.agent_id ?? '', {
enabled: !!conversation?.agent_id && conversation?.agent_id !== 'ephemeral',
});
/** TODO: Ephemeral Agent Capabilities
* Allow defining agent capabilities on a per-endpoint basis
* Use definition for agents endpoint for ephemeral agents
* */
const capabilities = useAgentCapabilities(agentsConfig?.capabilities ?? defaultAgentCapabilities);
const handleUploadClick = (isImage?: boolean) => {
const handleUploadClick = (fileType?: 'image' | 'document' | 'anthropic_multimodal') => {
if (!inputRef.current) {
return;
}
inputRef.current.value = '';
inputRef.current.accept = isImage === true ? 'image/*' : '';
if (fileType === 'image') {
inputRef.current.accept = 'image/*';
} else if (fileType === 'document') {
inputRef.current.accept = '.pdf,application/pdf';
} else if (fileType === 'anthropic_multimodal') {
inputRef.current.accept = 'image/*,.pdf,application/pdf';
} else {
inputRef.current.accept = '';
}
inputRef.current.click();
inputRef.current.accept = '';
};
const dropdownItems = useMemo(() => {
const createMenuItems = (onAction: (isImage?: boolean) => void) => {
const items: MenuItemProps[] = [
{
const createMenuItems = (
onAction: (fileType?: 'image' | 'document' | 'anthropic_multimodal') => void,
) => {
const items: MenuItemProps[] = [];
// Temporary: replace with a more robust solution once direct upload support is added for the other providers
const isAnthropicAgent = agent?.provider === 'anthropic';
const shouldShowDirectUpload = endpoint === EModelEndpoint.anthropic || isAnthropicAgent;
if (!shouldShowDirectUpload) {
items.push({
label: localize('com_ui_upload_image_input'),
onClick: () => {
setToolResource(undefined);
onAction(true);
onAction('image');
},
icon: <ImageUpIcon className="icon-md" />,
},
];
});
}
if (shouldShowDirectUpload) {
items.push({
label: localize('com_ui_upload_provider'),
onClick: () => {
setToolResource(EToolResources.direct_upload);
onAction('anthropic_multimodal');
},
icon: <FileImageIcon className="icon-md" />,
});
}
if (capabilities.ocrEnabled) {
items.push({
@ -139,6 +186,7 @@ const AttachFileMenu = ({ disabled, conversationId, endpointFileConfig }: Attach
setEphemeralAgent,
sharePointEnabled,
setIsSharePointDialogOpen,
endpoint,
]);
const menuTrigger = (

View file

@ -9,6 +9,7 @@ interface AgentCapabilitiesResult {
fileSearchEnabled: boolean;
webSearchEnabled: boolean;
codeEnabled: boolean;
directUploadEnabled: boolean;
}
export default function useAgentCapabilities(
@ -49,6 +50,11 @@ export default function useAgentCapabilities(
[capabilities],
);
const directUploadEnabled = useMemo(
() => capabilities?.includes(AgentCapabilities.direct_upload) ?? false,
[capabilities],
);
return {
ocrEnabled,
codeEnabled,
@ -57,5 +63,6 @@ export default function useAgentCapabilities(
artifactsEnabled,
webSearchEnabled,
fileSearchEnabled,
directUploadEnabled,
};
}

View file

@ -1218,6 +1218,7 @@
"com_ui_upload_invalid": "Invalid file for upload. Must be an image not exceeding the limit",
"com_ui_upload_invalid_var": "Invalid file for upload. Must be an image not exceeding {{0}} MB",
"com_ui_upload_ocr_text": "Upload as Text",
"com_ui_upload_provider": "Upload to Provider",
"com_ui_upload_success": "Successfully uploaded file",
"com_ui_upload_type": "Select Upload Type",
"com_ui_usage": "Usage",