From b5aadf1302fc6807a24066a3d189780050368a0a Mon Sep 17 00:00:00 2001
From: Dustin Healy <54083382+dustinhealy@users.noreply.github.com>
Date: Sun, 17 Aug 2025 02:14:25 -0700
Subject: [PATCH] =?UTF-8?q?=F0=9F=93=81=20feat:=20Send=20Attachments=20Dir?=
=?UTF-8?q?ectly=20to=20Provider=20(OpenAI)=20(#9098)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* refactor: change references from direct upload to direct attach to better reflect functionality
Since we now send attachments directly to the provider using a base64 encoding strategy rather than the Files/File API, the "upload" nomenclature no longer makes sense. direct_attach better describes the different methods of sending attachments to providers anyway, even if we later introduce direct upload support.
* feat: add upload to provider option for openai (and agent) ui
* chore: move anthropic pdf validator over to packages/api
* feat: simple pdf validation according to openai docs
* feat: add provider agnostic validatePdf logic to start handling multiple endpoints
* feat: add handling for openai specific documentPart formatting
* refactor: move require statement to proper place at top of file
* chore: add in openAI endpoint for the rest of the document handling logic
* feat: add direct attach support for azureOpenAI endpoint and agents
* feat: add pdf validation for azureOpenAI endpoint
* refactor: unify all the endpoint checks with isDocumentSupportedEndpoint
* refactor: consolidate Upload to Provider vs Upload image logic for clarity
* refactor: remove anthropic from anthropic_multimodal fileType since we support multiple providers now
---
api/server/controllers/agents/client.js | 22 +++++---
api/server/services/Files/documents/encode.js | 41 ++++++++------
client/src/common/agents-types.ts | 2 +-
.../Chat/Input/Files/AttachFileMenu.tsx | 39 +++++++-------
.../src/hooks/Agents/useAgentCapabilities.ts | 8 +--
packages/api/src/files/index.ts | 1 +
.../api/src/files/validation.ts | 54 ++++++++++++++-----
packages/data-provider/src/config.ts | 4 +-
packages/data-provider/src/schemas.ts | 13 +++++
.../data-provider/src/types/assistants.ts | 2 +-
10 files changed, 122 insertions(+), 64 deletions(-)
rename api/server/services/Files/validation/pdfValidator.js => packages/api/src/files/validation.ts (56%)
diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js
index 4c63ddaa9f..ba385dff3a 100644
--- a/api/server/controllers/agents/client.js
+++ b/api/server/controllers/agents/client.js
@@ -33,7 +33,16 @@ const {
AgentCapabilities,
bedrockInputSchema,
removeNullishValues,
+ isDocumentSupportedEndpoint,
} = require('librechat-data-provider');
+const {
+ findPluginAuthsByKeys,
+ getFormattedMemories,
+ deleteMemory,
+ setMemory,
+} = require('~/models');
+const { getMCPAuthMap, checkCapability, hasCustomUserVars } = require('~/server/services/Config');
+const { encodeAndFormatDocuments } = require('~/server/services/Files/documents/encode');
const { addCacheControl, createContextHandlers } = require('~/app/clients/prompts');
const { initializeAgent } = require('~/server/services/Endpoints/agents/agent');
const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
@@ -223,12 +232,11 @@ class AgentClient extends BaseClient {
}
async addDocuments(message, attachments) {
- const documentResult =
- await require('~/server/services/Files/documents').encodeAndFormatDocuments(
- this.options.req,
- attachments,
- this.options.agent.provider,
- );
+ const documentResult = await encodeAndFormatDocuments(
+ this.options.req,
+ attachments,
+ this.options.agent.provider,
+ );
message.documents =
documentResult.documents && documentResult.documents.length
? documentResult.documents
@@ -318,7 +326,7 @@ class AgentClient extends BaseClient {
message.documents &&
message.documents.length > 0 &&
message.isCreatedByUser &&
- this.options.agent.provider === EModelEndpoint.anthropic
+ isDocumentSupportedEndpoint(this.options.agent.provider)
) {
const contentParts = [];
contentParts.push(...message.documents);
diff --git a/api/server/services/Files/documents/encode.js b/api/server/services/Files/documents/encode.js
index d5f66ec7a7..4042238ea1 100644
--- a/api/server/services/Files/documents/encode.js
+++ b/api/server/services/Files/documents/encode.js
@@ -1,6 +1,6 @@
-const { EModelEndpoint } = require('librechat-data-provider');
+const { EModelEndpoint, isDocumentSupportedEndpoint } = require('librechat-data-provider');
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
-const { validateAnthropicPdf } = require('../validation/pdfValidator');
+const { validatePdf } = require('@librechat/api');
/**
* Converts a readable stream to a buffer.
@@ -71,7 +71,7 @@ async function encodeAndFormatDocuments(req, files, endpoint) {
/** @type {FileSources} */
const source = file.source ?? 'local';
- if (file.type !== 'application/pdf' || endpoint !== EModelEndpoint.anthropic) {
+ if (file.type !== 'application/pdf' || !isDocumentSupportedEndpoint(endpoint)) {
continue;
}
@@ -132,26 +132,35 @@ async function encodeAndFormatDocuments(req, files, endpoint) {
continue;
}
- if (file.type === 'application/pdf' && endpoint === EModelEndpoint.anthropic) {
+ if (file.type === 'application/pdf' && isDocumentSupportedEndpoint(endpoint)) {
const pdfBuffer = Buffer.from(content, 'base64');
- const validation = await validateAnthropicPdf(pdfBuffer, pdfBuffer.length);
+ const validation = await validatePdf(pdfBuffer, pdfBuffer.length, endpoint);
if (!validation.isValid) {
throw new Error(`PDF validation failed: ${validation.error}`);
}
- const documentPart = {
- type: 'document',
- source: {
- type: 'base64',
- media_type: 'application/pdf',
- data: content,
- },
- cache_control: { type: 'ephemeral' },
- citations: { enabled: true },
- };
+ if (endpoint === EModelEndpoint.anthropic) {
+ const documentPart = {
+ type: 'document',
+ source: {
+ type: 'base64',
+ media_type: 'application/pdf',
+ data: content,
+ },
+ cache_control: { type: 'ephemeral' },
+ citations: { enabled: true },
+ };
+ result.documents.push(documentPart);
+ } else if (endpoint === EModelEndpoint.openAI) {
+ const documentPart = {
+ type: 'input_file',
+ filename: file.filename,
+ file_data: `data:application/pdf;base64,${content}`,
+ };
+ result.documents.push(documentPart);
+ }
- result.documents.push(documentPart);
result.files.push(metadata);
}
}
diff --git a/client/src/common/agents-types.ts b/client/src/common/agents-types.ts
index 2d18bfe572..f150b41eef 100644
--- a/client/src/common/agents-types.ts
+++ b/client/src/common/agents-types.ts
@@ -21,7 +21,7 @@ export type TAgentCapabilities = {
[AgentCapabilities.execute_code]: boolean;
[AgentCapabilities.end_after_tools]?: boolean;
[AgentCapabilities.hide_sequential_outputs]?: boolean;
- [AgentCapabilities.direct_upload]?: boolean;
+ [AgentCapabilities.direct_attach]?: boolean;
};
export type AgentForm = {
diff --git a/client/src/components/Chat/Input/Files/AttachFileMenu.tsx b/client/src/components/Chat/Input/Files/AttachFileMenu.tsx
index 6a31535755..bfd97c5021 100644
--- a/client/src/components/Chat/Input/Files/AttachFileMenu.tsx
+++ b/client/src/components/Chat/Input/Files/AttachFileMenu.tsx
@@ -8,7 +8,12 @@ import {
FileType2Icon,
FileImageIcon,
} from 'lucide-react';
-import { EToolResources, EModelEndpoint, defaultAgentCapabilities } from 'librechat-data-provider';
+import {
+ EToolResources,
+ EModelEndpoint,
+ defaultAgentCapabilities,
+ isDocumentSupportedEndpoint,
+} from 'librechat-data-provider';
import {
FileUpload,
TooltipAnchor,
@@ -72,7 +77,7 @@ const AttachFileMenu = ({
* */
const capabilities = useAgentCapabilities(agentsConfig?.capabilities ?? defaultAgentCapabilities);
- const handleUploadClick = (fileType?: 'image' | 'document' | 'anthropic_multimodal') => {
+ const handleUploadClick = (fileType?: 'image' | 'document' | 'multimodal') => {
if (!inputRef.current) {
return;
}
@@ -81,7 +86,7 @@ const AttachFileMenu = ({
inputRef.current.accept = 'image/*';
} else if (fileType === 'document') {
inputRef.current.accept = '.pdf,application/pdf';
- } else if (fileType === 'anthropic_multimodal') {
+ } else if (fileType === 'multimodal') {
inputRef.current.accept = 'image/*,.pdf,application/pdf';
} else {
inputRef.current.accept = '';
@@ -92,15 +97,22 @@ const AttachFileMenu = ({
const dropdownItems = useMemo(() => {
const createMenuItems = (
- onAction: (fileType?: 'image' | 'document' | 'anthropic_multimodal') => void,
+ onAction: (fileType?: 'image' | 'document' | 'multimodal') => void,
) => {
const items: MenuItemProps[] = [];
- // this is temporary until i add direct upload support for the other providers and can make a more robust solution
- const isAnthropicAgent = agent?.provider === 'anthropic';
- const shouldShowDirectUpload = endpoint === EModelEndpoint.anthropic || isAnthropicAgent;
+ const shouldShowDirectAttach = isDocumentSupportedEndpoint(agent?.provider ?? endpoint);
- if (!shouldShowDirectUpload) {
+ if (shouldShowDirectAttach) {
+ items.push({
+ label: localize('com_ui_upload_provider'),
+ onClick: () => {
+ setToolResource(EToolResources.direct_attach);
+ onAction('multimodal');
+ },
+ icon: ,
+ });
+ } else {
items.push({
label: localize('com_ui_upload_image_input'),
onClick: () => {
@@ -111,17 +123,6 @@ const AttachFileMenu = ({
});
}
- if (shouldShowDirectUpload) {
- items.push({
- label: localize('com_ui_upload_provider'),
- onClick: () => {
- setToolResource(EToolResources.direct_upload);
- onAction('anthropic_multimodal');
- },
- icon: ,
- });
- }
-
if (capabilities.ocrEnabled) {
items.push({
label: localize('com_ui_upload_ocr_text'),
diff --git a/client/src/hooks/Agents/useAgentCapabilities.ts b/client/src/hooks/Agents/useAgentCapabilities.ts
index 74d0d0d260..6c86a8602c 100644
--- a/client/src/hooks/Agents/useAgentCapabilities.ts
+++ b/client/src/hooks/Agents/useAgentCapabilities.ts
@@ -9,7 +9,7 @@ interface AgentCapabilitiesResult {
fileSearchEnabled: boolean;
webSearchEnabled: boolean;
codeEnabled: boolean;
- directUploadEnabled: boolean;
+ directAttachEnabled: boolean;
}
export default function useAgentCapabilities(
@@ -50,8 +50,8 @@ export default function useAgentCapabilities(
[capabilities],
);
- const directUploadEnabled = useMemo(
- () => capabilities?.includes(AgentCapabilities.direct_upload) ?? false,
+ const directAttachEnabled = useMemo(
+ () => capabilities?.includes(AgentCapabilities.direct_attach) ?? false,
[capabilities],
);
@@ -63,6 +63,6 @@ export default function useAgentCapabilities(
artifactsEnabled,
webSearchEnabled,
fileSearchEnabled,
- directUploadEnabled,
+ directAttachEnabled,
};
}
diff --git a/packages/api/src/files/index.ts b/packages/api/src/files/index.ts
index fa156f15f1..a19584efca 100644
--- a/packages/api/src/files/index.ts
+++ b/packages/api/src/files/index.ts
@@ -2,3 +2,4 @@ export * from './mistral/crud';
export * from './audio';
export * from './text';
export * from './parse';
+export * from './validation';
diff --git a/api/server/services/Files/validation/pdfValidator.js b/packages/api/src/files/validation.ts
similarity index 56%
rename from api/server/services/Files/validation/pdfValidator.js
rename to packages/api/src/files/validation.ts
index 56414e8484..3d6b8ed192 100644
--- a/api/server/services/Files/validation/pdfValidator.js
+++ b/packages/api/src/files/validation.ts
@@ -1,13 +1,36 @@
-const { logger } = require('~/config');
-const { anthropicPdfSizeLimit } = require('librechat-data-provider');
+import { anthropicPdfSizeLimit, EModelEndpoint } from 'librechat-data-provider';
+
+export interface PDFValidationResult {
+ isValid: boolean;
+ error?: string;
+}
+
+export async function validatePdf(
+ pdfBuffer: Buffer,
+ fileSize: number,
+ endpoint: EModelEndpoint,
+): Promise<PDFValidationResult> {
+ if (endpoint === EModelEndpoint.anthropic) {
+ return validateAnthropicPdf(pdfBuffer, fileSize);
+ }
+
+ if (endpoint === EModelEndpoint.openAI || endpoint === EModelEndpoint.azureOpenAI) {
+ return validateOpenAIPdf(fileSize);
+ }
+
+ return { isValid: true };
+}
/**
* Validates if a PDF meets Anthropic's requirements
- * @param {Buffer} pdfBuffer - The PDF file as a buffer
- * @param {number} fileSize - The file size in bytes
- * @returns {Promise<{isValid: boolean, error?: string}>}
+ * @param pdfBuffer - The PDF file as a buffer
+ * @param fileSize - The file size in bytes
+ * @returns Promise that resolves to validation result
*/
-async function validateAnthropicPdf(pdfBuffer, fileSize) {
+async function validateAnthropicPdf(
+ pdfBuffer: Buffer,
+ fileSize: number,
+): Promise<PDFValidationResult> {
try {
if (fileSize > anthropicPdfSizeLimit) {
return {
@@ -53,13 +76,9 @@ async function validateAnthropicPdf(pdfBuffer, fileSize) {
};
}
- logger.debug(
- `PDF validation passed: ${Math.round(fileSize / 1024)}KB, ~${estimatedPages} pages`,
- );
-
return { isValid: true };
} catch (error) {
- logger.error('PDF validation error:', error);
+ console.error('PDF validation error:', error);
return {
isValid: false,
error: 'Failed to validate PDF file',
@@ -67,6 +86,13 @@ async function validateAnthropicPdf(pdfBuffer, fileSize) {
}
}
-module.exports = {
- validateAnthropicPdf,
-};
+async function validateOpenAIPdf(fileSize: number): Promise<PDFValidationResult> {
+ if (fileSize > 10 * 1024 * 1024) {
+ return {
+ isValid: false,
+ error: "PDF file size exceeds OpenAI's 10MB limit",
+ };
+ }
+
+ return { isValid: true };
+}
diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts
index 209efcbfa6..28ee2a6077 100644
--- a/packages/data-provider/src/config.ts
+++ b/packages/data-provider/src/config.ts
@@ -175,7 +175,7 @@ export enum Capabilities {
export enum AgentCapabilities {
hide_sequential_outputs = 'hide_sequential_outputs',
end_after_tools = 'end_after_tools',
- direct_upload = 'direct_upload',
+ direct_attach = 'direct_attach',
execute_code = 'execute_code',
file_search = 'file_search',
web_search = 'web_search',
@@ -249,6 +249,7 @@ export const assistantEndpointSchema = baseEndpointSchema.merge(
export type TAssistantEndpoint = z.infer<typeof assistantEndpointSchema>;
export const defaultAgentCapabilities = [
+ AgentCapabilities.direct_attach,
AgentCapabilities.execute_code,
AgentCapabilities.file_search,
AgentCapabilities.web_search,
@@ -257,7 +258,6 @@ export const defaultAgentCapabilities = [
AgentCapabilities.tools,
AgentCapabilities.chain,
AgentCapabilities.ocr,
- AgentCapabilities.direct_upload,
];
export const agentsEndpointSchema = baseEndpointSchema
diff --git a/packages/data-provider/src/schemas.ts b/packages/data-provider/src/schemas.ts
index 0e9548b1d0..f6093ec24a 100644
--- a/packages/data-provider/src/schemas.ts
+++ b/packages/data-provider/src/schemas.ts
@@ -31,6 +31,19 @@ export enum EModelEndpoint {
gptPlugins = 'gptPlugins',
}
+/**
+ * Endpoints that support direct PDF processing in the agent system
+ */
+export const documentSupportedEndpoints = new Set([
+ EModelEndpoint.anthropic,
+ EModelEndpoint.openAI,
+ EModelEndpoint.azureOpenAI,
+]);
+
+export const isDocumentSupportedEndpoint = (endpoint: EModelEndpoint): boolean => {
+ return documentSupportedEndpoints.has(endpoint);
+};
+
export const paramEndpoints = new Set([
EModelEndpoint.agents,
EModelEndpoint.openAI,
diff --git a/packages/data-provider/src/types/assistants.ts b/packages/data-provider/src/types/assistants.ts
index 3637dbf6ba..88ae1a4b8a 100644
--- a/packages/data-provider/src/types/assistants.ts
+++ b/packages/data-provider/src/types/assistants.ts
@@ -27,7 +27,7 @@ export enum Tools {
export enum EToolResources {
code_interpreter = 'code_interpreter',
- direct_upload = 'direct_upload',
+ direct_attach = 'direct_attach',
execute_code = 'execute_code',
file_search = 'file_search',
image_edit = 'image_edit',