feat: implement Anthropic native PDF support with document preservation

- Add comprehensive debug logging throughout PDF processing pipeline
- Refactor attachment processing to separate image and document handling
- Create distinct addImageURLs(), addDocuments(), and processAttachments() methods
- Fix critical bugs in stream handling and parameter passing
- Add streamToBuffer utility for proper stream-to-buffer conversion
- Remove api/agents submodule from repository

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Andres Restrepo 2025-08-10 13:25:25 -05:00
parent 007570b5c6
commit 6605b6c800
53 changed files with 630 additions and 145 deletions

View file

@ -226,6 +226,28 @@ class AgentClient extends BaseClient {
return files;
}
async addDocuments(message, attachments) {
const documentResult =
await require('~/server/services/Files/documents').encodeAndFormatDocuments(
this.options.req,
attachments,
this.options.agent.provider,
);
message.documents =
documentResult.documents && documentResult.documents.length
? documentResult.documents
: undefined;
return documentResult.files;
}
async processAttachments(message, attachments) {
const [imageFiles, documentFiles] = await Promise.all([
this.addImageURLs(message, attachments),
this.addDocuments(message, attachments),
]);
return [...imageFiles, ...documentFiles];
}
async buildMessages(
messages,
parentMessageId,
@ -259,7 +281,7 @@ class AgentClient extends BaseClient {
};
}
const files = await this.addImageURLs(
const files = await this.processAttachments(
orderedMessages[orderedMessages.length - 1],
attachments,
);
@ -282,6 +304,23 @@ class AgentClient extends BaseClient {
assistantName: this.options?.modelLabel,
});
if (
message.documents &&
message.documents.length > 0 &&
message.role === 'user' &&
this.options.agent.provider === EModelEndpoint.anthropic
) {
const contentParts = [];
contentParts.push(...message.documents);
if (message.image_urls && message.image_urls.length > 0) {
contentParts.push(...message.image_urls);
}
const textContent =
typeof formattedMessage.content === 'string' ? formattedMessage.content : '';
contentParts.push({ type: 'text', text: textContent });
formattedMessage.content = contentParts;
}
if (message.ocr && i !== orderedMessages.length - 1) {
if (typeof formattedMessage.content === 'string') {
formattedMessage.content = message.ocr + '\n' + formattedMessage.content;
@ -777,6 +816,51 @@ class AgentClient extends BaseClient {
};
const toolSet = new Set((this.options.agent.tools ?? []).map((tool) => tool && tool.name));
if (
this.options.agent.provider === EModelEndpoint.anthropic &&
payload &&
Array.isArray(payload)
) {
let userMessageWithDocs = null;
if (this.userMessage?.documents) {
userMessageWithDocs = this.userMessage;
} else if (this.currentMessages?.length > 0) {
const lastMessage = this.currentMessages[this.currentMessages.length - 1];
if (lastMessage.documents?.length > 0) {
userMessageWithDocs = lastMessage;
}
} else if (this.messages?.length > 0) {
const lastMessage = this.messages[this.messages.length - 1];
if (lastMessage.documents?.length > 0) {
userMessageWithDocs = lastMessage;
}
}
if (userMessageWithDocs) {
for (const payloadMessage of payload) {
if (
payloadMessage.role === 'user' &&
userMessageWithDocs.text === payloadMessage.content
) {
if (typeof payloadMessage.content === 'string') {
payloadMessage.content = [
...userMessageWithDocs.documents,
{ type: 'text', text: payloadMessage.content },
];
} else if (Array.isArray(payloadMessage.content)) {
payloadMessage.content = [
...userMessageWithDocs.documents,
...payloadMessage.content,
];
}
break;
}
}
}
}
let { messages: initialMessages, indexTokenCountMap } = formatAgentMessages(
payload,
this.indexTokenCountMap,

View file

@ -43,7 +43,6 @@ afterEach(() => {
//TODO: This works/passes locally but http request tests fail with 404 in CI. Need to figure out why.
// eslint-disable-next-line jest/no-disabled-tests
describe.skip('GET /', () => {
it('should return 200 and the correct body', async () => {
process.env.APP_TITLE = 'Test Title';

View file

@ -3,7 +3,6 @@ const generateArtifactsPrompt = require('~/app/clients/prompts/artifacts');
const { getAssistant } = require('~/models/Assistant');
const buildOptions = async (endpoint, parsedBody) => {
const { promptPrefix, assistant_id, iconURL, greeting, spec, artifacts, ...modelOptions } =
parsedBody;
const endpointOption = removeNullishValues({

View file

@ -112,11 +112,11 @@ describe('initializeClient', () => {
test('should initialize client with Azure credentials when endpoint is azureOpenAI', async () => {
process.env.AZURE_API_KEY = 'test-azure-api-key';
(process.env.AZURE_OPENAI_API_INSTANCE_NAME = 'some-value'),
(process.env.AZURE_OPENAI_API_DEPLOYMENT_NAME = 'some-value'),
(process.env.AZURE_OPENAI_API_VERSION = 'some-value'),
(process.env.AZURE_OPENAI_API_COMPLETIONS_DEPLOYMENT_NAME = 'some-value'),
(process.env.AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME = 'some-value'),
(process.env.OPENAI_API_KEY = 'test-openai-api-key');
(process.env.AZURE_OPENAI_API_DEPLOYMENT_NAME = 'some-value'),
(process.env.AZURE_OPENAI_API_VERSION = 'some-value'),
(process.env.AZURE_OPENAI_API_COMPLETIONS_DEPLOYMENT_NAME = 'some-value'),
(process.env.AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME = 'some-value'),
(process.env.OPENAI_API_KEY = 'test-openai-api-key');
process.env.DEBUG_OPENAI = 'false';
process.env.OPENAI_SUMMARIZE = 'false';

View file

@ -0,0 +1,166 @@
const { EModelEndpoint } = require('librechat-data-provider');
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
const { validateAnthropicPdf } = require('../validation/pdfValidator');
/**
 * Collects everything a readable stream emits into a single Buffer.
 *
 * Chunks that arrive as strings (a stream with an encoding set, or an object-mode
 * stream yielding strings) are converted to Buffers using the stream's
 * `readableEncoding`, falling back to UTF-8, so that `Buffer.concat` does not
 * reject on them. The stream is destroyed once the promise settles, whether it
 * ended normally or failed.
 *
 * @param {NodeJS.ReadableStream} stream - The readable stream to drain.
 * @returns {Promise<Buffer>} Resolves with the concatenated stream contents.
 * @throws {TypeError} When `stream` is missing or has no `on` method.
 */
async function streamToBuffer(stream) {
  if (!stream || typeof stream.on !== 'function') {
    throw new TypeError('streamToBuffer expects a readable stream');
  }

  try {
    return await new Promise((resolve, reject) => {
      const chunks = [];

      stream.on('data', (chunk) => {
        // Buffer.concat only accepts Buffer/Uint8Array entries, so normalize strings here.
        chunks.push(
          typeof chunk === 'string'
            ? Buffer.from(chunk, stream.readableEncoding ?? 'utf8')
            : chunk,
        );
      });

      stream.on('end', () => {
        try {
          resolve(Buffer.concat(chunks));
        } catch (err) {
          // Reached when a chunk is neither a string nor binary data.
          reject(err);
        }
      });

      stream.on('error', reject);
    });
  } finally {
    // Release the underlying resource regardless of the outcome.
    if (typeof stream.destroy === 'function') {
      stream.destroy();
    }
  }
}
/**
 * Downloads, validates and base64-encodes document attachments for a provider.
 *
 * Only `application/pdf` files sent to the Anthropic endpoint are processed; every
 * other file or endpoint is ignored and contributes nothing to the result.
 * Each downloaded PDF is validated on the downloaded bytes directly (no base64
 * round trip) and then emitted as a `document` content part.
 *
 * A file whose download fails, or which downloads as empty, is logged and still
 * reported in `files` (metadata only), without a matching document part.
 *
 * @param {Express.Request} req - Express request object, forwarded to the download strategy.
 * @param {MongoFile[]} files - Array of file objects to process.
 * @param {string} endpoint - The endpoint identifier (e.g., EModelEndpoint.anthropic).
 * @returns {Promise<{documents: MessageContentDocument[], files: MongoFile[]}>}
 * @throws {Error} When a downloaded PDF fails `validateAnthropicPdf`.
 */
async function encodeAndFormatDocuments(req, files, endpoint) {
  /** @type {Record<FileSources, Pick<ReturnType<typeof getStrategyFunctions>, 'prepareDocumentPayload' | 'getDownloadStream'>>} */
  const encodingMethods = {};
  /** @type {{ documents: MessageContentDocument[]; files: MongoFile[] }} */
  const result = {
    documents: [],
    files: [],
  };

  if (!files || !files.length) {
    return result;
  }

  // Only PDFs for Anthropic are supported for now.
  const pdfFiles = files.filter(
    (file) => file.type === 'application/pdf' && endpoint === EModelEndpoint.anthropic,
  );
  if (!pdfFiles.length) {
    return result;
  }

  // Resolve each storage strategy once, up front, so a strategy lookup failure
  // surfaces to the caller instead of being swallowed per file.
  const queued = pdfFiles.map((file) => {
    /** @type {FileSources} */
    const source = file.source ?? 'local';
    if (!encodingMethods[source]) {
      encodingMethods[source] = getStrategyFunctions(source);
    }

    const metadata = {
      file_id: file.file_id || file._id,
      temp_file_id: file.temp_file_id,
      filepath: file.filepath,
      source: file.source,
      filename: file.filename,
      type: file.type,
    };

    return { file, source, metadata };
  });

  // Every task handles its own failure, so Promise.all cannot reject here.
  const downloads = await Promise.all(
    queued.map(async ({ file, source, metadata }) => {
      try {
        const { getDownloadStream } = encodingMethods[source];
        const stream = await getDownloadStream(req, file.filepath);
        const buffer = await streamToBuffer(stream);
        return { buffer, metadata };
      } catch (error) {
        console.error(`Error processing document ${file.filename}:`, error);
        return { buffer: null, metadata };
      }
    }),
  );

  for (const { buffer, metadata } of downloads) {
    if (!buffer || buffer.length === 0) {
      // Failed or empty download: report the file without a document part.
      result.files.push(metadata);
      continue;
    }

    const validation = await validateAnthropicPdf(buffer, buffer.length);
    if (!validation.isValid) {
      throw new Error(`PDF validation failed: ${validation.error}`);
    }

    result.documents.push({
      type: 'document',
      source: {
        type: 'base64',
        media_type: 'application/pdf',
        data: buffer.toString('base64'),
      },
    });
    result.files.push(metadata);
  }

  return result;
}
// Public surface of this module: only the document encoder is exported.
module.exports = { encodeAndFormatDocuments };

View file

@ -0,0 +1,5 @@
const { encodeAndFormatDocuments } = require('./encode');
module.exports = {
encodeAndFormatDocuments,
};

View file

@ -391,7 +391,17 @@ const processFileUpload = async ({ req, res, metadata }) => {
const isAssistantUpload = isAssistantsEndpoint(metadata.endpoint);
const assistantSource =
metadata.endpoint === EModelEndpoint.azureAssistants ? FileSources.azure : FileSources.openai;
const source = isAssistantUpload ? assistantSource : FileSources.vectordb;
// Use local storage for Anthropic native PDF support, vectordb for others
const isAnthropicUpload = metadata.endpoint === EModelEndpoint.anthropic;
let source;
if (isAssistantUpload) {
source = assistantSource;
} else if (isAnthropicUpload) {
source = FileSources.local;
} else {
source = FileSources.vectordb;
}
const { handleFileUpload } = getStrategyFunctions(source);
const { file_id, temp_file_id } = metadata;

View file

@ -0,0 +1,77 @@
const { logger } = require('~/config');
const { anthropicPdfSizeLimit } = require('librechat-data-provider');
/**
 * Validates if a PDF meets Anthropic's requirements.
 *
 * Checks, in order: size against `anthropicPdfSizeLimit`, presence of the `%PDF-`
 * header, an `/Encrypt` entry (password protection / encryption), and a rough
 * page-count estimate against the 100-page limit.
 *
 * The encryption check looks only for the `/Encrypt` name followed by PDF
 * whitespace or `<`. Matching `/U (` or `/O (` anywhere in the raw bytes rejected
 * unencrypted files whose text or compressed streams happened to contain those
 * sequences, and a literal trailing space missed `/Encrypt` followed by a newline
 * or an inline dictionary.
 *
 * Any unexpected exception is logged and reported as a failed validation; this
 * function does not throw.
 *
 * @param {Buffer} pdfBuffer - The PDF file as a buffer
 * @param {number} [fileSize] - The file size in bytes; defaults to `pdfBuffer.length`
 * @returns {Promise<{isValid: boolean, error?: string}>}
 */
async function validateAnthropicPdf(pdfBuffer, fileSize) {
  try {
    // Fall back to the buffer length so an omitted size cannot skip the limit check.
    const sizeInBytes = fileSize ?? pdfBuffer?.length ?? 0;

    // Check file size (32MB limit)
    if (sizeInBytes > anthropicPdfSizeLimit) {
      return {
        isValid: false,
        error: `PDF file size (${Math.round(sizeInBytes / (1024 * 1024))}MB) exceeds Anthropic's 32MB limit`,
      };
    }

    // Basic PDF header validation
    if (!pdfBuffer || pdfBuffer.length < 5) {
      return {
        isValid: false,
        error: 'Invalid PDF file: too small or corrupted',
      };
    }

    // Check PDF magic bytes
    const pdfHeader = pdfBuffer.subarray(0, 5).toString('latin1');
    if (pdfHeader !== '%PDF-') {
      return {
        isValid: false,
        error: 'Invalid PDF file: missing PDF header',
      };
    }

    // latin1 maps every byte to exactly one character, so byte patterns survive intact.
    const pdfContent = pdfBuffer.toString('latin1');

    // Check for password protection/encryption. The lookahead keeps longer names
    // such as /EncryptMetadata from matching.
    if (/\/Encrypt(?=[\x00\t\n\f\r <])/.test(pdfContent)) {
      return {
        isValid: false,
        error: 'PDF is password-protected or encrypted. Anthropic requires unencrypted PDFs.',
      };
    }

    // Estimate page count (this is a rough estimation). The lookahead excludes
    // /Pages and any other longer name that merely starts with "Page".
    const pageMatches = pdfContent.match(/\/Type\s*\/Page(?![A-Za-z])/g);
    const estimatedPages = pageMatches ? pageMatches.length : 1;
    if (estimatedPages > 100) {
      return {
        isValid: false,
        error: `PDF has approximately ${estimatedPages} pages, exceeding Anthropic's 100-page limit`,
      };
    }

    logger.debug(
      `PDF validation passed: ${Math.round(sizeInBytes / 1024)}KB, ~${estimatedPages} pages`,
    );
    return { isValid: true };
  } catch (error) {
    logger.error('PDF validation error:', error);
    return {
      isValid: false,
      error: 'Failed to validate PDF file',
    };
  }
}
// Public surface of this module: only the Anthropic PDF validator is exported.
module.exports = { validateAnthropicPdf };

View file

@ -255,7 +255,7 @@ describe('processMessages', () => {
type: 'text',
text: {
value:
'The text you have uploaded is from the book "Harry Potter and the Philosopher\'s Stone" by J.K. Rowling. It follows the story of a young boy named Harry Potter who discovers that he is a wizard on his eleventh birthday. Here are some key points of the narrative:\n\n1. **Discovery and Invitation to Hogwarts**: Harry learns that he is a wizard and receives an invitation to attend Hogwarts School of Witchcraft and Wizardry【11:2†source】【11:4†source】.\n\n2. **Shopping for Supplies**: Hagrid takes Harry to Diagon Alley to buy his school supplies, including his wand from Ollivander\'s【11:9†source】【11:14†source】.\n\n3. **Introduction to Hogwarts**: Harry is introduced to Hogwarts, the magical school where he will learn about magic and discover more about his own background【11:12†source】【11:18†source】.\n\n4. **Meeting Friends and Enemies**: At Hogwarts, Harry makes friends like Ron Weasley and Hermione Granger, and enemies like Draco Malfoy【11:16†source】.\n\n5. **Uncovering the Mystery**: Harry, along with Ron and Hermione, uncovers the mystery of the Philosopher\'s Stone and its connection to the dark wizard Voldemort【11:1†source】【11:10†source】【11:7†source】.\n\nThese points highlight Harry\'s initial experiences in the magical world and set the stage for his adventures at Hogwarts.',
"The text you have uploaded is from the book \"Harry Potter and the Philosopher's Stone\" by J.K. Rowling. It follows the story of a young boy named Harry Potter who discovers that he is a wizard on his eleventh birthday. Here are some key points of the narrative:\n\n1. **Discovery and Invitation to Hogwarts**: Harry learns that he is a wizard and receives an invitation to attend Hogwarts School of Witchcraft and Wizardry【11:2†source】【11:4†source】.\n\n2. **Shopping for Supplies**: Hagrid takes Harry to Diagon Alley to buy his school supplies, including his wand from Ollivander's【11:9†source】【11:14†source】.\n\n3. **Introduction to Hogwarts**: Harry is introduced to Hogwarts, the magical school where he will learn about magic and discover more about his own background【11:12†source】【11:18†source】.\n\n4. **Meeting Friends and Enemies**: At Hogwarts, Harry makes friends like Ron Weasley and Hermione Granger, and enemies like Draco Malfoy【11:16†source】.\n\n5. **Uncovering the Mystery**: Harry, along with Ron and Hermione, uncovers the mystery of the Philosopher's Stone and its connection to the dark wizard Voldemort【11:1†source】【11:10†source】【11:7†source】.\n\nThese points highlight Harry's initial experiences in the magical world and set the stage for his adventures at Hogwarts.",
annotations: [
{
type: 'file_citation',
@ -424,7 +424,7 @@ These points highlight Harry's initial experiences in the magical world and set
type: 'text',
text: {
value:
'The text you have uploaded is from the book "Harry Potter and the Philosopher\'s Stone" by J.K. Rowling. It follows the story of a young boy named Harry Potter who discovers that he is a wizard on his eleventh birthday. Here are some key points of the narrative:\n\n1. **Discovery and Invitation to Hogwarts**: Harry learns that he is a wizard and receives an invitation to attend Hogwarts School of Witchcraft and Wizardry【11:2†source】【11:4†source】.\n\n2. **Shopping for Supplies**: Hagrid takes Harry to Diagon Alley to buy his school supplies, including his wand from Ollivander\'s【11:9†source】【11:14†source】.\n\n3. **Introduction to Hogwarts**: Harry is introduced to Hogwarts, the magical school where he will learn about magic and discover more about his own background【11:12†source】【11:18†source】.\n\n4. **Meeting Friends and Enemies**: At Hogwarts, Harry makes friends like Ron Weasley and Hermione Granger, and enemies like Draco Malfoy【11:16†source】.\n\n5. **Uncovering the Mystery**: Harry, along with Ron and Hermione, uncovers the mystery of the Philosopher\'s Stone and its connection to the dark wizard Voldemort【11:1†source】【11:10†source】【11:7†source】.\n\nThese points highlight Harry\'s initial experiences in the magical world and set the stage for his adventures at Hogwarts.',
"The text you have uploaded is from the book \"Harry Potter and the Philosopher's Stone\" by J.K. Rowling. It follows the story of a young boy named Harry Potter who discovers that he is a wizard on his eleventh birthday. Here are some key points of the narrative:\n\n1. **Discovery and Invitation to Hogwarts**: Harry learns that he is a wizard and receives an invitation to attend Hogwarts School of Witchcraft and Wizardry【11:2†source】【11:4†source】.\n\n2. **Shopping for Supplies**: Hagrid takes Harry to Diagon Alley to buy his school supplies, including his wand from Ollivander's【11:9†source】【11:14†source】.\n\n3. **Introduction to Hogwarts**: Harry is introduced to Hogwarts, the magical school where he will learn about magic and discover more about his own background【11:12†source】【11:18†source】.\n\n4. **Meeting Friends and Enemies**: At Hogwarts, Harry makes friends like Ron Weasley and Hermione Granger, and enemies like Draco Malfoy【11:16†source】.\n\n5. **Uncovering the Mystery**: Harry, along with Ron and Hermione, uncovers the mystery of the Philosopher's Stone and its connection to the dark wizard Voldemort【11:1†source】【11:10†source】【11:7†source】.\n\nThese points highlight Harry's initial experiences in the magical world and set the stage for his adventures at Hogwarts.",
annotations: [
{
type: 'file_citation',
@ -582,7 +582,7 @@ These points highlight Harry's initial experiences in the magical world and set
type: 'text',
text: {
value:
'This is a test ^1^ with pre-existing citation-like text. Here\'s a real citation【11:2†source】.',
"This is a test ^1^ with pre-existing citation-like text. Here's a real citation【11:2†source】.",
annotations: [
{
type: 'file_citation',
@ -610,7 +610,7 @@ These points highlight Harry's initial experiences in the magical world and set
});
const expectedText =
'This is a test ^1^ with pre-existing citation-like text. Here\'s a real citation^1^.\n\n^1.^ test.txt';
"This is a test ^1^ with pre-existing citation-like text. Here's a real citation^1^.\n\n^1.^ test.txt";
expect(result.text).toBe(expectedText);
expect(result.edited).toBe(true);