⚗️ refactor: Provider File Validation with Configurable Size Limits (#10405)
Some checks failed
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Has been cancelled
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Has been cancelled

* chore: correct type for ServerRequest

* chore: improve ServerRequest typing across several modules

* feat: Add PDF configured limit validation

- Introduced comprehensive tests for PDF validation across multiple providers, ensuring correct behavior for file size limits and edge cases.
- Enhanced the `validatePdf` function to accept an optional configured file size limit, allowing for stricter validation based on user configurations.
- Updated related functions to utilize the new validation logic, ensuring consistent behavior across different providers.

* chore: Update Request type to ServerRequest in audio and video encoding modules

* refactor: move `getConfiguredFileSizeLimit` utility

* feat: Add video and audio validation with configurable size limits

- Introduced `validateVideo` and `validateAudio` functions to validate media files against provider-specific size limits.
- Enhanced validation logic to consider optional configured file size limits, allowing for more flexible file handling.
- Added comprehensive tests for video and audio validation across different providers, ensuring correct behavior for various scenarios.

* refactor: Update PDF and media validation to allow higher configured limits

- Modified validation logic to accept user-configured file size limits that exceed provider defaults, ensuring correct acceptance of files within the specified range.
- Updated tests to reflect changes in validation behavior, confirming that files are accepted when within the configured limits.
- Enhanced documentation in tests to clarify expected outcomes with the new validation rules.

* chore: Add @types/node-fetch dependency to package.json and package-lock.json

- Included the @types/node-fetch package to enhance type definitions for node-fetch usage.
- Updated package-lock.json to reflect the addition of the new dependency.

* fix: Rename FileConfigInput to TFileConfig
This commit is contained in:
Danny Avila 2025-11-07 10:57:15 -05:00 committed by GitHub
parent 625a321cc1
commit 360ec22964
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 1237 additions and 48 deletions

View file

@ -0,0 +1,529 @@
import { Providers } from '@librechat/agents';
import { mbToBytes } from 'librechat-data-provider';
import type { AppConfig, IMongoFile } from '@librechat/data-schemas';
import type { ServerRequest } from '~/types';
import { encodeAndFormatDocuments } from './document';
/** Mock the validation module */
jest.mock('~/files/validation', () => ({
validatePdf: jest.fn(),
}));
/** Mock the utils module */
jest.mock('./utils', () => ({
getFileStream: jest.fn(),
getConfiguredFileSizeLimit: jest.fn(),
}));
import { validatePdf } from '~/files/validation';
import { getFileStream, getConfiguredFileSizeLimit } from './utils';
import { Types } from 'mongoose';
const mockedValidatePdf = validatePdf as jest.MockedFunction<typeof validatePdf>;
const mockedGetFileStream = getFileStream as jest.MockedFunction<typeof getFileStream>;
const mockedGetConfiguredFileSizeLimit = getConfiguredFileSizeLimit as jest.MockedFunction<
typeof getConfiguredFileSizeLimit
>;
describe('encodeAndFormatDocuments - fileConfig integration', () => {
const mockStrategyFunctions = jest.fn();
beforeEach(() => {
jest.clearAllMocks();
/** Default mock implementation for getConfiguredFileSizeLimit */
mockedGetConfiguredFileSizeLimit.mockImplementation((req, provider) => {
if (!req.config?.fileConfig) {
return undefined;
}
const fileConfig = req.config.fileConfig;
const endpoints = fileConfig.endpoints;
if (endpoints?.[provider]) {
const limit = endpoints[provider].fileSizeLimit;
return limit !== undefined ? mbToBytes(limit) : undefined;
}
if (endpoints?.default) {
const limit = endpoints.default.fileSizeLimit;
return limit !== undefined ? mbToBytes(limit) : undefined;
}
return undefined;
});
});
/** Helper to create a mock request with file config */
const createMockRequest = (fileSizeLimit?: number): Partial<AppConfig> => ({
config:
fileSizeLimit !== undefined
? {
fileConfig: {
endpoints: {
[Providers.OPENAI]: {
fileSizeLimit,
},
},
},
}
: undefined,
});
/** Helper to create a mock PDF file */
const createMockFile = (sizeInMB: number): IMongoFile =>
({
_id: new Types.ObjectId(),
user: new Types.ObjectId(),
file_id: new Types.ObjectId().toString(),
filename: 'test.pdf',
type: 'application/pdf',
bytes: Math.floor(sizeInMB * 1024 * 1024),
object: 'file',
usage: 0,
source: 'test',
filepath: '/test/path.pdf',
createdAt: new Date(),
updatedAt: new Date(),
}) as unknown as IMongoFile;
describe('Configuration extraction and validation', () => {
it('should pass configured file size limit to validatePdf for OpenAI', async () => {
const configuredLimit = mbToBytes(15);
const req = createMockRequest(15) as ServerRequest;
const file = createMockFile(10);
const mockContent = Buffer.from('test-pdf-content').toString('base64');
mockedGetFileStream.mockResolvedValue({
file,
content: mockContent,
metadata: file,
});
mockedValidatePdf.mockResolvedValue({ isValid: true });
await encodeAndFormatDocuments(
req,
[file],
{ provider: Providers.OPENAI },
mockStrategyFunctions,
);
expect(mockedValidatePdf).toHaveBeenCalledWith(
expect.any(Buffer),
expect.any(Number),
Providers.OPENAI,
configuredLimit,
);
});
it('should pass undefined when no fileConfig is provided', async () => {
const req = {} as ServerRequest;
const file = createMockFile(10);
const mockContent = Buffer.from('test-pdf-content').toString('base64');
mockedGetFileStream.mockResolvedValue({
file,
content: mockContent,
metadata: file,
});
mockedValidatePdf.mockResolvedValue({ isValid: true });
await encodeAndFormatDocuments(
req,
[file],
{ provider: Providers.OPENAI },
mockStrategyFunctions,
);
expect(mockedValidatePdf).toHaveBeenCalledWith(
expect.any(Buffer),
expect.any(Number),
Providers.OPENAI,
undefined,
);
});
it('should pass undefined when fileConfig.endpoints is not defined', async () => {
const req = {
config: {
fileConfig: {},
},
} as ServerRequest;
const file = createMockFile(10);
const mockContent = Buffer.from('test-pdf-content').toString('base64');
mockedGetFileStream.mockResolvedValue({
file,
content: mockContent,
metadata: file,
});
mockedValidatePdf.mockResolvedValue({ isValid: true });
await encodeAndFormatDocuments(
req,
[file],
{ provider: Providers.OPENAI },
mockStrategyFunctions,
);
/** When fileConfig has no endpoints, getConfiguredFileSizeLimit returns undefined */
expect(mockedValidatePdf).toHaveBeenCalledWith(
expect.any(Buffer),
expect.any(Number),
Providers.OPENAI,
undefined,
);
});
it('should use endpoint-specific config for Anthropic', async () => {
const configuredLimit = mbToBytes(20);
const req = {
config: {
fileConfig: {
endpoints: {
[Providers.ANTHROPIC]: {
fileSizeLimit: 20,
},
},
},
},
} as unknown as ServerRequest;
const file = createMockFile(15);
const mockContent = Buffer.from('test-pdf-content').toString('base64');
mockedGetFileStream.mockResolvedValue({
file,
content: mockContent,
metadata: file,
});
mockedValidatePdf.mockResolvedValue({ isValid: true });
await encodeAndFormatDocuments(
req,
[file],
{ provider: Providers.ANTHROPIC },
mockStrategyFunctions,
);
expect(mockedValidatePdf).toHaveBeenCalledWith(
expect.any(Buffer),
expect.any(Number),
Providers.ANTHROPIC,
configuredLimit,
);
});
it('should use endpoint-specific config for Google', async () => {
const configuredLimit = mbToBytes(25);
const req = {
config: {
fileConfig: {
endpoints: {
[Providers.GOOGLE]: {
fileSizeLimit: 25,
},
},
},
},
} as unknown as ServerRequest;
const file = createMockFile(18);
const mockContent = Buffer.from('test-pdf-content').toString('base64');
mockedGetFileStream.mockResolvedValue({
file,
content: mockContent,
metadata: file,
});
mockedValidatePdf.mockResolvedValue({ isValid: true });
await encodeAndFormatDocuments(
req,
[file],
{ provider: Providers.GOOGLE },
mockStrategyFunctions,
);
expect(mockedValidatePdf).toHaveBeenCalledWith(
expect.any(Buffer),
expect.any(Number),
Providers.GOOGLE,
configuredLimit,
);
});
it('should pass undefined when provider-specific config not found and no default', async () => {
const req = {
config: {
fileConfig: {
endpoints: {
/** Only configure a different provider, not OpenAI */
[Providers.ANTHROPIC]: {
fileSizeLimit: 25,
},
},
},
},
} as unknown as ServerRequest;
const file = createMockFile(20);
const mockContent = Buffer.from('test-pdf-content').toString('base64');
mockedGetFileStream.mockResolvedValue({
file,
content: mockContent,
metadata: file,
});
mockedValidatePdf.mockResolvedValue({ isValid: true });
await encodeAndFormatDocuments(
req,
[file],
{ provider: Providers.OPENAI },
mockStrategyFunctions,
);
/** When provider-specific config not found and no default, returns undefined */
expect(mockedValidatePdf).toHaveBeenCalledWith(
expect.any(Buffer),
expect.any(Number),
Providers.OPENAI,
undefined,
);
});
});
describe('Validation failure handling', () => {
it('should throw error when validation fails', async () => {
const req = createMockRequest(10) as ServerRequest;
const file = createMockFile(12);
const mockContent = Buffer.from('test-pdf-content').toString('base64');
mockedGetFileStream.mockResolvedValue({
file,
content: mockContent,
metadata: file,
});
mockedValidatePdf.mockResolvedValue({
isValid: false,
error: 'PDF file size (12MB) exceeds the 10MB limit',
});
await expect(
encodeAndFormatDocuments(
req,
[file],
{ provider: Providers.OPENAI },
mockStrategyFunctions,
),
).rejects.toThrow('PDF validation failed: PDF file size (12MB) exceeds the 10MB limit');
});
it('should not call validatePdf for non-PDF files', async () => {
const req = createMockRequest(10) as ServerRequest;
const file: IMongoFile = {
...createMockFile(5),
type: 'image/jpeg',
filename: 'test.jpg',
};
const mockContent = Buffer.from('test-image-content').toString('base64');
mockedGetFileStream.mockResolvedValue({
file,
content: mockContent,
metadata: file,
});
await encodeAndFormatDocuments(
req,
[file],
{ provider: Providers.OPENAI },
mockStrategyFunctions,
);
expect(mockedValidatePdf).not.toHaveBeenCalled();
});
});
describe('Bug reproduction scenarios', () => {
it('should respect user-configured lower limit (stricter than provider)', async () => {
/**
* Scenario: User sets openAI.fileSizeLimit = 5MB (stricter than 10MB provider limit)
* Uploads 7MB PDF
* Expected: Validation called with 5MB limit
*/
const req = createMockRequest(5) as ServerRequest;
const file = createMockFile(7);
const mockContent = Buffer.from('test-pdf-content').toString('base64');
mockedGetFileStream.mockResolvedValue({
file,
content: mockContent,
metadata: file,
});
mockedValidatePdf.mockResolvedValue({
isValid: false,
error: 'PDF file size (7MB) exceeds the 5MB limit',
});
await expect(
encodeAndFormatDocuments(
req,
[file],
{ provider: Providers.OPENAI },
mockStrategyFunctions,
),
).rejects.toThrow('PDF validation failed');
expect(mockedValidatePdf).toHaveBeenCalledWith(
expect.any(Buffer),
expect.any(Number),
Providers.OPENAI,
mbToBytes(5),
);
});
it('should respect user-configured higher limit (allows API changes)', async () => {
/**
* Scenario: User sets openAI.fileSizeLimit = 50MB (higher than 10MB provider default)
* Uploads 15MB PDF
* Expected: Validation called with 50MB limit, allowing files between 10-50MB
* This allows users to take advantage of API limit increases
*/
const req = createMockRequest(50) as ServerRequest;
const file = createMockFile(15);
const mockContent = Buffer.from('test-pdf-content').toString('base64');
mockedGetFileStream.mockResolvedValue({
file,
content: mockContent,
metadata: file,
});
mockedValidatePdf.mockResolvedValue({ isValid: true });
await encodeAndFormatDocuments(
req,
[file],
{ provider: Providers.OPENAI },
mockStrategyFunctions,
);
expect(mockedValidatePdf).toHaveBeenCalledWith(
expect.any(Buffer),
expect.any(Number),
Providers.OPENAI,
mbToBytes(50),
);
});
it('should handle multiple files with different sizes', async () => {
const req = createMockRequest(10) as ServerRequest;
const file1 = createMockFile(5);
const file2 = createMockFile(8);
const mockContent1 = Buffer.from('pdf-content-1').toString('base64');
const mockContent2 = Buffer.from('pdf-content-2').toString('base64');
mockedGetFileStream
.mockResolvedValueOnce({
file: file1,
content: mockContent1,
metadata: file1,
})
.mockResolvedValueOnce({
file: file2,
content: mockContent2,
metadata: file2,
});
mockedValidatePdf.mockResolvedValue({ isValid: true });
await encodeAndFormatDocuments(
req,
[file1, file2],
{ provider: Providers.OPENAI },
mockStrategyFunctions,
);
expect(mockedValidatePdf).toHaveBeenCalledTimes(2);
expect(mockedValidatePdf).toHaveBeenNthCalledWith(
1,
expect.any(Buffer),
expect.any(Number),
Providers.OPENAI,
mbToBytes(10),
);
expect(mockedValidatePdf).toHaveBeenNthCalledWith(
2,
expect.any(Buffer),
expect.any(Number),
Providers.OPENAI,
mbToBytes(10),
);
});
});
describe('Document formatting after validation', () => {
it('should format Anthropic document with valid PDF', async () => {
const req = createMockRequest(30) as ServerRequest;
const file = createMockFile(20);
const mockContent = Buffer.from('test-pdf-content').toString('base64');
mockedGetFileStream.mockResolvedValue({
file,
content: mockContent,
metadata: file,
});
mockedValidatePdf.mockResolvedValue({ isValid: true });
const result = await encodeAndFormatDocuments(
req,
[file],
{ provider: Providers.ANTHROPIC },
mockStrategyFunctions,
);
expect(result.documents).toHaveLength(1);
expect(result.documents[0]).toMatchObject({
type: 'document',
source: {
type: 'base64',
media_type: 'application/pdf',
data: mockContent,
},
citations: { enabled: true },
});
});
it('should format OpenAI document with responses API', async () => {
const req = createMockRequest(15) as ServerRequest;
const file = createMockFile(10);
const mockContent = Buffer.from('test-pdf-content').toString('base64');
mockedGetFileStream.mockResolvedValue({
file,
content: mockContent,
metadata: file,
});
mockedValidatePdf.mockResolvedValue({ isValid: true });
const result = await encodeAndFormatDocuments(
req,
[file],
{ provider: Providers.OPENAI, useResponsesApi: true },
mockStrategyFunctions,
);
expect(result.documents).toHaveLength(1);
expect(result.documents[0]).toMatchObject({
type: 'input_file',
filename: 'test.pdf',
file_data: `data:application/pdf;base64,${mockContent}`,
});
});
});
});