mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-16 16:30:15 +01:00
⚗️ refactor: Provider File Validation with Configurable Size Limits (#10405)
* chore: correct type for ServerRequest
* chore: improve ServerRequest typing across several modules
* feat: Add PDF configured limit validation
  - Introduced comprehensive tests for PDF validation across multiple providers, ensuring correct behavior for file size limits and edge cases.
  - Enhanced the `validatePdf` function to accept an optional configured file size limit, allowing for stricter validation based on user configurations.
  - Updated related functions to utilize the new validation logic, ensuring consistent behavior across different providers.
* chore: Update Request type to ServerRequest in audio and video encoding modules
* refactor: move `getConfiguredFileSizeLimit` utility
* feat: Add video and audio validation with configurable size limits
  - Introduced `validateVideo` and `validateAudio` functions to validate media files against provider-specific size limits.
  - Enhanced validation logic to consider optional configured file size limits, allowing for more flexible file handling.
  - Added comprehensive tests for video and audio validation across different providers, ensuring correct behavior for various scenarios.
* refactor: Update PDF and media validation to allow higher configured limits
  - Modified validation logic to accept user-configured file size limits that exceed provider defaults, ensuring correct acceptance of files within the specified range.
  - Updated tests to reflect changes in validation behavior, confirming that files are accepted when within the configured limits.
  - Enhanced documentation in tests to clarify expected outcomes with the new validation rules.
* chore: Add @types/node-fetch dependency to package.json and package-lock.json
  - Included the @types/node-fetch package to enhance type definitions for node-fetch usage.
  - Updated package-lock.json to reflect the addition of the new dependency.
* fix: Rename FileConfigInput to TFileConfig
This commit is contained in:
parent
625a321cc1
commit
360ec22964
15 changed files with 1237 additions and 48 deletions
12
package-lock.json
generated
12
package-lock.json
generated
|
|
@ -24371,6 +24371,17 @@
|
|||
"undici-types": "~5.26.4"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/node-fetch": {
|
||||
"version": "2.6.13",
|
||||
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz",
|
||||
"integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/node": "*",
|
||||
"form-data": "^4.0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/passport": {
|
||||
"version": "1.0.17",
|
||||
"resolved": "https://registry.npmjs.org/@types/passport/-/passport-1.0.17.tgz",
|
||||
|
|
@ -45745,6 +45756,7 @@
|
|||
"@types/jsonwebtoken": "^9.0.0",
|
||||
"@types/multer": "^1.4.13",
|
||||
"@types/node": "^20.3.0",
|
||||
"@types/node-fetch": "^2.6.13",
|
||||
"@types/react": "^18.2.18",
|
||||
"@types/winston": "^2.4.4",
|
||||
"jest": "^29.5.0",
|
||||
|
|
|
|||
|
|
@ -60,6 +60,7 @@
|
|||
"@types/jsonwebtoken": "^9.0.0",
|
||||
"@types/multer": "^1.4.13",
|
||||
"@types/node": "^20.3.0",
|
||||
"@types/node-fetch": "^2.6.13",
|
||||
"@types/react": "^18.2.18",
|
||||
"@types/winston": "^2.4.4",
|
||||
"jest": "^29.5.0",
|
||||
|
|
|
|||
|
|
@ -1,8 +1,7 @@
|
|||
import { logger } from '@librechat/data-schemas';
|
||||
import { FileSources, mergeFileConfig } from 'librechat-data-provider';
|
||||
import type { fileConfigSchema } from 'librechat-data-provider';
|
||||
import type { IMongoFile } from '@librechat/data-schemas';
|
||||
import type { z } from 'zod';
|
||||
import type { ServerRequest } from '~/types';
|
||||
import { processTextWithTokenLimit } from '~/utils/text';
|
||||
|
||||
/**
|
||||
|
|
@ -20,10 +19,7 @@ export async function extractFileContext({
|
|||
tokenCountFn,
|
||||
}: {
|
||||
attachments: IMongoFile[];
|
||||
req?: {
|
||||
body?: { fileTokenLimit?: number };
|
||||
config?: { fileConfig?: z.infer<typeof fileConfigSchema> };
|
||||
};
|
||||
req?: ServerRequest;
|
||||
tokenCountFn: (text: string) => number;
|
||||
}): Promise<string | undefined> {
|
||||
if (!attachments || attachments.length === 0) {
|
||||
|
|
|
|||
|
|
@ -1,10 +1,9 @@
|
|||
import { Providers } from '@librechat/agents';
|
||||
import { isDocumentSupportedProvider } from 'librechat-data-provider';
|
||||
import type { IMongoFile } from '@librechat/data-schemas';
|
||||
import type { Request } from 'express';
|
||||
import type { StrategyFunctions, AudioResult } from '~/types/files';
|
||||
import type { ServerRequest, StrategyFunctions, AudioResult } from '~/types';
|
||||
import { getFileStream, getConfiguredFileSizeLimit } from './utils';
|
||||
import { validateAudio } from '~/files/validation';
|
||||
import { getFileStream } from './utils';
|
||||
|
||||
/**
|
||||
* Encodes and formats audio files for different providers
|
||||
|
|
@ -15,7 +14,7 @@ import { getFileStream } from './utils';
|
|||
* @returns Promise that resolves to audio and file metadata
|
||||
*/
|
||||
export async function encodeAndFormatAudios(
|
||||
req: Request,
|
||||
req: ServerRequest,
|
||||
files: IMongoFile[],
|
||||
provider: Providers,
|
||||
getStrategyFunctions: (source: string) => StrategyFunctions,
|
||||
|
|
@ -53,7 +52,16 @@ export async function encodeAndFormatAudios(
|
|||
}
|
||||
|
||||
const audioBuffer = Buffer.from(content, 'base64');
|
||||
const validation = await validateAudio(audioBuffer, audioBuffer.length, provider);
|
||||
|
||||
/** Extract configured file size limit from fileConfig for this endpoint */
|
||||
const configuredFileSizeLimit = getConfiguredFileSizeLimit(req, provider);
|
||||
|
||||
const validation = await validateAudio(
|
||||
audioBuffer,
|
||||
audioBuffer.length,
|
||||
provider,
|
||||
configuredFileSizeLimit,
|
||||
);
|
||||
|
||||
if (!validation.isValid) {
|
||||
throw new Error(`Audio validation failed: ${validation.error}`);
|
||||
|
|
|
|||
529
packages/api/src/files/encode/document.spec.ts
Normal file
529
packages/api/src/files/encode/document.spec.ts
Normal file
|
|
@ -0,0 +1,529 @@
|
|||
import { Providers } from '@librechat/agents';
|
||||
import { mbToBytes } from 'librechat-data-provider';
|
||||
import type { AppConfig, IMongoFile } from '@librechat/data-schemas';
|
||||
import type { ServerRequest } from '~/types';
|
||||
import { encodeAndFormatDocuments } from './document';
|
||||
|
||||
/** Mock the validation module */
|
||||
jest.mock('~/files/validation', () => ({
|
||||
validatePdf: jest.fn(),
|
||||
}));
|
||||
|
||||
/** Mock the utils module */
|
||||
jest.mock('./utils', () => ({
|
||||
getFileStream: jest.fn(),
|
||||
getConfiguredFileSizeLimit: jest.fn(),
|
||||
}));
|
||||
|
||||
import { validatePdf } from '~/files/validation';
|
||||
import { getFileStream, getConfiguredFileSizeLimit } from './utils';
|
||||
import { Types } from 'mongoose';
|
||||
|
||||
const mockedValidatePdf = validatePdf as jest.MockedFunction<typeof validatePdf>;
|
||||
const mockedGetFileStream = getFileStream as jest.MockedFunction<typeof getFileStream>;
|
||||
const mockedGetConfiguredFileSizeLimit = getConfiguredFileSizeLimit as jest.MockedFunction<
|
||||
typeof getConfiguredFileSizeLimit
|
||||
>;
|
||||
|
||||
/**
 * Verifies that `encodeAndFormatDocuments` forwards the file size limit obtained from
 * `getConfiguredFileSizeLimit` to `validatePdf`, surfaces validation failures, and formats
 * documents per provider. `validatePdf`, `getFileStream` and `getConfiguredFileSizeLimit`
 * are all Jest mocks in this suite.
 */
describe('encodeAndFormatDocuments - fileConfig integration', () => {
  const mockStrategyFunctions = jest.fn();

  beforeEach(() => {
    jest.clearAllMocks();
    /** Default mock implementation for getConfiguredFileSizeLimit */
    mockedGetConfiguredFileSizeLimit.mockImplementation((req, provider) => {
      if (!req.config?.fileConfig) {
        return undefined;
      }
      const fileConfig = req.config.fileConfig;
      const endpoints = fileConfig.endpoints;
      if (endpoints?.[provider]) {
        const limit = endpoints[provider].fileSizeLimit;
        return limit !== undefined ? mbToBytes(limit) : undefined;
      }
      if (endpoints?.default) {
        const limit = endpoints.default.fileSizeLimit;
        return limit !== undefined ? mbToBytes(limit) : undefined;
      }
      return undefined;
    });
  });

  /**
   * Helper to create a mock request with an OpenAI `fileSizeLimit` (in MB) in its file config.
   * NOTE(review): the object is shaped like a request (`{ config: { fileConfig } }`) but is
   * annotated `Partial<AppConfig>` and cast to `ServerRequest` by callers — confirm the
   * annotation is intentional.
   */
  const createMockRequest = (fileSizeLimit?: number): Partial<AppConfig> => ({
    config:
      fileSizeLimit !== undefined
        ? {
            fileConfig: {
              endpoints: {
                [Providers.OPENAI]: {
                  fileSizeLimit,
                },
              },
            },
          }
        : undefined,
  });

  /** Helper to create a mock request whose file config holds the given endpoint entries */
  const createRequestWithEndpoints = (
    endpoints: Record<string, { fileSizeLimit?: number }>,
  ): ServerRequest =>
    ({
      config: {
        fileConfig: {
          endpoints,
        },
      },
    }) as unknown as ServerRequest;

  /** Helper to create a mock PDF file */
  const createMockFile = (sizeInMB: number): IMongoFile =>
    ({
      _id: new Types.ObjectId(),
      user: new Types.ObjectId(),
      file_id: new Types.ObjectId().toString(),
      filename: 'test.pdf',
      type: 'application/pdf',
      bytes: Math.floor(sizeInMB * 1024 * 1024),
      object: 'file',
      usage: 0,
      source: 'test',
      filepath: '/test/path.pdf',
      createdAt: new Date(),
      updatedAt: new Date(),
    }) as unknown as IMongoFile;

  /** Base64-encodes raw text the same way every case prepares its stubbed file content */
  const toBase64 = (text: string): string => Buffer.from(text).toString('base64');

  /**
   * Makes the mocked `getFileStream` resolve with `file` and the base64 form of `text`.
   * @returns the base64 content handed to the mock
   */
  const stubFileStream = (file: IMongoFile, text = 'test-pdf-content'): string => {
    const content = toBase64(text);
    mockedGetFileStream.mockResolvedValue({
      file,
      content,
      metadata: file,
    });
    return content;
  };

  /** Asserts `validatePdf` received a buffer, a numeric size, `provider` and `limit` */
  const expectValidatedWith = (provider: Providers, limit: number | undefined): void => {
    expect(mockedValidatePdf).toHaveBeenCalledWith(
      expect.any(Buffer),
      expect.any(Number),
      provider,
      limit,
    );
  };

  describe('Configuration extraction and validation', () => {
    it('should pass configured file size limit to validatePdf for OpenAI', async () => {
      const configuredLimit = mbToBytes(15);
      const req = createMockRequest(15) as ServerRequest;
      const file = createMockFile(10);

      stubFileStream(file);
      mockedValidatePdf.mockResolvedValue({ isValid: true });

      await encodeAndFormatDocuments(
        req,
        [file],
        { provider: Providers.OPENAI },
        mockStrategyFunctions,
      );

      expectValidatedWith(Providers.OPENAI, configuredLimit);
    });

    it('should pass undefined when no fileConfig is provided', async () => {
      const req = {} as ServerRequest;
      const file = createMockFile(10);

      stubFileStream(file);
      mockedValidatePdf.mockResolvedValue({ isValid: true });

      await encodeAndFormatDocuments(
        req,
        [file],
        { provider: Providers.OPENAI },
        mockStrategyFunctions,
      );

      expectValidatedWith(Providers.OPENAI, undefined);
    });

    it('should pass undefined when fileConfig.endpoints is not defined', async () => {
      const req = {
        config: {
          fileConfig: {},
        },
      } as ServerRequest;
      const file = createMockFile(10);

      stubFileStream(file);
      mockedValidatePdf.mockResolvedValue({ isValid: true });

      await encodeAndFormatDocuments(
        req,
        [file],
        { provider: Providers.OPENAI },
        mockStrategyFunctions,
      );

      /** When fileConfig has no endpoints, getConfiguredFileSizeLimit returns undefined */
      expectValidatedWith(Providers.OPENAI, undefined);
    });

    it('should use endpoint-specific config for Anthropic', async () => {
      const configuredLimit = mbToBytes(20);
      const req = createRequestWithEndpoints({
        [Providers.ANTHROPIC]: {
          fileSizeLimit: 20,
        },
      });
      const file = createMockFile(15);

      stubFileStream(file);
      mockedValidatePdf.mockResolvedValue({ isValid: true });

      await encodeAndFormatDocuments(
        req,
        [file],
        { provider: Providers.ANTHROPIC },
        mockStrategyFunctions,
      );

      expectValidatedWith(Providers.ANTHROPIC, configuredLimit);
    });

    it('should use endpoint-specific config for Google', async () => {
      const configuredLimit = mbToBytes(25);
      const req = createRequestWithEndpoints({
        [Providers.GOOGLE]: {
          fileSizeLimit: 25,
        },
      });
      const file = createMockFile(18);

      stubFileStream(file);
      mockedValidatePdf.mockResolvedValue({ isValid: true });

      await encodeAndFormatDocuments(
        req,
        [file],
        { provider: Providers.GOOGLE },
        mockStrategyFunctions,
      );

      expectValidatedWith(Providers.GOOGLE, configuredLimit);
    });

    it('should pass undefined when provider-specific config not found and no default', async () => {
      const req = createRequestWithEndpoints({
        /** Only configure a different provider, not OpenAI */
        [Providers.ANTHROPIC]: {
          fileSizeLimit: 25,
        },
      });
      const file = createMockFile(20);

      stubFileStream(file);
      mockedValidatePdf.mockResolvedValue({ isValid: true });

      await encodeAndFormatDocuments(
        req,
        [file],
        { provider: Providers.OPENAI },
        mockStrategyFunctions,
      );

      /** When provider-specific config not found and no default, returns undefined */
      expectValidatedWith(Providers.OPENAI, undefined);
    });
  });

  describe('Validation failure handling', () => {
    it('should throw error when validation fails', async () => {
      const req = createMockRequest(10) as ServerRequest;
      const file = createMockFile(12);

      stubFileStream(file);
      mockedValidatePdf.mockResolvedValue({
        isValid: false,
        error: 'PDF file size (12MB) exceeds the 10MB limit',
      });

      await expect(
        encodeAndFormatDocuments(
          req,
          [file],
          { provider: Providers.OPENAI },
          mockStrategyFunctions,
        ),
      ).rejects.toThrow('PDF validation failed: PDF file size (12MB) exceeds the 10MB limit');
    });

    it('should not call validatePdf for non-PDF files', async () => {
      const req = createMockRequest(10) as ServerRequest;
      const file: IMongoFile = {
        ...createMockFile(5),
        type: 'image/jpeg',
        filename: 'test.jpg',
      };

      stubFileStream(file, 'test-image-content');

      await encodeAndFormatDocuments(
        req,
        [file],
        { provider: Providers.OPENAI },
        mockStrategyFunctions,
      );

      expect(mockedValidatePdf).not.toHaveBeenCalled();
    });
  });

  describe('Bug reproduction scenarios', () => {
    it('should respect user-configured lower limit (stricter than provider)', async () => {
      /**
       * Scenario: User sets openAI.fileSizeLimit = 5MB (stricter than 10MB provider limit)
       * Uploads 7MB PDF
       * Expected: Validation called with 5MB limit
       */
      const req = createMockRequest(5) as ServerRequest;
      const file = createMockFile(7);

      stubFileStream(file);
      mockedValidatePdf.mockResolvedValue({
        isValid: false,
        error: 'PDF file size (7MB) exceeds the 5MB limit',
      });

      await expect(
        encodeAndFormatDocuments(
          req,
          [file],
          { provider: Providers.OPENAI },
          mockStrategyFunctions,
        ),
      ).rejects.toThrow('PDF validation failed');

      expectValidatedWith(Providers.OPENAI, mbToBytes(5));
    });

    it('should respect user-configured higher limit (allows API changes)', async () => {
      /**
       * Scenario: User sets openAI.fileSizeLimit = 50MB (higher than 10MB provider default)
       * Uploads 15MB PDF
       * Expected: Validation called with 50MB limit, allowing files between 10-50MB
       * This allows users to take advantage of API limit increases
       */
      const req = createMockRequest(50) as ServerRequest;
      const file = createMockFile(15);

      stubFileStream(file);
      mockedValidatePdf.mockResolvedValue({ isValid: true });

      await encodeAndFormatDocuments(
        req,
        [file],
        { provider: Providers.OPENAI },
        mockStrategyFunctions,
      );

      expectValidatedWith(Providers.OPENAI, mbToBytes(50));
    });

    it('should handle multiple files with different sizes', async () => {
      const req = createMockRequest(10) as ServerRequest;
      const file1 = createMockFile(5);
      const file2 = createMockFile(8);

      const mockContent1 = toBase64('pdf-content-1');
      const mockContent2 = toBase64('pdf-content-2');

      /** One-shot results: the first resolution carries file1, the second file2 */
      mockedGetFileStream
        .mockResolvedValueOnce({
          file: file1,
          content: mockContent1,
          metadata: file1,
        })
        .mockResolvedValueOnce({
          file: file2,
          content: mockContent2,
          metadata: file2,
        });

      mockedValidatePdf.mockResolvedValue({ isValid: true });

      await encodeAndFormatDocuments(
        req,
        [file1, file2],
        { provider: Providers.OPENAI },
        mockStrategyFunctions,
      );

      expect(mockedValidatePdf).toHaveBeenCalledTimes(2);
      expect(mockedValidatePdf).toHaveBeenNthCalledWith(
        1,
        expect.any(Buffer),
        expect.any(Number),
        Providers.OPENAI,
        mbToBytes(10),
      );
      expect(mockedValidatePdf).toHaveBeenNthCalledWith(
        2,
        expect.any(Buffer),
        expect.any(Number),
        Providers.OPENAI,
        mbToBytes(10),
      );
    });
  });

  describe('Document formatting after validation', () => {
    it('should format Anthropic document with valid PDF', async () => {
      const req = createMockRequest(30) as ServerRequest;
      const file = createMockFile(20);

      const mockContent = stubFileStream(file);
      mockedValidatePdf.mockResolvedValue({ isValid: true });

      const result = await encodeAndFormatDocuments(
        req,
        [file],
        { provider: Providers.ANTHROPIC },
        mockStrategyFunctions,
      );

      expect(result.documents).toHaveLength(1);
      expect(result.documents[0]).toMatchObject({
        type: 'document',
        source: {
          type: 'base64',
          media_type: 'application/pdf',
          data: mockContent,
        },
        citations: { enabled: true },
      });
    });

    it('should format OpenAI document with responses API', async () => {
      const req = createMockRequest(15) as ServerRequest;
      const file = createMockFile(10);

      const mockContent = stubFileStream(file);
      mockedValidatePdf.mockResolvedValue({ isValid: true });

      const result = await encodeAndFormatDocuments(
        req,
        [file],
        { provider: Providers.OPENAI, useResponsesApi: true },
        mockStrategyFunctions,
      );

      expect(result.documents).toHaveLength(1);
      expect(result.documents[0]).toMatchObject({
        type: 'input_file',
        filename: 'test.pdf',
        file_data: `data:application/pdf;base64,${mockContent}`,
      });
    });
  });
});
|
||||
|
|
@ -1,10 +1,14 @@
|
|||
import { Providers } from '@librechat/agents';
|
||||
import { isOpenAILikeProvider, isDocumentSupportedProvider } from 'librechat-data-provider';
|
||||
import type { IMongoFile } from '@librechat/data-schemas';
|
||||
import type { Request } from 'express';
|
||||
import type { StrategyFunctions, DocumentResult, AnthropicDocumentBlock } from '~/types/files';
|
||||
import type {
|
||||
AnthropicDocumentBlock,
|
||||
StrategyFunctions,
|
||||
DocumentResult,
|
||||
ServerRequest,
|
||||
} from '~/types';
|
||||
import { getFileStream, getConfiguredFileSizeLimit } from './utils';
|
||||
import { validatePdf } from '~/files/validation';
|
||||
import { getFileStream } from './utils';
|
||||
|
||||
/**
|
||||
* Processes and encodes document files for various providers
|
||||
|
|
@ -15,7 +19,7 @@ import { getFileStream } from './utils';
|
|||
* @returns Promise that resolves to documents and file metadata
|
||||
*/
|
||||
export async function encodeAndFormatDocuments(
|
||||
req: Request,
|
||||
req: ServerRequest,
|
||||
files: IMongoFile[],
|
||||
{ provider, useResponsesApi }: { provider: Providers; useResponsesApi?: boolean },
|
||||
getStrategyFunctions: (source: string) => StrategyFunctions,
|
||||
|
|
@ -62,7 +66,16 @@ export async function encodeAndFormatDocuments(
|
|||
|
||||
if (file.type === 'application/pdf' && isDocumentSupportedProvider(provider)) {
|
||||
const pdfBuffer = Buffer.from(content, 'base64');
|
||||
const validation = await validatePdf(pdfBuffer, pdfBuffer.length, provider);
|
||||
|
||||
/** Extract configured file size limit from fileConfig for this endpoint */
|
||||
const configuredFileSizeLimit = getConfiguredFileSizeLimit(req, provider);
|
||||
|
||||
const validation = await validatePdf(
|
||||
pdfBuffer,
|
||||
pdfBuffer.length,
|
||||
provider,
|
||||
configuredFileSizeLimit,
|
||||
);
|
||||
|
||||
if (!validation.isValid) {
|
||||
throw new Error(`PDF validation failed: ${validation.error}`);
|
||||
|
|
|
|||
|
|
@ -1,8 +1,26 @@
|
|||
import getStream from 'get-stream';
|
||||
import { FileSources } from 'librechat-data-provider';
|
||||
import { Providers } from '@librechat/agents';
|
||||
import { FileSources, mergeFileConfig } from 'librechat-data-provider';
|
||||
import type { IMongoFile } from '@librechat/data-schemas';
|
||||
import type { Request } from 'express';
|
||||
import type { StrategyFunctions, ProcessedFile } from '~/types/files';
|
||||
import type { ServerRequest, StrategyFunctions, ProcessedFile } from '~/types';
|
||||
|
||||
/**
 * Looks up the file size limit configured for a provider's endpoint.
 *
 * The request's `fileConfig` is passed through `mergeFileConfig`; the endpoint entry keyed by
 * `provider` is used when present, otherwise the `default` endpoint entry.
 *
 * @param req - The server request object containing config
 * @param provider - The provider whose endpoint entry is looked up
 * @returns The selected entry's `fileSizeLimit` (documented upstream as bytes), or `undefined`
 * when the request carries no `fileConfig` or the selected entry defines no limit
 */
export const getConfiguredFileSizeLimit = (
  req: ServerRequest,
  provider: Providers,
): number | undefined => {
  const rawFileConfig = req.config?.fileConfig;
  if (!rawFileConfig) {
    return undefined;
  }

  const { endpoints } = mergeFileConfig(rawFileConfig);
  /** Provider-specific entry wins; fall back to the default endpoint entry */
  const selected = endpoints[provider] ?? endpoints.default;
  return selected?.fileSizeLimit;
};
|
||||
|
||||
/**
|
||||
* Processes a file by downloading and encoding it to base64
|
||||
|
|
@ -13,7 +31,7 @@ import type { StrategyFunctions, ProcessedFile } from '~/types/files';
|
|||
* @returns Processed file with content and metadata, or null if filepath missing
|
||||
*/
|
||||
export async function getFileStream(
|
||||
req: Request,
|
||||
req: ServerRequest,
|
||||
file: IMongoFile,
|
||||
encodingMethods: Record<string, StrategyFunctions>,
|
||||
getStrategyFunctions: (source: string) => StrategyFunctions,
|
||||
|
|
|
|||
|
|
@ -1,10 +1,9 @@
|
|||
import { Providers } from '@librechat/agents';
|
||||
import { isDocumentSupportedProvider } from 'librechat-data-provider';
|
||||
import type { IMongoFile } from '@librechat/data-schemas';
|
||||
import type { Request } from 'express';
|
||||
import type { StrategyFunctions, VideoResult } from '~/types/files';
|
||||
import type { ServerRequest, StrategyFunctions, VideoResult } from '~/types';
|
||||
import { getFileStream, getConfiguredFileSizeLimit } from './utils';
|
||||
import { validateVideo } from '~/files/validation';
|
||||
import { getFileStream } from './utils';
|
||||
|
||||
/**
|
||||
* Encodes and formats video files for different providers
|
||||
|
|
@ -15,7 +14,7 @@ import { getFileStream } from './utils';
|
|||
* @returns Promise that resolves to videos and file metadata
|
||||
*/
|
||||
export async function encodeAndFormatVideos(
|
||||
req: Request,
|
||||
req: ServerRequest,
|
||||
files: IMongoFile[],
|
||||
provider: Providers,
|
||||
getStrategyFunctions: (source: string) => StrategyFunctions,
|
||||
|
|
@ -53,7 +52,16 @@ export async function encodeAndFormatVideos(
|
|||
}
|
||||
|
||||
const videoBuffer = Buffer.from(content, 'base64');
|
||||
const validation = await validateVideo(videoBuffer, videoBuffer.length, provider);
|
||||
|
||||
/** Extract configured file size limit from fileConfig for this endpoint */
|
||||
const configuredFileSizeLimit = getConfiguredFileSizeLimit(req, provider);
|
||||
|
||||
const validation = await validateVideo(
|
||||
videoBuffer,
|
||||
videoBuffer.length,
|
||||
provider,
|
||||
configuredFileSizeLimit,
|
||||
);
|
||||
|
||||
if (!validation.isValid) {
|
||||
throw new Error(`Video validation failed: ${validation.error}`);
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ import FormData from 'form-data';
|
|||
import { createReadStream } from 'fs';
|
||||
import { logger } from '@librechat/data-schemas';
|
||||
import { FileSources } from 'librechat-data-provider';
|
||||
import type { Request as ServerRequest } from 'express';
|
||||
import type { ServerRequest } from '~/types';
|
||||
import { logAxiosError, readFileAsString } from '~/utils';
|
||||
import { generateShortLivedToken } from '~/crypto/jwt';
|
||||
|
||||
|
|
@ -20,9 +20,7 @@ export async function parseText({
|
|||
file,
|
||||
file_id,
|
||||
}: {
|
||||
req: Pick<ServerRequest, 'user'> & {
|
||||
user?: { id: string };
|
||||
};
|
||||
req: ServerRequest;
|
||||
file: Express.Multer.File;
|
||||
file_id: string;
|
||||
}): Promise<{ text: string; bytes: number; source: string }> {
|
||||
|
|
|
|||
558
packages/api/src/files/validation.spec.ts
Normal file
558
packages/api/src/files/validation.spec.ts
Normal file
|
|
@ -0,0 +1,558 @@
|
|||
import { Providers } from '@librechat/agents';
|
||||
import { mbToBytes } from 'librechat-data-provider';
|
||||
import { validatePdf, validateVideo, validateAudio } from './validation';
|
||||
|
||||
describe('PDF Validation with fileConfig.endpoints.*.fileSizeLimit', () => {
|
||||
/**
 * Builds a zero-filled buffer of `floor(sizeInMB * 1024 * 1024)` bytes whose first bytes
 * spell out a PDF header (`%PDF-1.4\n`).
 */
const createMockPdfBuffer = (sizeInMB: number): Buffer => {
  const pdf = Buffer.alloc(Math.floor(sizeInMB * 1024 * 1024));
  pdf.write('%PDF-1.4\n', 0);
  return pdf;
};
|
||||
|
||||
/**
 * Size-limit cases for `validatePdf` with the OpenAI provider. The expectations below rely on
 * a 10MB limit being applied when no configured limit is passed, and on a passed configured
 * limit taking precedence whether it is lower or higher than that.
 */
describe('validatePdf - OpenAI provider', () => {
  const provider = Providers.OPENAI;

  it('should accept PDF within provider limit when no config provided', async () => {
    const pdfBuffer = createMockPdfBuffer(8);
    const result = await validatePdf(pdfBuffer, pdfBuffer.length, provider);

    expect(result.isValid).toBe(true);
    expect(result.error).toBeUndefined();
  });

  it('should reject PDF exceeding provider limit when no config provided', async () => {
    const pdfBuffer = createMockPdfBuffer(12);
    const result = await validatePdf(pdfBuffer, pdfBuffer.length, provider);

    expect(result.isValid).toBe(false);
    expect(result.error).toContain('12MB');
    expect(result.error).toContain('10MB');
  });

  it('should use configured limit when it is lower than provider limit', async () => {
    const configuredLimit = mbToBytes(5); // 5MB
    const pdfBuffer = createMockPdfBuffer(7); // Between configured and provider limit
    const result = await validatePdf(pdfBuffer, pdfBuffer.length, provider, configuredLimit);

    expect(result.isValid).toBe(false);
    expect(result.error).toContain('7MB');
    expect(result.error).toContain('5MB');
  });

  it('should allow configured limit higher than provider default', async () => {
    const configuredLimit = mbToBytes(50); // 50MB (higher than 10MB provider default)
    const pdfBuffer = createMockPdfBuffer(12); // Between provider default and configured limit
    const result = await validatePdf(pdfBuffer, pdfBuffer.length, provider, configuredLimit);

    expect(result.isValid).toBe(true);
    expect(result.error).toBeUndefined();
  });

  it('should accept PDF within both configured and provider limits', async () => {
    const configuredLimit = mbToBytes(50); // 50MB
    const pdfBuffer = createMockPdfBuffer(8);
    const result = await validatePdf(pdfBuffer, pdfBuffer.length, provider, configuredLimit);

    expect(result.isValid).toBe(true);
    expect(result.error).toBeUndefined();
  });

  it('should accept PDF within lower configured limit', async () => {
    const configuredLimit = mbToBytes(5); // 5MB
    const pdfBuffer = createMockPdfBuffer(4);
    const result = await validatePdf(pdfBuffer, pdfBuffer.length, provider, configuredLimit);

    expect(result.isValid).toBe(true);
    expect(result.error).toBeUndefined();
  });

  it('should handle exact limit size correctly', async () => {
    const configuredLimit = mbToBytes(10); // Exactly 10MB
    /** Buffer length equals the configured limit byte-for-byte (boundary is inclusive) */
    const pdfBuffer = createMockPdfBuffer(10);
    const result = await validatePdf(pdfBuffer, pdfBuffer.length, provider, configuredLimit);

    expect(result.isValid).toBe(true);
  });
});
|
||||
|
||||
/**
 * Anthropic PDF validation: size checks against the 32MB provider default or a
 * configured override, plus the structural checks (encryption marker, header,
 * minimum size) exercised below.
 */
describe('validatePdf - Anthropic provider', () => {
  const provider = Providers.ANTHROPIC;

  it('should accept PDF within provider limit when no config provided', async () => {
    const pdf = createMockPdfBuffer(20);
    const { isValid, error } = await validatePdf(pdf, pdf.length, provider);

    expect(isValid).toBe(true);
    expect(error).toBeUndefined();
  });

  it('should reject PDF exceeding provider limit when no config provided', async () => {
    const pdf = createMockPdfBuffer(35);
    const { isValid, error } = await validatePdf(pdf, pdf.length, provider);

    expect(isValid).toBe(false);
    expect(error).toContain('35MB');
    expect(error).toContain('32MB');
  });

  it('should use configured limit when it is lower than provider limit', async () => {
    // 20MB file sits between the 15MB configured limit and the provider default
    const limit = mbToBytes(15);
    const pdf = createMockPdfBuffer(20);
    const { isValid, error } = await validatePdf(pdf, pdf.length, provider, limit);

    expect(isValid).toBe(false);
    expect(error).toContain('20MB');
    expect(error).toContain('15MB');
  });

  it('should allow configured limit higher than provider default', async () => {
    // 35MB file exceeds the provider default but fits the 50MB configured limit
    const limit = mbToBytes(50);
    const pdf = createMockPdfBuffer(35);
    const { isValid, error } = await validatePdf(pdf, pdf.length, provider, limit);

    expect(isValid).toBe(true);
    expect(error).toBeUndefined();
  });

  it('should reject encrypted PDFs regardless of size', async () => {
    // Tiny buffer with a valid header followed by an encryption marker
    const pdf = Buffer.alloc(1024);
    pdf.write('%PDF-1.4\n', 0);
    pdf.write('/Encrypt ', 100);
    const { isValid, error } = await validatePdf(pdf, pdf.length, provider);

    expect(isValid).toBe(false);
    expect(error).toContain('encrypted');
  });

  it('should reject PDFs with invalid header', async () => {
    const pdf = Buffer.alloc(1024);
    pdf.write('INVALID', 0);
    const { isValid, error } = await validatePdf(pdf, pdf.length, provider);

    expect(isValid).toBe(false);
    expect(error).toContain('PDF header');
  });

  it('should reject PDFs that are too small', async () => {
    const pdf = Buffer.alloc(3);
    const { isValid, error } = await validatePdf(pdf, pdf.length, provider);

    expect(isValid).toBe(false);
    expect(error).toContain('too small');
  });
});
|
||||
|
||||
/** Google PDF validation: 20MB provider default, overridable by a configured limit. */
describe('validatePdf - Google provider', () => {
  const provider = Providers.GOOGLE;

  it('should accept PDF within provider limit when no config provided', async () => {
    const pdf = createMockPdfBuffer(15);
    const { isValid, error } = await validatePdf(pdf, pdf.length, provider);

    expect(isValid).toBe(true);
    expect(error).toBeUndefined();
  });

  it('should reject PDF exceeding provider limit when no config provided', async () => {
    const pdf = createMockPdfBuffer(25);
    const { isValid, error } = await validatePdf(pdf, pdf.length, provider);

    expect(isValid).toBe(false);
    expect(error).toContain('25MB');
    expect(error).toContain('20MB');
  });

  it('should use configured limit when it is lower than provider limit', async () => {
    // 15MB file sits between the 10MB configured limit and the provider default
    const limit = 10 * 1024 * 1024;
    const pdf = createMockPdfBuffer(15);
    const { isValid, error } = await validatePdf(pdf, pdf.length, provider, limit);

    expect(isValid).toBe(false);
    expect(error).toContain('15MB');
    expect(error).toContain('10MB');
  });

  it('should allow configured limit higher than provider default', async () => {
    // 25MB file exceeds the 20MB default but fits the 50MB configured limit
    const limit = 50 * 1024 * 1024;
    const pdf = createMockPdfBuffer(25);
    const { isValid, error } = await validatePdf(pdf, pdf.length, provider, limit);

    expect(isValid).toBe(true);
    expect(error).toBeUndefined();
  });
});
|
||||
|
||||
/** VertexAI PDF validation: default acceptance and a configured limit that rejects. */
describe('validatePdf - VertexAI provider', () => {
  const provider = Providers.VERTEXAI;

  it('should accept PDF within provider limit', async () => {
    const pdf = createMockPdfBuffer(15);
    const { isValid } = await validatePdf(pdf, pdf.length, provider);

    expect(isValid).toBe(true);
  });

  it('should respect configured limit', async () => {
    const limit = 10 * 1024 * 1024;
    const pdf = createMockPdfBuffer(15);
    const { isValid, error } = await validatePdf(pdf, pdf.length, provider, limit);

    expect(isValid).toBe(false);
    expect(error).toContain('10MB');
  });
});
|
||||
|
||||
/** Azure OpenAI PDF validation: default acceptance and a stricter configured limit. */
describe('validatePdf - Azure OpenAI provider', () => {
  const provider = Providers.AZURE;

  it('should accept PDF within OpenAI-like provider limit', async () => {
    const pdf = createMockPdfBuffer(8);
    const { isValid } = await validatePdf(pdf, pdf.length, provider);

    expect(isValid).toBe(true);
  });

  it('should respect configured limit for Azure', async () => {
    const limit = 5 * 1024 * 1024;
    const pdf = createMockPdfBuffer(7);
    const { isValid } = await validatePdf(pdf, pdf.length, provider, limit);

    expect(isValid).toBe(false);
  });
});
|
||||
|
||||
/** Providers with no dedicated PDF validation are expected to pass through as valid. */
describe('validatePdf - Unsupported providers', () => {
  it('should return valid for providers without specific validation', async () => {
    const provider = 'unsupported' as Providers;
    // Deliberately very large: no size limit should apply here
    const pdf = createMockPdfBuffer(100);
    const { isValid } = await validatePdf(pdf, pdf.length, provider);

    expect(isValid).toBe(true);
  });
});
|
||||
|
||||
/** Boundary behaviour of the configured limit for the OpenAI provider. */
describe('Edge cases', () => {
  it('should handle zero-configured limit', async () => {
    // A limit of 0 is honoured as a real limit, so even a 1MB file is rejected
    const limit = 0;
    const pdf = createMockPdfBuffer(1);
    const { isValid, error } = await validatePdf(pdf, pdf.length, Providers.OPENAI, limit);

    expect(isValid).toBe(false);
    expect(error).toContain('0MB');
  });

  it('should handle very small PDF files', async () => {
    const pdf = Buffer.alloc(100);
    pdf.write('%PDF-1.4\n', 0);
    const { isValid } = await validatePdf(pdf, pdf.length, Providers.OPENAI, 10 * 1024 * 1024);

    expect(isValid).toBe(true);
  });

  it('should handle configured limit equal to provider limit', async () => {
    // Same value as the OpenAI provider limit
    const limit = 10 * 1024 * 1024;
    const pdf = createMockPdfBuffer(12);
    const { isValid, error } = await validatePdf(pdf, pdf.length, Providers.OPENAI, limit);

    expect(isValid).toBe(false);
    expect(error).toContain('10MB');
  });

  it('should use provider limit when configured limit is undefined', async () => {
    const pdf = createMockPdfBuffer(12);
    const { isValid, error } = await validatePdf(pdf, pdf.length, Providers.OPENAI, undefined);

    expect(isValid).toBe(false);
    expect(error).toContain('10MB');
  });
});
|
||||
|
||||
/**
 * Regression scenarios: a configured `fileSizeLimit` must take precedence over
 * the provider default, whether it is looser or stricter than that default.
 */
describe('Bug reproduction - Original issue', () => {
  it('should reproduce the original bug scenario from issue description', async () => {
    /**
     * Scenario: openAI.fileSizeLimit is set to 50MB in librechat.yaml and a
     * 15MB PDF is uploaded to the OpenAI endpoint.
     * Before the fix this was rejected with "exceeds 10MB limit".
     */
    const limit = mbToBytes(50);
    const pdf = createMockPdfBuffer(15);

    const { isValid, error } = await validatePdf(pdf, pdf.length, Providers.OPENAI, limit);

    // The configured 50MB limit overrides the 10MB provider default
    expect(isValid).toBe(true);
    expect(error).toBeUndefined();
  });

  it('should allow user to set stricter limits than provider', async () => {
    /**
     * Scenario: openAI.fileSizeLimit is set to 5MB and a 7MB PDF is uploaded.
     * The upload must be rejected because it exceeds the configured limit.
     */
    const limit = mbToBytes(5);
    const pdf = createMockPdfBuffer(7);

    const { isValid, error } = await validatePdf(pdf, pdf.length, Providers.OPENAI, limit);

    expect(isValid).toBe(false);
    expect(error).toContain('7MB');
    expect(error).toContain('5MB');
  });

  it('should allow upload within stricter user-configured limit', async () => {
    /**
     * Scenario: openAI.fileSizeLimit is set to 5MB and a 4MB PDF is uploaded.
     * The upload must be accepted.
     */
    const limit = mbToBytes(5);
    const pdf = createMockPdfBuffer(4);

    const { isValid, error } = await validatePdf(pdf, pdf.length, Providers.OPENAI, limit);

    expect(isValid).toBe(true);
    expect(error).toBeUndefined();
  });
});
|
||||
|
||||
describe('Video and Audio Validation with fileConfig', () => {
|
||||
/**
 * Builds a zero-filled buffer standing in for a video/audio payload.
 * @param sizeInMB - Desired size in MB (1MB = 1024 * 1024 bytes); a fractional byte count is floored.
 * @returns A buffer of `floor(sizeInMB * 1024 * 1024)` bytes.
 */
const createMockMediaBuffer = (sizeInMB: number): Buffer =>
  Buffer.alloc(Math.floor(sizeInMB * 1024 * 1024));
|
||||
|
||||
/** Google video validation: 20MB provider default, overridable by a configured limit. */
describe('validateVideo - Google provider', () => {
  const provider = Providers.GOOGLE;

  it('should accept video within provider limit when no config provided', async () => {
    const video = createMockMediaBuffer(15);
    const { isValid, error } = await validateVideo(video, video.length, provider);

    expect(isValid).toBe(true);
    expect(error).toBeUndefined();
  });

  it('should reject video exceeding provider limit when no config provided', async () => {
    const video = createMockMediaBuffer(25);
    const { isValid, error } = await validateVideo(video, video.length, provider);

    expect(isValid).toBe(false);
    expect(error).toContain('25MB');
    expect(error).toContain('20MB');
  });

  it('should use configured limit when it is lower than provider limit', async () => {
    // 15MB file sits between the 10MB configured limit and the provider default
    const limit = mbToBytes(10);
    const video = createMockMediaBuffer(15);
    const { isValid, error } = await validateVideo(video, video.length, provider, limit);

    expect(isValid).toBe(false);
    expect(error).toContain('15MB');
    expect(error).toContain('10MB');
  });

  it('should allow configured limit higher than provider default', async () => {
    // 25MB file exceeds the 20MB default but fits the 50MB configured limit
    const limit = mbToBytes(50);
    const video = createMockMediaBuffer(25);
    const { isValid, error } = await validateVideo(video, video.length, provider, limit);

    expect(isValid).toBe(true);
    expect(error).toBeUndefined();
  });

  it('should accept video within lower configured limit', async () => {
    const limit = mbToBytes(8);
    const video = createMockMediaBuffer(7);
    const { isValid, error } = await validateVideo(video, video.length, provider, limit);

    expect(isValid).toBe(true);
    expect(error).toBeUndefined();
  });

  it('should reject videos that are too small', async () => {
    const video = Buffer.alloc(5);
    const { isValid, error } = await validateVideo(video, video.length, provider);

    expect(isValid).toBe(false);
    expect(error).toContain('too small');
  });
});
|
||||
|
||||
/** Google audio validation: 20MB provider default, overridable by a configured limit. */
describe('validateAudio - Google provider', () => {
  const provider = Providers.GOOGLE;

  it('should accept audio within provider limit when no config provided', async () => {
    const audio = createMockMediaBuffer(15);
    const { isValid, error } = await validateAudio(audio, audio.length, provider);

    expect(isValid).toBe(true);
    expect(error).toBeUndefined();
  });

  it('should reject audio exceeding provider limit when no config provided', async () => {
    const audio = createMockMediaBuffer(25);
    const { isValid, error } = await validateAudio(audio, audio.length, provider);

    expect(isValid).toBe(false);
    expect(error).toContain('25MB');
    expect(error).toContain('20MB');
  });

  it('should use configured limit when it is lower than provider limit', async () => {
    // 15MB file sits between the 10MB configured limit and the provider default
    const limit = mbToBytes(10);
    const audio = createMockMediaBuffer(15);
    const { isValid, error } = await validateAudio(audio, audio.length, provider, limit);

    expect(isValid).toBe(false);
    expect(error).toContain('15MB');
    expect(error).toContain('10MB');
  });

  it('should allow configured limit higher than provider default', async () => {
    // 25MB file exceeds the 20MB default but fits the 50MB configured limit
    const limit = mbToBytes(50);
    const audio = createMockMediaBuffer(25);
    const { isValid, error } = await validateAudio(audio, audio.length, provider, limit);

    expect(isValid).toBe(true);
    expect(error).toBeUndefined();
  });

  it('should accept audio within lower configured limit', async () => {
    const limit = mbToBytes(8);
    const audio = createMockMediaBuffer(7);
    const { isValid, error } = await validateAudio(audio, audio.length, provider, limit);

    expect(isValid).toBe(true);
    expect(error).toBeUndefined();
  });

  it('should reject audio files that are too small', async () => {
    const audio = Buffer.alloc(5);
    const { isValid, error } = await validateAudio(audio, audio.length, provider);

    expect(isValid).toBe(false);
    expect(error).toContain('too small');
  });
});
|
||||
|
||||
/** VertexAI media validation: a configured limit below the file size must reject. */
describe('validateVideo and validateAudio - VertexAI provider', () => {
  const provider = Providers.VERTEXAI;

  it('should respect configured video limit for VertexAI', async () => {
    const limit = mbToBytes(10);
    const video = createMockMediaBuffer(15);
    const { isValid, error } = await validateVideo(video, video.length, provider, limit);

    expect(isValid).toBe(false);
    expect(error).toContain('10MB');
  });

  it('should respect configured audio limit for VertexAI', async () => {
    const limit = mbToBytes(10);
    const audio = createMockMediaBuffer(15);
    const { isValid, error } = await validateAudio(audio, audio.length, provider, limit);

    expect(isValid).toBe(false);
    expect(error).toContain('10MB');
  });
});
|
||||
|
||||
/** Providers without media validation (OpenAI here) are expected to pass through as valid. */
describe('validateVideo and validateAudio - Unsupported providers', () => {
  it('should return valid for video from unsupported provider', async () => {
    const provider = Providers.OPENAI;
    const video = createMockMediaBuffer(100);
    const { isValid } = await validateVideo(video, video.length, provider);

    expect(isValid).toBe(true);
  });

  it('should return valid for audio from unsupported provider', async () => {
    const provider = Providers.OPENAI;
    const audio = createMockMediaBuffer(100);
    const { isValid } = await validateAudio(audio, audio.length, provider);

    expect(isValid).toBe(true);
  });
});
|
||||
});
|
||||
});
|
||||
|
|
@ -20,17 +20,18 @@ export async function validatePdf(
|
|||
pdfBuffer: Buffer,
|
||||
fileSize: number,
|
||||
provider: Providers,
|
||||
configuredFileSizeLimit?: number,
|
||||
): Promise<PDFValidationResult> {
|
||||
if (provider === Providers.ANTHROPIC) {
|
||||
return validateAnthropicPdf(pdfBuffer, fileSize);
|
||||
return validateAnthropicPdf(pdfBuffer, fileSize, configuredFileSizeLimit);
|
||||
}
|
||||
|
||||
if (isOpenAILikeProvider(provider)) {
|
||||
return validateOpenAIPdf(fileSize);
|
||||
return validateOpenAIPdf(fileSize, configuredFileSizeLimit);
|
||||
}
|
||||
|
||||
if (provider === Providers.GOOGLE || provider === Providers.VERTEXAI) {
|
||||
return validateGooglePdf(fileSize);
|
||||
return validateGooglePdf(fileSize, configuredFileSizeLimit);
|
||||
}
|
||||
|
||||
return { isValid: true };
|
||||
|
|
@ -40,17 +41,23 @@ export async function validatePdf(
|
|||
* Validates if a PDF meets Anthropic's requirements
|
||||
* @param pdfBuffer - The PDF file as a buffer
|
||||
* @param fileSize - The file size in bytes
|
||||
* @param configuredFileSizeLimit - Optional configured file size limit from fileConfig (in bytes)
|
||||
* @returns Promise that resolves to validation result
|
||||
*/
|
||||
async function validateAnthropicPdf(
|
||||
pdfBuffer: Buffer,
|
||||
fileSize: number,
|
||||
configuredFileSizeLimit?: number,
|
||||
): Promise<PDFValidationResult> {
|
||||
try {
|
||||
if (fileSize > mbToBytes(32)) {
|
||||
const providerLimit = mbToBytes(32);
|
||||
const effectiveLimit = configuredFileSizeLimit ?? providerLimit;
|
||||
|
||||
if (fileSize > effectiveLimit) {
|
||||
const limitMB = Math.round(effectiveLimit / (1024 * 1024));
|
||||
return {
|
||||
isValid: false,
|
||||
error: `PDF file size (${Math.round(fileSize / (1024 * 1024))}MB) exceeds Anthropic's 32MB limit`,
|
||||
error: `PDF file size (${Math.round(fileSize / (1024 * 1024))}MB) exceeds the ${limitMB}MB limit`,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -101,22 +108,48 @@ async function validateAnthropicPdf(
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Validates if a PDF meets OpenAI's requirements
 *
 * Only the file size is checked. The limit is the configured value when one is
 * supplied, otherwise the 10MB provider default. The fallback uses `??`, so a
 * configured limit of `0` is honoured rather than replaced by the default.
 *
 * @param fileSize - The file size in bytes
 * @param configuredFileSizeLimit - Optional configured file size limit from fileConfig (in bytes)
 * @returns Promise that resolves to validation result
 */
async function validateOpenAIPdf(
  fileSize: number,
  configuredFileSizeLimit?: number,
): Promise<PDFValidationResult> {
  const providerLimit = mbToBytes(10);
  const effectiveLimit = configuredFileSizeLimit ?? providerLimit;

  if (fileSize > effectiveLimit) {
    // Both sizes are rounded to whole MB for the message only; the comparison above uses bytes
    const limitMB = Math.round(effectiveLimit / (1024 * 1024));
    return {
      isValid: false,
      error: `PDF file size (${Math.round(fileSize / (1024 * 1024))}MB) exceeds the ${limitMB}MB limit`,
    };
  }

  return { isValid: true };
}
|
||||
|
||||
async function validateGooglePdf(fileSize: number): Promise<PDFValidationResult> {
|
||||
if (fileSize > 20 * 1024 * 1024) {
|
||||
/**
|
||||
* Validates if a PDF meets Google's requirements
|
||||
* @param fileSize - The file size in bytes
|
||||
* @param configuredFileSizeLimit - Optional configured file size limit from fileConfig (in bytes)
|
||||
* @returns Promise that resolves to validation result
|
||||
*/
|
||||
async function validateGooglePdf(
|
||||
fileSize: number,
|
||||
configuredFileSizeLimit?: number,
|
||||
): Promise<PDFValidationResult> {
|
||||
const providerLimit = mbToBytes(20);
|
||||
const effectiveLimit = configuredFileSizeLimit ?? providerLimit;
|
||||
|
||||
if (fileSize > effectiveLimit) {
|
||||
const limitMB = Math.round(effectiveLimit / (1024 * 1024));
|
||||
return {
|
||||
isValid: false,
|
||||
error: "PDF file size exceeds Google's 20MB limit",
|
||||
error: `PDF file size (${Math.round(fileSize / (1024 * 1024))}MB) exceeds the ${limitMB}MB limit`,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -128,18 +161,24 @@ async function validateGooglePdf(fileSize: number): Promise<PDFValidationResult>
|
|||
* @param videoBuffer - The video file as a buffer
|
||||
* @param fileSize - The file size in bytes
|
||||
* @param provider - The provider to validate for
|
||||
* @param configuredFileSizeLimit - Optional configured file size limit from fileConfig (in bytes)
|
||||
* @returns Promise that resolves to validation result
|
||||
*/
|
||||
export async function validateVideo(
|
||||
videoBuffer: Buffer,
|
||||
fileSize: number,
|
||||
provider: Providers,
|
||||
configuredFileSizeLimit?: number,
|
||||
): Promise<VideoValidationResult> {
|
||||
if (provider === Providers.GOOGLE || provider === Providers.VERTEXAI) {
|
||||
if (fileSize > 20 * 1024 * 1024) {
|
||||
const providerLimit = mbToBytes(20);
|
||||
const effectiveLimit = configuredFileSizeLimit ?? providerLimit;
|
||||
|
||||
if (fileSize > effectiveLimit) {
|
||||
const limitMB = Math.round(effectiveLimit / (1024 * 1024));
|
||||
return {
|
||||
isValid: false,
|
||||
error: `Video file size (${Math.round(fileSize / (1024 * 1024))}MB) exceeds Google's 20MB limit`,
|
||||
error: `Video file size (${Math.round(fileSize / (1024 * 1024))}MB) exceeds the ${limitMB}MB limit`,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
@ -159,18 +198,24 @@ export async function validateVideo(
|
|||
* @param audioBuffer - The audio file as a buffer
|
||||
* @param fileSize - The file size in bytes
|
||||
* @param provider - The provider to validate for
|
||||
* @param configuredFileSizeLimit - Optional configured file size limit from fileConfig (in bytes)
|
||||
* @returns Promise that resolves to validation result
|
||||
*/
|
||||
export async function validateAudio(
|
||||
audioBuffer: Buffer,
|
||||
fileSize: number,
|
||||
provider: Providers,
|
||||
configuredFileSizeLimit?: number,
|
||||
): Promise<AudioValidationResult> {
|
||||
if (provider === Providers.GOOGLE || provider === Providers.VERTEXAI) {
|
||||
if (fileSize > 20 * 1024 * 1024) {
|
||||
const providerLimit = mbToBytes(20);
|
||||
const effectiveLimit = configuredFileSizeLimit ?? providerLimit;
|
||||
|
||||
if (fileSize > effectiveLimit) {
|
||||
const limitMB = Math.round(effectiveLimit / (1024 * 1024));
|
||||
return {
|
||||
isValid: false,
|
||||
error: `Audio file size (${Math.round(fileSize / (1024 * 1024))}MB) exceeds Google's 20MB limit`,
|
||||
error: `Audio file size (${Math.round(fileSize / (1024 * 1024))}MB) exceeds the ${limitMB}MB limit`,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
import type { IMongoFile } from '@librechat/data-schemas';
|
||||
import type { ServerRequest } from './http';
|
||||
import type { Readable } from 'stream';
|
||||
import type { Request } from 'express';
|
||||
export interface STTService {
|
||||
getInstance(): Promise<STTService>;
|
||||
getProviderSchema(req: ServerRequest): Promise<[string, object]>;
|
||||
|
|
@ -131,5 +130,5 @@ export interface ProcessedFile {
|
|||
}
|
||||
|
||||
/** Storage-strategy callbacks; currently only the download-stream accessor. */
export interface StrategyFunctions {
  /**
   * Resolves a readable stream for the file at `filepath`.
   * Takes the project's `ServerRequest` rather than the bare Express `Request`.
   */
  getDownloadStream: (req: ServerRequest, filepath: string) => Promise<Readable>;
}
|
||||
|
|
|
|||
|
|
@ -7,11 +7,12 @@ import type { IUser, AppConfig } from '@librechat/data-schemas';
|
|||
*/
|
||||
export type RequestBody = {
  /* Identifier fields; every one is optional on the body. */
  conversationId?: string;
  messageId?: string;
  parentMessageId?: string;
  /* NOTE(review): presumably a per-request token cap for attached files; confirm with callers. */
  fileTokenLimit?: number;
};
|
||||
|
||||
/**
 * Express request whose `body` is typed as `RequestBody`, extended with the
 * optional `user` and `config` properties.
 */
export type ServerRequest = Request<unknown, unknown, RequestBody> & {
  user?: IUser;
  config?: AppConfig;
};
|
||||
|
|
|
|||
|
|
@ -317,6 +317,8 @@ export const fileConfigSchema = z.object({
|
|||
.optional(),
|
||||
});
|
||||
|
||||
/** Static type inferred from `fileConfigSchema` via `z.infer`. */
export type TFileConfig = z.infer<typeof fileConfigSchema>;
|
||||
|
||||
/** Helper function to safely convert string patterns to RegExp objects */
|
||||
export const convertStringsToRegex = (patterns: string[]): RegExp[] =>
|
||||
patterns.reduce((acc: RegExp[], pattern) => {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import type {
|
||||
TEndpoint,
|
||||
FileSources,
|
||||
TFileConfig,
|
||||
TAzureConfig,
|
||||
TCustomConfig,
|
||||
TMemoryConfig,
|
||||
|
|
@ -82,7 +83,7 @@ export interface AppConfig {
|
|||
/** MCP server configuration */
|
||||
mcpConfig?: TCustomConfig['mcpServers'] | null;
|
||||
/** File configuration */
|
||||
fileConfig?: TCustomConfig['fileConfig'];
|
||||
fileConfig?: TFileConfig;
|
||||
/** Secure image links configuration */
|
||||
secureImageLinks?: TCustomConfig['secureImageLinks'];
|
||||
/** Processed model specifications */
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue