LibreChat/api/server/services/Files/processFiles.test.js
Danny Avila 48f6f8f2f8
📎 feat: Upload as Text Support for Plaintext, STT, RAG, and Token Limits (#8868)
* 🪶 feat: Add Support for Uploading Plaintext Files

feat: delineate between OCR and text handling in fileConfig field of config file

- also adds support for passing in mimetypes as just plain file extensions

feat: add showLabel bool to support future synthetic component DynamicDropdownInput

feat: add new combination dropdown-input component in params panel to support file type token limits

refactor: move hovercard to side to align with other hovercards

chore: clean up autogenerated comments

feat: add delineation to file upload path between text and ocr configured filetypes

feat: add token limit checks during file upload

refactor: move textParsing out of ocrEnabled logic

refactor: clean up types for filetype config

refactor: finish decoupling DynamicDropdownInput from fileTokenLimits

fix: move image token cost function into file to fix circular dependency causing unittest to fail and remove unused var for linter

chore: remove out of scope code following review

refactor: make fileTokenLimit conform to existing styles

chore: remove unused localization string

chore: undo changes to DynamicInput and other strays

feat: add fileTokenLimit to all provider config panels

fix: move textParsing back into ocr tool_resource block for now so that it doesn't interfere with other upload types

* 📤 feat: Add RAG API Endpoint Support for Text Parsing (#8849)

* feat: implement RAG API integration for text parsing with fallback to native parsing

* chore: remove TODO now that placeholder and fllback are implemented

* ✈️ refactor: Migrate Text Parsing to TS (#8892)

* refactor: move generateShortLivedToken to packages/api

* refactor: move textParsing logic into packages/api

* refactor: reduce nesting and dry code with createTextFile

* fix: add proper source handling

* fix: mock new parseText and parseTextNative functions in jest file

* ci: add test coverage for textParser

* 💬 feat: Add Audio File Support to Upload as Text (#8893)

* feat: add STT support for Upload as Text

* refactor: move processAudioFile to packages/api

* refactor: move textParsing from utils to files

* fix: remove audio/mp3 from unsupported mimetypes test since it is now supported

* ✂️ feat: Configurable File Token Limits and Truncation (#8911)

* feat: add configurable fileTokenLimit default value

* fix: add stt to fileConfig merge logic

* fix: add fileTokenLimit to mergeFileConfig logic so configurable value is actually respected from yaml

* feat: add token limiting to parsed text files

* fix: add extraction logic and update tests so fileTokenLimit isnt sent to LLM providers

* fix: address comments

* refactor: rename textTokenLimiter.ts to text.ts

* chore: update form-data package to address CVE-2025-7783 and update package-lock

* feat: use default supported mime types for ocr on frontend file validation

* fix: should be using logger.debug not console.debug

* fix: mock existsSync in text.spec.ts

* fix: mock logger rather than every one of its function calls

* fix: reorganize imports and streamline file upload processing logic

* refactor: update createTextFile function to use destructured parameters and improve readability

* chore: update file validation to use EToolResources for improved type safety

* chore: update import path for types in audio processing module

* fix: update file configuration access and replace console.debug with logger.debug for improved logging

---------

Co-authored-by: Dustin Healy <dustinhealy1@gmail.com>
Co-authored-by: Dustin Healy <54083382+dustinhealy@users.noreply.github.com>
2025-08-27 03:44:39 -04:00

248 lines
7.2 KiB
JavaScript

// Mock the updateFileUsage function before importing the actual processFiles
jest.mock('~/models/File', () => ({
updateFileUsage: jest.fn(),
}));
// Mock winston and logger configuration to avoid dependency issues
jest.mock('~/config', () => ({
logger: {
info: jest.fn(),
warn: jest.fn(),
debug: jest.fn(),
error: jest.fn(),
},
}));
// Mock all other dependencies that might cause issues
jest.mock('librechat-data-provider', () => ({
isUUID: { parse: jest.fn() },
megabyte: 1024 * 1024,
PrincipalType: {
USER: 'user',
GROUP: 'group',
PUBLIC: 'public',
},
PrincipalModel: {
USER: 'User',
GROUP: 'Group',
},
ResourceType: {
AGENT: 'agent',
PROJECT: 'project',
FILE: 'file',
PROMPTGROUP: 'promptGroup',
},
FileContext: { message_attachment: 'message_attachment' },
FileSources: { local: 'local' },
EModelEndpoint: { assistants: 'assistants' },
EToolResources: { file_search: 'file_search' },
mergeFileConfig: jest.fn(),
removeNullishValues: jest.fn((obj) => obj),
isAssistantsEndpoint: jest.fn(),
Constants: { COMMANDS_MAX_LENGTH: 56 },
PermissionTypes: {
BOOKMARKS: 'BOOKMARKS',
PROMPTS: 'PROMPTS',
MEMORIES: 'MEMORIES',
MULTI_CONVO: 'MULTI_CONVO',
AGENTS: 'AGENTS',
TEMPORARY_CHAT: 'TEMPORARY_CHAT',
RUN_CODE: 'RUN_CODE',
WEB_SEARCH: 'WEB_SEARCH',
FILE_CITATIONS: 'FILE_CITATIONS',
},
Permissions: {
USE: 'USE',
OPT_OUT: 'OPT_OUT',
},
SystemRoles: {
USER: 'USER',
ADMIN: 'ADMIN',
},
}));
jest.mock('~/server/services/Files/images', () => ({
convertImage: jest.fn(),
resizeAndConvert: jest.fn(),
resizeImageBuffer: jest.fn(),
}));
jest.mock('~/server/controllers/assistants/v2', () => ({
addResourceFileId: jest.fn(),
deleteResourceFileId: jest.fn(),
}));
jest.mock('~/models/Agent', () => ({
addAgentResourceFile: jest.fn(),
removeAgentResourceFiles: jest.fn(),
}));
jest.mock('~/server/controllers/assistants/helpers', () => ({
getOpenAIClient: jest.fn(),
}));
jest.mock('~/server/services/Tools/credentials', () => ({
loadAuthValues: jest.fn(),
}));
jest.mock('~/server/services/Config', () => ({
checkCapability: jest.fn(),
}));
jest.mock('~/server/utils/queue', () => ({
LB_QueueAsyncCall: jest.fn(),
}));
jest.mock('./strategies', () => ({
getStrategyFunctions: jest.fn(),
}));
jest.mock('~/server/utils', () => ({
determineFileType: jest.fn(),
}));
jest.mock('@librechat/api', () => ({
parseText: jest.fn(),
parseTextNative: jest.fn(),
}));
// Import the actual processFiles function after all mocks are set up
const { processFiles } = require('./process');
const { updateFileUsage } = require('~/models/File');
describe('processFiles', () => {
beforeEach(() => {
jest.clearAllMocks();
});
describe('null filtering functionality', () => {
it('should filter out null results from updateFileUsage when files do not exist', async () => {
const mockFiles = [
{ file_id: 'existing-file-1' },
{ file_id: 'non-existent-file' },
{ file_id: 'existing-file-2' },
];
// Mock updateFileUsage to return null for non-existent files
updateFileUsage.mockImplementation(({ file_id }) => {
if (file_id === 'non-existent-file') {
return Promise.resolve(null); // Simulate file not found in the database
}
return Promise.resolve({ file_id, usage: 1 });
});
const result = await processFiles(mockFiles);
expect(updateFileUsage).toHaveBeenCalledTimes(3);
expect(result).toEqual([
{ file_id: 'existing-file-1', usage: 1 },
{ file_id: 'existing-file-2', usage: 1 },
]);
// Critical test - ensure no null values in result
expect(result).not.toContain(null);
expect(result).not.toContain(undefined);
expect(result.length).toBe(2); // Only valid files should be returned
});
it('should return empty array when all updateFileUsage calls return null', async () => {
const mockFiles = [{ file_id: 'non-existent-1' }, { file_id: 'non-existent-2' }];
// All updateFileUsage calls return null
updateFileUsage.mockResolvedValue(null);
const result = await processFiles(mockFiles);
expect(updateFileUsage).toHaveBeenCalledTimes(2);
expect(result).toEqual([]);
expect(result).not.toContain(null);
expect(result.length).toBe(0);
});
it('should work correctly when all files exist', async () => {
const mockFiles = [{ file_id: 'file-1' }, { file_id: 'file-2' }];
updateFileUsage.mockImplementation(({ file_id }) => {
return Promise.resolve({ file_id, usage: 1 });
});
const result = await processFiles(mockFiles);
expect(result).toEqual([
{ file_id: 'file-1', usage: 1 },
{ file_id: 'file-2', usage: 1 },
]);
expect(result).not.toContain(null);
expect(result.length).toBe(2);
});
it('should handle fileIds parameter and filter nulls correctly', async () => {
const mockFiles = [{ file_id: 'file-1' }];
const mockFileIds = ['file-2', 'non-existent-file'];
updateFileUsage.mockImplementation(({ file_id }) => {
if (file_id === 'non-existent-file') {
return Promise.resolve(null);
}
return Promise.resolve({ file_id, usage: 1 });
});
const result = await processFiles(mockFiles, mockFileIds);
expect(result).toEqual([
{ file_id: 'file-1', usage: 1 },
{ file_id: 'file-2', usage: 1 },
]);
expect(result).not.toContain(null);
expect(result).not.toContain(undefined);
expect(result.length).toBe(2);
});
it('should handle duplicate file_ids correctly', async () => {
const mockFiles = [
{ file_id: 'duplicate-file' },
{ file_id: 'duplicate-file' }, // Duplicate should be ignored
{ file_id: 'unique-file' },
];
updateFileUsage.mockImplementation(({ file_id }) => {
return Promise.resolve({ file_id, usage: 1 });
});
const result = await processFiles(mockFiles);
// Should only call updateFileUsage twice (duplicate ignored)
expect(updateFileUsage).toHaveBeenCalledTimes(2);
expect(result).toEqual([
{ file_id: 'duplicate-file', usage: 1 },
{ file_id: 'unique-file', usage: 1 },
]);
expect(result.length).toBe(2);
});
});
describe('edge cases', () => {
it('should handle empty files array', async () => {
const result = await processFiles([]);
expect(result).toEqual([]);
expect(updateFileUsage).not.toHaveBeenCalled();
});
it('should handle mixed null and undefined returns from updateFileUsage', async () => {
const mockFiles = [{ file_id: 'file-1' }, { file_id: 'file-2' }, { file_id: 'file-3' }];
updateFileUsage.mockImplementation(({ file_id }) => {
if (file_id === 'file-1') return Promise.resolve(null);
if (file_id === 'file-2') return Promise.resolve(undefined);
return Promise.resolve({ file_id, usage: 1 });
});
const result = await processFiles(mockFiles);
expect(result).toEqual([{ file_id: 'file-3', usage: 1 }]);
expect(result).not.toContain(null);
expect(result).not.toContain(undefined);
expect(result.length).toBe(1);
});
});
});