mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-16 16:30:15 +01:00
* 🪶 feat: Add Support for Uploading Plaintext Files feat: delineate between OCR and text handling in fileConfig field of config file - also adds support for passing in mimetypes as just plain file extensions feat: add showLabel bool to support future synthetic component DynamicDropdownInput feat: add new combination dropdown-input component in params panel to support file type token limits refactor: move hovercard to side to align with other hovercards chore: clean up autogenerated comments feat: add delineation to file upload path between text and ocr configured filetypes feat: add token limit checks during file upload refactor: move textParsing out of ocrEnabled logic refactor: clean up types for filetype config refactor: finish decoupling DynamicDropdownInput from fileTokenLimits fix: move image token cost function into file to fix circular dependency causing unittest to fail and remove unused var for linter chore: remove out of scope code following review refactor: make fileTokenLimit conform to existing styles chore: remove unused localization string chore: undo changes to DynamicInput and other strays feat: add fileTokenLimit to all provider config panels fix: move textParsing back into ocr tool_resource block for now so that it doesn't interfere with other upload types * 📤 feat: Add RAG API Endpoint Support for Text Parsing (#8849) * feat: implement RAG API integration for text parsing with fallback to native parsing * chore: remove TODO now that placeholder and fllback are implemented * ✈️ refactor: Migrate Text Parsing to TS (#8892) * refactor: move generateShortLivedToken to packages/api * refactor: move textParsing logic into packages/api * refactor: reduce nesting and dry code with createTextFile * fix: add proper source handling * fix: mock new parseText and parseTextNative functions in jest file * ci: add test coverage for textParser * 💬 feat: Add Audio File Support to Upload as Text (#8893) * feat: add STT support for Upload as Text * refactor: move 
processAudioFile to packages/api * refactor: move textParsing from utils to files * fix: remove audio/mp3 from unsupported mimetypes test since it is now supported * ✂️ feat: Configurable File Token Limits and Truncation (#8911) * feat: add configurable fileTokenLimit default value * fix: add stt to fileConfig merge logic * fix: add fileTokenLimit to mergeFileConfig logic so configurable value is actually respected from yaml * feat: add token limiting to parsed text files * fix: add extraction logic and update tests so fileTokenLimit isnt sent to LLM providers * fix: address comments * refactor: rename textTokenLimiter.ts to text.ts * chore: update form-data package to address CVE-2025-7783 and update package-lock * feat: use default supported mime types for ocr on frontend file validation * fix: should be using logger.debug not console.debug * fix: mock existsSync in text.spec.ts * fix: mock logger rather than every one of its function calls * fix: reorganize imports and streamline file upload processing logic * refactor: update createTextFile function to use destructured parameters and improve readability * chore: update file validation to use EToolResources for improved type safety * chore: update import path for types in audio processing module * fix: update file configuration access and replace console.debug with logger.debug for improved logging --------- Co-authored-by: Dustin Healy <dustinhealy1@gmail.com> Co-authored-by: Dustin Healy <54083382+dustinhealy@users.noreply.github.com>
248 lines
7.2 KiB
JavaScript
248 lines
7.2 KiB
JavaScript
// Swap the File model for a stub before './process' is required below, so each
// test can program what `updateFileUsage` resolves with.
jest.mock('~/models/File', () => {
  const updateFileUsage = jest.fn();
  return { updateFileUsage };
});
|
|
|
|
// Mock winston and logger configuration to avoid dependency issues: the
// `logger` export becomes a plain object with one jest.fn() per log level.
jest.mock('~/config', () => {
  const logger = {};
  for (const level of ['info', 'warn', 'debug', 'error']) {
    logger[level] = jest.fn();
  }
  return { logger };
});
|
|
|
|
// Mock all other dependencies that might cause issues.
// 'librechat-data-provider' is replaced by a hand-written stand-in that exposes
// only the members listed in the returned object.
jest.mock('librechat-data-provider', () => {
  // Builds an enum-like object whose values are identical to their keys.
  const selfKeyed = (names) => Object.fromEntries(names.map((name) => [name, name]));

  const PrincipalType = { USER: 'user', GROUP: 'group', PUBLIC: 'public' };
  const PrincipalModel = { USER: 'User', GROUP: 'Group' };
  const ResourceType = {
    AGENT: 'agent',
    PROJECT: 'project',
    FILE: 'file',
    PROMPTGROUP: 'promptGroup',
  };
  const PermissionTypes = selfKeyed([
    'BOOKMARKS',
    'PROMPTS',
    'MEMORIES',
    'MULTI_CONVO',
    'AGENTS',
    'TEMPORARY_CHAT',
    'RUN_CODE',
    'WEB_SEARCH',
    'FILE_CITATIONS',
  ]);
  const Permissions = selfKeyed(['USE', 'OPT_OUT']);
  const SystemRoles = selfKeyed(['USER', 'ADMIN']);

  return {
    isUUID: { parse: jest.fn() },
    megabyte: 1024 ** 2,
    PrincipalType,
    PrincipalModel,
    ResourceType,
    FileContext: { message_attachment: 'message_attachment' },
    FileSources: { local: 'local' },
    EModelEndpoint: { assistants: 'assistants' },
    EToolResources: { file_search: 'file_search' },
    mergeFileConfig: jest.fn(),
    // Identity implementation: hands back the object it was given.
    removeNullishValues: jest.fn((obj) => obj),
    isAssistantsEndpoint: jest.fn(),
    Constants: { COMMANDS_MAX_LENGTH: 56 },
    PermissionTypes,
    Permissions,
    SystemRoles,
  };
});
|
|
|
|
// The modules below are replaced by bare jest.fn() stubs; none of them is given
// an implementation in this file.
jest.mock('~/server/services/Files/images', () => {
  const convertImage = jest.fn();
  const resizeAndConvert = jest.fn();
  const resizeImageBuffer = jest.fn();
  return { convertImage, resizeAndConvert, resizeImageBuffer };
});

jest.mock('~/server/controllers/assistants/v2', () => {
  const addResourceFileId = jest.fn();
  const deleteResourceFileId = jest.fn();
  return { addResourceFileId, deleteResourceFileId };
});

jest.mock('~/models/Agent', () => {
  const addAgentResourceFile = jest.fn();
  const removeAgentResourceFiles = jest.fn();
  return { addAgentResourceFile, removeAgentResourceFiles };
});

jest.mock('~/server/controllers/assistants/helpers', () => {
  const getOpenAIClient = jest.fn();
  return { getOpenAIClient };
});

jest.mock('~/server/services/Tools/credentials', () => {
  const loadAuthValues = jest.fn();
  return { loadAuthValues };
});

jest.mock('~/server/services/Config', () => {
  const checkCapability = jest.fn();
  return { checkCapability };
});

jest.mock('~/server/utils/queue', () => {
  const LB_QueueAsyncCall = jest.fn();
  return { LB_QueueAsyncCall };
});

jest.mock('./strategies', () => {
  const getStrategyFunctions = jest.fn();
  return { getStrategyFunctions };
});

jest.mock('~/server/utils', () => {
  const determineFileType = jest.fn();
  return { determineFileType };
});

jest.mock('@librechat/api', () => {
  const parseText = jest.fn();
  const parseTextNative = jest.fn();
  return { parseText, parseTextNative };
});
|
|
|
|
// Import the actual processFiles function after all mocks are set up
|
|
const { processFiles } = require('./process');
|
|
const { updateFileUsage } = require('~/models/File');
|
|
|
|
describe('processFiles', () => {
  // Builds the `{ file_id }` objects passed to processFiles.
  const filesFor = (...ids) => ids.map((file_id) => ({ file_id }));
  // Record the updateFileUsage stub resolves with for a file that exists.
  const usageOf = (file_id) => ({ file_id, usage: 1 });

  beforeEach(() => {
    // Wipe recorded calls so call-count assertions only see the current test.
    jest.clearAllMocks();
  });

  describe('null filtering functionality', () => {
    it('should filter out null results from updateFileUsage when files do not exist', async () => {
      // A null resolution simulates a file that was not found in the database.
      updateFileUsage.mockImplementation(async ({ file_id }) =>
        file_id === 'non-existent-file' ? null : usageOf(file_id),
      );

      const result = await processFiles(
        filesFor('existing-file-1', 'non-existent-file', 'existing-file-2'),
      );

      expect(updateFileUsage).toHaveBeenCalledTimes(3);
      expect(result).toEqual([usageOf('existing-file-1'), usageOf('existing-file-2')]);

      // Critical test - ensure no null values in result
      expect(result).not.toContain(null);
      expect(result).not.toContain(undefined);
      expect(result).toHaveLength(2); // Only valid files should be returned
    });

    it('should return empty array when all updateFileUsage calls return null', async () => {
      // Every lookup resolves to null.
      updateFileUsage.mockImplementation(async () => null);

      const result = await processFiles(filesFor('non-existent-1', 'non-existent-2'));

      expect(updateFileUsage).toHaveBeenCalledTimes(2);
      expect(result).toEqual([]);
      expect(result).not.toContain(null);
      expect(result).toHaveLength(0);
    });

    it('should work correctly when all files exist', async () => {
      updateFileUsage.mockImplementation(async ({ file_id }) => usageOf(file_id));

      const result = await processFiles(filesFor('file-1', 'file-2'));

      expect(result).toEqual([usageOf('file-1'), usageOf('file-2')]);
      expect(result).not.toContain(null);
      expect(result).toHaveLength(2);
    });

    it('should handle fileIds parameter and filter nulls correctly', async () => {
      updateFileUsage.mockImplementation(async ({ file_id }) =>
        file_id === 'non-existent-file' ? null : usageOf(file_id),
      );

      // Second argument: bare ids supplied alongside the file objects.
      const result = await processFiles(filesFor('file-1'), ['file-2', 'non-existent-file']);

      expect(result).toEqual([usageOf('file-1'), usageOf('file-2')]);
      expect(result).not.toContain(null);
      expect(result).not.toContain(undefined);
      expect(result).toHaveLength(2);
    });

    it('should handle duplicate file_ids correctly', async () => {
      updateFileUsage.mockImplementation(async ({ file_id }) => usageOf(file_id));

      // 'duplicate-file' appears twice; the second occurrence should be ignored.
      const result = await processFiles(
        filesFor('duplicate-file', 'duplicate-file', 'unique-file'),
      );

      // Should only call updateFileUsage twice (duplicate ignored)
      expect(updateFileUsage).toHaveBeenCalledTimes(2);
      expect(result).toEqual([usageOf('duplicate-file'), usageOf('unique-file')]);
      expect(result).toHaveLength(2);
    });
  });

  describe('edge cases', () => {
    it('should handle empty files array', async () => {
      const result = await processFiles([]);

      expect(result).toEqual([]);
      expect(updateFileUsage).not.toHaveBeenCalled();
    });

    it('should handle mixed null and undefined returns from updateFileUsage', async () => {
      updateFileUsage.mockImplementation(async ({ file_id }) => {
        switch (file_id) {
          case 'file-1':
            return null;
          case 'file-2':
            return undefined;
          default:
            return usageOf(file_id);
        }
      });

      const result = await processFiles(filesFor('file-1', 'file-2', 'file-3'));

      expect(result).toEqual([usageOf('file-3')]);
      expect(result).not.toContain(null);
      expect(result).not.toContain(undefined);
      expect(result).toHaveLength(1);
    });
  });
});
|