mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-21 19:00:13 +01:00
* 🪶 feat: Add Support for Uploading Plaintext Files feat: delineate between OCR and text handling in fileConfig field of config file - also adds support for passing in mimetypes as just plain file extensions feat: add showLabel bool to support future synthetic component DynamicDropdownInput feat: add new combination dropdown-input component in params panel to support file type token limits refactor: move hovercard to side to align with other hovercards chore: clean up autogenerated comments feat: add delineation to file upload path between text and ocr configured filetypes feat: add token limit checks during file upload refactor: move textParsing out of ocrEnabled logic refactor: clean up types for filetype config refactor: finish decoupling DynamicDropdownInput from fileTokenLimits fix: move image token cost function into file to fix circular dependency causing unittest to fail and remove unused var for linter chore: remove out of scope code following review refactor: make fileTokenLimit conform to existing styles chore: remove unused localization string chore: undo changes to DynamicInput and other strays feat: add fileTokenLimit to all provider config panels fix: move textParsing back into ocr tool_resource block for now so that it doesn't interfere with other upload types * 📤 feat: Add RAG API Endpoint Support for Text Parsing (#8849) * feat: implement RAG API integration for text parsing with fallback to native parsing * chore: remove TODO now that placeholder and fllback are implemented * ✈️ refactor: Migrate Text Parsing to TS (#8892) * refactor: move generateShortLivedToken to packages/api * refactor: move textParsing logic into packages/api * refactor: reduce nesting and dry code with createTextFile * fix: add proper source handling * fix: mock new parseText and parseTextNative functions in jest file * ci: add test coverage for textParser * 💬 feat: Add Audio File Support to Upload as Text (#8893) * feat: add STT support for Upload as Text * refactor: move 
processAudioFile to packages/api * refactor: move textParsing from utils to files * fix: remove audio/mp3 from unsupported mimetypes test since it is now supported * ✂️ feat: Configurable File Token Limits and Truncation (#8911) * feat: add configurable fileTokenLimit default value * fix: add stt to fileConfig merge logic * fix: add fileTokenLimit to mergeFileConfig logic so configurable value is actually respected from yaml * feat: add token limiting to parsed text files * fix: add extraction logic and update tests so fileTokenLimit isnt sent to LLM providers * fix: address comments * refactor: rename textTokenLimiter.ts to text.ts * chore: update form-data package to address CVE-2025-7783 and update package-lock * feat: use default supported mime types for ocr on frontend file validation * fix: should be using logger.debug not console.debug * fix: mock existsSync in text.spec.ts * fix: mock logger rather than every one of its function calls * fix: reorganize imports and streamline file upload processing logic * refactor: update createTextFile function to use destructured parameters and improve readability * chore: update file validation to use EToolResources for improved type safety * chore: update import path for types in audio processing module * fix: update file configuration access and replace console.debug with logger.debug for improved logging --------- Co-authored-by: Dustin Healy <dustinhealy1@gmail.com> Co-authored-by: Dustin Healy <54083382+dustinhealy@users.noreply.github.com>
300 lines
7.2 KiB
TypeScript
300 lines
7.2 KiB
TypeScript
import { SheetPaths, TextPaths, FilePaths, CodePaths } from '@librechat/client';
|
|
import {
|
|
megabyte,
|
|
QueryKeys,
|
|
excelMimeTypes,
|
|
EToolResources,
|
|
codeTypeMapping,
|
|
fileConfig as defaultFileConfig,
|
|
} from 'librechat-data-provider';
|
|
import type { TFile, EndpointFileConfig, FileConfig } from 'librechat-data-provider';
|
|
import type { QueryClient } from '@tanstack/react-query';
|
|
import type { ExtendedFile } from '~/common';
|
|
|
|
/** MIME-type fragments that `getFileType` matches by substring instead of by exact key. */
export const partialTypes = ['text/x-'];

/** Icon descriptor used for plain-text and PDF entries. */
const textDocument = { paths: TextPaths, fill: '#FF5588', title: 'Document' };

/** Icon descriptor used for CSV and Excel-style entries. */
const spreadsheet = { paths: SheetPaths, fill: '#10A37F', title: 'Spreadsheet' };

/** Icon descriptor used for source-code entries. */
// TODO: make this dynamic to the language
const codeFile = { paths: CodePaths, fill: '#FF6E3C', title: 'Code' };

/** Icon descriptor used for artifacts; shares the code icon but has its own fill. */
const artifact = { paths: CodePaths, fill: '#2D305C', title: 'Code' };

/**
 * Lookup table from a file-type key to its icon descriptor
 * (`paths` component, `fill` color, display `title`).
 */
export const fileTypes = {
  /* Generic fallback and category keys */
  file: { paths: FilePaths, fill: '#0000FF', title: 'File' },
  text: textDocument,
  txt: textDocument,

  /* Extension, MIME and partial-prefix keys */
  csv: spreadsheet,
  'application/pdf': textDocument,
  pdf: textDocument,
  'text/x-': codeFile,
  artifact,

  /* Unmapped placeholders: 'application/json', 'text/html', 'text/css', image */
};
|
|
|
|
// export const getFileType = (type = '') => {
|
|
// let fileType = fileTypes.file;
|
|
// const exactMatch = fileTypes[type];
|
|
// const partialMatch = !exactMatch && partialTypes.find((type) => type.includes(type));
|
|
// const category = (!partialMatch && (type.split('/')[0] ?? 'text') || 'text');
|
|
|
|
// if (exactMatch) {
|
|
// fileType = exactMatch;
|
|
// } else if (partialMatch) {
|
|
// fileType = fileTypes[partialMatch];
|
|
// } else if (fileTypes[category]) {
|
|
// fileType = fileTypes[category];
|
|
// }
|
|
|
|
// if (!fileType) {
|
|
// fileType = fileTypes.file;
|
|
// }
|
|
|
|
// return fileType;
|
|
// };
|
|
|
|
/**
 * Resolves the icon descriptor for a file type string.
 *
 * Resolution order:
 * 1. exact key in `fileTypes`
 * 2. `excelMimeTypes` pattern -> spreadsheet
 * 3. first entry of `partialTypes` contained in `type`
 * 4. the segment before the first `/` (or `'text'` when that segment is empty)
 * 5. the generic `fileTypes.file` descriptor
 *
 * @param type - File type key or MIME type; defaults to an empty string.
 * @returns The matching descriptor, never `undefined`.
 */
export const getFileType = (
  type = '',
): {
  paths: React.FC;
  fill: string;
  title: string;
} => {
  // Only consult keys defined on `fileTypes` itself. A plain `fileTypes[key]`
  // read would also hit inherited members such as `constructor` or `toString`
  // and return a function instead of a descriptor.
  const ownEntry = (key: string) =>
    Object.prototype.hasOwnProperty.call(fileTypes, key)
      ? fileTypes[key as keyof typeof fileTypes]
      : undefined;

  // Direct match check
  const exact = ownEntry(type);
  if (exact) {
    return exact;
  }

  if (excelMimeTypes.test(type)) {
    return spreadsheet;
  }

  // Partial match check
  const partialKey = partialTypes.find((partial) => type.includes(partial));
  const partialEntry = partialKey ? ownEntry(partialKey) : undefined;
  if (partialEntry) {
    return partialEntry;
  }

  // Category check; an empty leading segment is treated as 'text'
  const category = type.split('/')[0] || 'text';

  // Default file type when the category is unknown
  return ownEntry(category) ?? fileTypes.file;
};
|
|
|
|
/**
 * Format a date string to a human readable format.
 *
 * The day, month and year are taken in the local time zone of the runtime,
 * so a UTC timestamp close to midnight may render as the neighbouring day.
 *
 * @param dateString - Any string accepted by the `Date` constructor.
 * @param isSmallScreen - When true, returns the compact `en-US` numeric form
 *   (month/day/2-digit year) instead of `D Mon YYYY`.
 * @returns The formatted date, or `''` when `dateString` is empty or cannot be parsed.
 * @example
 * formatDate('2020-01-01T12:00:00') // '1 Jan 2020'
 * formatDate('2020-01-01T12:00:00', true) // '1/1/20'
 */
export function formatDate(dateString: string, isSmallScreen = false): string {
  if (!dateString) {
    return '';
  }

  const date = new Date(dateString);

  // An unparseable string produces an Invalid Date; without this guard the
  // result would be "NaN undefined NaN" (or "Invalid Date" on small screens).
  if (Number.isNaN(date.getTime())) {
    return '';
  }

  if (isSmallScreen) {
    return date.toLocaleDateString('en-US', {
      month: 'numeric',
      day: 'numeric',
      year: '2-digit',
    });
  }

  const months = [
    'Jan',
    'Feb',
    'Mar',
    'Apr',
    'May',
    'Jun',
    'Jul',
    'Aug',
    'Sep',
    'Oct',
    'Nov',
    'Dec',
  ];

  const day = date.getDate();
  const month = months[date.getMonth()];
  const year = date.getFullYear();

  return `${day} ${month} ${year}`;
}
|
|
|
|
/**
 * Prepends a file to the cached file list stored under `QueryKeys.files`.
 *
 * Nothing is written (and a warning is logged) when the cache has no file
 * list yet or when a file with the same `file_id` is already cached.
 */
export function addFileToCache(queryClient: QueryClient, newfile: TFile) {
  const filesKey = [QueryKeys.files];
  const cachedFiles = queryClient.getQueryData<TFile[]>(filesKey);

  if (!cachedFiles) {
    console.warn('No current files found in cache, skipped updating file query cache');
    return;
  }

  const alreadyCached = cachedFiles.some((cached) => cached.file_id === newfile.file_id);
  if (alreadyCached) {
    console.warn('File already exists in cache, skipped updating file query cache');
    return;
  }

  // Store a shallow copy of the new file ahead of the existing entries.
  const updatedFiles: TFile[] = [{ ...newfile }, ...cachedFiles];
  queryClient.setQueryData<TFile[]>(filesKey, updatedFiles);
}
|
|
|
|
/**
 * Scales a byte count down by the largest power of 1024 that does not exceed
 * its magnitude and rounds the result.
 *
 * Only the scaled number is returned; no unit label is attached.
 *
 * @param bytes - Size in bytes. Negative values keep their sign; magnitudes
 *   below 1 are returned unscaled.
 * @param decimals - Maximum number of decimal places; negative values are treated as 0.
 * @returns The scaled value, e.g. `1536` -> `1.5`, `5 * 1024 * 1024` -> `5`.
 */
export function formatBytes(bytes: number, decimals = 2): number {
  if (bytes === 0) {
    return 0;
  }

  const k = 1024;
  const dm = decimals < 0 ? 0 : decimals;

  // Use the magnitude so negative input does not hit log(negative) = NaN, and
  // clamp at 0 so sub-byte values are not multiplied up by a negative exponent.
  const exponent = Math.max(0, Math.floor(Math.log(Math.abs(bytes)) / Math.log(k)));

  return parseFloat((bytes / Math.pow(k, exponent)).toFixed(dm));
}
|
|
|
|
const { checkType } = defaultFileConfig;

/**
 * Validates a batch of files selected for upload against the endpoint limits.
 *
 * Checks, in order: non-empty total size, file count limit, per-file type
 * (inferring a MIME type from the extension for known code files), per-file
 * size limit, combined size limit, and duplicates against already attached files.
 * The first failing check reports through `setError` and stops validation.
 *
 * Note: entries of `fileList` are replaced in place with a re-typed `File`
 * when the MIME type had to be inferred from the extension.
 *
 * @param files - Files already attached, keyed by id.
 * @param fileList - Newly selected files; may be mutated as described above.
 * @param setError - Receives either a localization key or a display message.
 * @param endpointFileConfig - Limits and supported MIME types for the endpoint.
 * @param toolResource - When equal to `EToolResources.ocr`, the allowed types come
 *   from the `text`, `ocr` and `stt` sections of `fileConfig` instead.
 * @param fileConfig - Full file configuration, or `null` when unavailable.
 * @returns `true` when every check passes, otherwise `false`.
 */
export const validateFiles = ({
  files,
  fileList,
  setError,
  endpointFileConfig,
  toolResource,
  fileConfig,
}: {
  fileList: File[];
  files: Map<string, ExtendedFile>;
  setError: (error: string) => void;
  endpointFileConfig: EndpointFileConfig;
  toolResource?: string;
  fileConfig: FileConfig | null;
}) => {
  const { fileLimit, fileSizeLimit, totalSizeLimit, supportedMimeTypes } = endpointFileConfig;
  const existingFiles = Array.from(files.values());
  const incomingTotalSize = fileList.reduce((total, file) => total + file.size, 0);
  if (incomingTotalSize === 0) {
    setError('com_error_files_empty');
    return false;
  }
  const currentTotalSize = existingFiles.reduce((total, file) => total + file.size, 0);

  if (fileLimit && fileList.length + files.size > fileLimit) {
    setError(`You can only upload up to ${fileLimit} files at a time.`);
    return false;
  }

  // The allowed MIME types do not depend on the individual file, so resolve them once.
  const mimeTypesToCheck =
    toolResource === EToolResources.ocr
      ? [
          ...(fileConfig?.text?.supportedMimeTypes || []),
          ...(fileConfig?.ocr?.supportedMimeTypes || []),
          ...(fileConfig?.stt?.supportedMimeTypes || []),
        ]
      : supportedMimeTypes;

  for (let i = 0; i < fileList.length; i++) {
    let originalFile = fileList[i];
    let fileType = originalFile.type;
    const extension = originalFile.name.split('.').pop() ?? '';
    const knownCodeType = codeTypeMapping[extension];

    // Infer MIME type for known code files when the type is empty or a mismatch
    if (knownCodeType && fileType !== knownCodeType) {
      fileType = knownCodeType;
    }

    // Check if the file type is still empty after the extension check
    if (!fileType) {
      setError('Unable to determine file type for: ' + originalFile.name);
      return false;
    }

    // Replace the file with a copy carrying the inferred type
    if (originalFile.type !== fileType) {
      const newFile = new File([originalFile], originalFile.name, { type: fileType });
      originalFile = newFile;
      fileList[i] = newFile;
    }

    if (!checkType(originalFile.type, mimeTypesToCheck)) {
      setError('Currently, unsupported file type: ' + originalFile.type);
      return false;
    }

    if (fileSizeLimit && originalFile.size >= fileSizeLimit) {
      setError(`File size exceeds ${fileSizeLimit / megabyte} MB.`);
      return false;
    }
  }

  if (totalSizeLimit && currentTotalSize + incomingTotalSize > totalSizeLimit) {
    setError(`The total size of the files cannot exceed ${totalSizeLimit / megabyte} MB.`);
    return false;
  }

  // Two files count as duplicates when name, size and top-level type all match.
  const combinedFilesInfo = [
    ...existingFiles.map(
      (file) =>
        `${file.file?.name ?? file.filename}-${file.size}-${file.type?.split('/')[0] ?? 'file'}`,
    ),
    ...fileList.map(
      (file: File | undefined) =>
        `${file?.name}-${file?.size}-${file?.type.split('/')[0] ?? 'file'}`,
    ),
  ];

  const uniqueFilesSet = new Set(combinedFilesInfo);

  if (uniqueFilesSet.size !== combinedFilesInfo.length) {
    setError('com_error_files_dupe');
    return false;
  }

  return true;
};
|