mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-04-06 07:47:20 +02:00
* fix: add ODT support to native document parser * fix: replace execSync with jszip for ODT parsing * docs: update documentParserMimeTypes comment to include odt * fix: improve ODT XML extraction and add empty.odt fixture - Scope extraction to <office:body> to exclude metadata/style nodes - Map </text:p> and </text:h> closings to newlines, preserving paragraph structure instead of collapsing everything to a single line - Handle <text:line-break/> as explicit newlines - Strip remaining tags, normalize horizontal whitespace, cap consecutive blank lines at one - Regenerate sample.odt as a two-paragraph fixture so the test exercises multi-paragraph output - Add empty.odt fixture and test asserting 'No text found in document' * fix: address review findings in ODT parser - Use static `import JSZip from 'jszip'` instead of dynamic import; jszip is CommonJS-only with no ESM/Jest-isolation concern (F1) - Decode the five standard XML entities after tag-stripping so documents with &, <, >, ", ' send correct text to the LLM (F2) - Remove @types/jszip devDependency; jszip ships bundled declarations and @types/jszip is a stale 2020 stub that would shadow them (F3) - Handle <text:tab/> → \t and <text:s .../> → ' ' before the generic tag stripper so tab-aligned and multi-space content is preserved (F4) - Add sample-entities.odt fixture and test covering entity decoding, tab, and spacing-element handling (F5) - Rename 'throws for empty odt' → 'throws for odt with no extractable text' to distinguish from a zero-byte/corrupt file case (F8) * fix: add decompressed content size cap to odtToText (F6) Reads uncompressed entry sizes from the JSZip internal metadata before extracting any content. Throws if the total exceeds 50MB, preventing a crafted ODT with a high-ratio compressed payload from exhausting heap. Adds a corresponding test using a real DEFLATE-compressed ZIP (~51KB on disk, 51MB uncompressed) to verify the guard fires before any extraction. 
* fix: add java to codeTypeMapping for file upload support .java files were rejected with "Unable to determine file type" because browsers send an empty MIME type for them and codeTypeMapping had no 'java' entry for inferMimeType() to fall back on. text/x-java was already present in all five validation lists (fullMimeTypesList, codeInterpreterMimeTypesList, retrievalMimeTypesList, textMimeTypes, retrievalMimeTypes), so mapping to it (not text/plain) ensures .java uploads work for both File Search and Code Interpreter. Closes #12307 * fix: address follow-up review findings (A-E) A: regenerate package-lock.json after removing @types/jszip from package.json; without this npm ci was still installing the stale 2020 type stubs and TypeScript was resolving against them B: replace dynamic import('jszip') in the zip-bomb test with the same static import already used in production; jszip is CJS-only with no ESM/Jest isolation concern C: document that the _data.uncompressedSize guard fails open if jszip renames the private field (accepted limitation, test would catch it) D: rename 'preserves tabs' test to 'normalizes tab and spacing elements to spaces' since <text:tab> is collapsed to a space, not kept as \t E: fix test.each([ formatting artifact (missing newline after '[') --------- Co-authored-by: Danny Avila <danny@librechat.ai>
764 lines
25 KiB
TypeScript
764 lines
25 KiB
TypeScript
import { z } from 'zod';
|
|
import type { EndpointFileConfig, FileConfig } from './types/files';
|
|
import { EModelEndpoint, isAgentsEndpoint, isDocumentSupportedProvider } from './schemas';
|
|
import { normalizeEndpointName } from './utils';
|
|
|
|
/** Per-endpoint file-support flags; every endpoint listed here is mapped to `true`. */
export const supportsFiles = {
  [EModelEndpoint.openAI]: true,
  [EModelEndpoint.google]: true,
  [EModelEndpoint.assistants]: true,
  [EModelEndpoint.azureAssistants]: true,
  [EModelEndpoint.agents]: true,
  [EModelEndpoint.azureOpenAI]: true,
  [EModelEndpoint.anthropic]: true,
  [EModelEndpoint.custom]: true,
  [EModelEndpoint.bedrock]: true,
};
|
|
|
|
/**
 * Exact MIME type strings treated as Excel spreadsheets: the legacy `.xls`
 * aliases followed by the OOXML spreadsheet type.
 */
export const excelFileTypes = [
  // Legacy / alias forms
  'application/vnd.ms-excel',
  'application/msexcel',
  'application/x-msexcel',
  'application/x-ms-excel',
  'application/x-excel',
  'application/x-dos_ms_excel',
  'application/xls',
  'application/x-xls',
  // OOXML spreadsheet
  'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
];
|
|
|
|
/**
 * Full list of exact MIME type strings: text/code, documents, images, archives,
 * OpenDocument formats, video, audio, and finally every entry of `excelFileTypes`.
 */
export const fullMimeTypesList = [
  'text/x-c',
  'text/x-c++',
  'application/csv',
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  'text/html',
  'text/x-java',
  'application/json',
  'text/markdown',
  'application/pdf',
  'text/x-php',
  'application/vnd.openxmlformats-officedocument.presentationml.presentation',
  'text/x-python',
  'text/x-script.python',
  'text/x-ruby',
  'text/x-tex',
  'text/plain',
  'text/css',
  'text/vtt',
  'image/jpeg',
  'text/javascript',
  'image/gif',
  'image/png',
  'image/heic',
  'image/heif',
  'application/x-tar',
  'application/x-sh',
  'application/typescript',
  'application/sql',
  'application/yaml',
  'application/vnd.coffeescript',
  'application/xml',
  'application/zip',
  'application/x-parquet',
  'application/vnd.oasis.opendocument.text',
  'application/vnd.oasis.opendocument.spreadsheet',
  'application/vnd.oasis.opendocument.presentation',
  'application/vnd.oasis.opendocument.graphics',
  'image/svg',
  'image/svg+xml',
  // Video formats
  'video/mp4',
  'video/avi',
  'video/mov',
  'video/wmv',
  'video/flv',
  'video/webm',
  'video/mkv',
  'video/m4v',
  'video/3gp',
  'video/ogv',
  // Audio formats
  'audio/mp3',
  'audio/wav',
  'audio/ogg',
  'audio/m4a',
  'audio/aac',
  'audio/flac',
  'audio/wma',
  'audio/opus',
  'audio/mpeg',
  // Excel variants are appended last
  ...excelFileTypes,
];
|
|
|
|
/**
 * Exact MIME type strings for the code-interpreter list; ends with every entry
 * of `excelFileTypes`.
 */
export const codeInterpreterMimeTypesList = [
  'text/x-c',
  'text/x-c++',
  'application/csv',
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  'text/html',
  'text/x-java',
  'application/json',
  'text/markdown',
  'application/pdf',
  'text/x-php',
  'application/vnd.openxmlformats-officedocument.presentationml.presentation',
  'text/x-python',
  'text/x-script.python',
  'text/x-ruby',
  'text/x-tex',
  'text/plain',
  'text/css',
  'image/jpeg',
  'text/javascript',
  'image/gif',
  'image/png',
  'image/heic',
  'image/heif',
  'application/x-tar',
  'application/typescript',
  'application/xml',
  'application/zip',
  'application/x-parquet',
  // Excel variants are appended last
  ...excelFileTypes,
];
|
|
|
|
/** Exact MIME type strings for the retrieval list (text/code sources and common documents). */
export const retrievalMimeTypesList = [
  'text/x-c',
  'text/x-c++',
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  'text/html',
  'text/x-java',
  'application/json',
  'text/markdown',
  'application/pdf',
  'text/x-php',
  'application/vnd.openxmlformats-officedocument.presentationml.presentation',
  'text/x-python',
  'text/x-script.python',
  'text/x-ruby',
  'text/x-tex',
  'text/plain',
];
|
|
|
|
/** Case-insensitive match for a trailing image extension (jpg, jpeg, png, gif, webp, heic, heif). */
export const imageExtRegex = /\.(jpg|jpeg|png|gif|webp|heic|heif)$/i;
|
|
|
|
/** @see https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_DocumentBlock.html */
export type BedrockDocumentFormat =
  | 'pdf'
  | 'csv'
  | 'doc'
  | 'docx'
  | 'xls'
  | 'xlsx'
  | 'html'
  | 'txt'
  | 'md';

/** Maps MIME types to Bedrock Converse API document format values */
export const bedrockDocumentFormats: Record<string, BedrockDocumentFormat> = {
  'application/pdf': 'pdf',
  'text/csv': 'csv',
  'application/csv': 'csv',
  'application/msword': 'doc',
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
  'application/vnd.ms-excel': 'xls',
  'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
  'text/html': 'html',
  'text/plain': 'txt',
  'text/markdown': 'md',
};

/**
 * Reports whether a MIME type has an entry in `bedrockDocumentFormats`.
 *
 * Uses an own-property check rather than the `in` operator: `in` also walks the
 * prototype chain, so strings such as `'toString'`, `'constructor'` or
 * `'__proto__'` would otherwise be reported as supported document types.
 *
 * @param mimeType - The MIME type to test; `undefined`/`null` yields `false`
 * @returns `true` only when `mimeType` is a key defined directly on `bedrockDocumentFormats`
 */
export const isBedrockDocumentType = (mimeType?: string): boolean =>
  mimeType != null && Object.prototype.hasOwnProperty.call(bedrockDocumentFormats, mimeType);
|
|
|
|
/**
 * Comma-separated accept list for Bedrock document uploads (for input `accept` attributes):
 * the file extensions first, followed by the corresponding MIME types.
 */
export const bedrockDocumentExtensions =
  '.pdf,.csv,.doc,.docx,.xls,.xlsx,.html,.htm,.txt,.md,application/pdf,text/csv,application/csv,application/msword,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,text/html,text/plain,text/markdown';
|
|
|
|
/** Anchored pattern matching the Excel MIME types (legacy `.xls` aliases and the OOXML spreadsheet type). */
export const excelMimeTypes =
  /^application\/(vnd\.ms-excel|msexcel|x-msexcel|x-ms-excel|x-excel|x-dos_ms_excel|xls|x-xls|vnd\.openxmlformats-officedocument\.spreadsheetml\.sheet)$/;
|
|
|
|
/** Anchored pattern matching the supported `text/*` MIME subtypes. */
export const textMimeTypes =
  /^(text\/(x-c|x-csharp|tab-separated-values|x-c\+\+|x-h|x-java|html|markdown|x-php|x-python|x-script\.python|x-ruby|x-tex|plain|css|vtt|javascript|csv|xml))$/;
|
|
|
|
/** Anchored pattern matching the supported `application/*` MIME subtypes (documents, archives, data formats). */
export const applicationMimeTypes =
  /^(application\/(epub\+zip|csv|json|msword|pdf|x-tar|x-sh|typescript|sql|yaml|x-parquet|vnd\.apache\.parquet|vnd\.coffeescript|vnd\.openxmlformats-officedocument\.(wordprocessingml\.document|presentationml\.presentation|spreadsheetml\.sheet)|vnd\.oasis\.opendocument\.(text|spreadsheet|presentation|graphics)|xml|zip))$/;
|
|
|
|
/** Anchored pattern matching the supported raster image MIME types. */
export const imageMimeTypes = /^image\/(jpeg|gif|png|webp|heic|heif)$/;
|
|
|
|
/** Anchored pattern matching the supported `audio/*` MIME subtypes. */
export const audioMimeTypes =
  /^audio\/(mp3|mpeg|mpeg3|wav|wave|x-wav|ogg|vorbis|mp4|m4a|x-m4a|flac|x-flac|webm|aac|wma|opus)$/;
|
|
|
|
/** Anchored pattern matching the supported `video/*` MIME subtypes. */
export const videoMimeTypes = /^video\/(mp4|avi|mov|wmv|flv|webm|mkv|m4v|3gp|ogv)$/;
|
|
|
|
/**
 * Default MIME type patterns for OCR: images, Excel variants, PDF, OOXML word/presentation
 * documents, `vnd.ms-word` / `vnd.ms-powerpoint`, EPUB, and the OpenDocument formats.
 */
export const defaultOCRMimeTypes = [
  imageMimeTypes,
  excelMimeTypes,
  /^application\/pdf$/,
  /^application\/vnd\.openxmlformats-officedocument\.(wordprocessingml\.document|presentationml\.presentation)$/,
  /^application\/vnd\.ms-(word|powerpoint)$/,
  /^application\/epub\+zip$/,
  /^application\/vnd\.oasis\.opendocument\.(text|spreadsheet|presentation|graphics)$/,
];
|
|
|
|
/**
 * MIME type patterns handled by the built-in document parser:
 * Excel variants, PDF, DOCX, and OpenDocument spreadsheet (ods) / text (odt).
 */
export const documentParserMimeTypes = [
  excelMimeTypes,
  /^application\/pdf$/,
  /^application\/vnd\.openxmlformats-officedocument\.wordprocessingml\.document$/,
  /^application\/vnd\.oasis\.opendocument\.spreadsheet$/,
  /^application\/vnd\.oasis\.opendocument\.text$/,
];
|
|
|
|
/**
 * Default patterns for the `text` section: a single permissive `type/subtype` pattern
 * whose two halves may contain word characters, dots and hyphens only.
 */
export const defaultTextMimeTypes = [/^[\w.-]+\/[\w.-]+$/];
|
|
|
|
/** Default patterns for the `stt` section: the audio MIME type pattern only. */
export const defaultSTTMimeTypes = [audioMimeTypes];
|
|
|
|
/**
 * Combined list of supported MIME type patterns: text, Excel, application,
 * image, video and audio patterns, plus SVG images.
 */
export const supportedMimeTypes = [
  textMimeTypes,
  excelMimeTypes,
  applicationMimeTypes,
  imageMimeTypes,
  videoMimeTypes,
  audioMimeTypes,
  /** Supported by LC Code Interpreter API */
  /^image\/(svg|svg\+xml)$/,
];
|
|
|
|
/** MIME type patterns for the code interpreter: text, Excel, application and image patterns. */
export const codeInterpreterMimeTypes = [
  textMimeTypes,
  excelMimeTypes,
  applicationMimeTypes,
  imageMimeTypes,
];
|
|
|
|
/**
 * Maps a lower-cased file extension to the MIME type used when none is supplied.
 * Extension-less names such as `Dockerfile` and `Makefile` are keyed by their
 * lower-cased file name.
 */
export const codeTypeMapping: { [key: string]: string } = {
  c: 'text/x-c', // .c - C source
  cs: 'text/x-csharp', // .cs - C# source
  cpp: 'text/x-c++', // .cpp - C++ source
  h: 'text/x-h', // .h - C/C++ header
  md: 'text/markdown', // .md - Markdown
  php: 'text/x-php', // .php - PHP source
  py: 'text/x-python', // .py - Python source
  rb: 'text/x-ruby', // .rb - Ruby source
  tex: 'text/x-tex', // .tex - LaTeX source
  java: 'text/x-java', // .java - Java source
  js: 'text/javascript', // .js - JavaScript source
  sh: 'application/x-sh', // .sh - Shell script
  ts: 'application/typescript', // .ts - TypeScript source
  tar: 'application/x-tar', // .tar - Tar archive
  zip: 'application/zip', // .zip - ZIP archive
  txt: 'text/plain', // .txt - Plain text file
  log: 'text/plain', // .log - Log file
  csv: 'text/csv', // .csv - Comma-separated values
  tsv: 'text/tab-separated-values', // .tsv - Tab-separated values
  parquet: 'application/x-parquet', // .parquet - Apache Parquet columnar storage
  json: 'application/json', // .json - JSON file
  xml: 'application/xml', // .xml - XML file
  html: 'text/html', // .html - HTML file
  htm: 'text/html', // .htm - HTML file
  css: 'text/css', // .css - CSS file
  yml: 'application/yaml', // .yml - YAML
  yaml: 'application/yaml', // .yaml - YAML
  sql: 'application/sql', // .sql - SQL (IANA registered)
  dart: 'text/plain', // .dart - Dart source
  coffee: 'application/vnd.coffeescript', // .coffee - CoffeeScript (IANA registered)
  go: 'text/plain', // .go - Go source
  rs: 'text/plain', // .rs - Rust source
  swift: 'text/plain', // .swift - Swift source
  kt: 'text/plain', // .kt - Kotlin source
  kts: 'text/plain', // .kts - Kotlin script
  scala: 'text/plain', // .scala - Scala source
  lua: 'text/plain', // .lua - Lua source
  r: 'text/plain', // .r - R source
  pl: 'text/plain', // .pl - Perl source
  pm: 'text/plain', // .pm - Perl module
  groovy: 'text/plain', // .groovy - Groovy source
  gradle: 'text/plain', // .gradle - Gradle build script
  clj: 'text/plain', // .clj - Clojure source
  cljs: 'text/plain', // .cljs - ClojureScript source
  cljc: 'text/plain', // .cljc - Clojure common source
  elm: 'text/plain', // .elm - Elm source
  erl: 'text/plain', // .erl - Erlang source
  hrl: 'text/plain', // .hrl - Erlang header
  ex: 'text/plain', // .ex - Elixir source
  exs: 'text/plain', // .exs - Elixir script
  hs: 'text/plain', // .hs - Haskell source
  lhs: 'text/plain', // .lhs - Literate Haskell source
  ml: 'text/plain', // .ml - OCaml source
  mli: 'text/plain', // .mli - OCaml interface
  fs: 'text/plain', // .fs - F# source
  fsx: 'text/plain', // .fsx - F# script
  lisp: 'text/plain', // .lisp - Lisp source
  cl: 'text/plain', // .cl - Common Lisp source
  scm: 'text/plain', // .scm - Scheme source
  rkt: 'text/plain', // .rkt - Racket source
  jsx: 'text/plain', // .jsx - React JSX
  tsx: 'text/plain', // .tsx - React TSX
  vue: 'text/plain', // .vue - Vue component
  svelte: 'text/plain', // .svelte - Svelte component
  astro: 'text/plain', // .astro - Astro component
  scss: 'text/plain', // .scss - SCSS source
  sass: 'text/plain', // .sass - Sass source
  less: 'text/plain', // .less - Less source
  styl: 'text/plain', // .styl - Stylus source
  toml: 'text/plain', // .toml - TOML config
  ini: 'text/plain', // .ini - INI config
  cfg: 'text/plain', // .cfg - Config file
  conf: 'text/plain', // .conf - Config file
  env: 'text/plain', // .env - Environment file
  properties: 'text/plain', // .properties - Java properties
  graphql: 'text/plain', // .graphql - GraphQL schema/query
  gql: 'text/plain', // .gql - GraphQL schema/query
  proto: 'text/plain', // .proto - Protocol Buffers
  dockerfile: 'text/plain', // Dockerfile
  makefile: 'text/plain', // Makefile
  cmake: 'text/plain', // .cmake - CMake script
  rake: 'text/plain', // .rake - Rake task
  gemspec: 'text/plain', // .gemspec - Ruby gem spec
  bash: 'text/plain', // .bash - Bash script
  zsh: 'text/plain', // .zsh - Zsh script
  fish: 'text/plain', // .fish - Fish script
  ps1: 'text/plain', // .ps1 - PowerShell script
  psm1: 'text/plain', // .psm1 - PowerShell module
  bat: 'text/plain', // .bat - Batch script
  cmd: 'text/plain', // .cmd - Windows command script
  asm: 'text/plain', // .asm - Assembly source
  s: 'text/plain', // .s - Assembly source
  v: 'text/plain', // .v - V or Verilog source
  zig: 'text/plain', // .zig - Zig source
  nim: 'text/plain', // .nim - Nim source
  cr: 'text/plain', // .cr - Crystal source
  d: 'text/plain', // .d - D source
  pas: 'text/plain', // .pas - Pascal source
  pp: 'text/plain', // .pp - Pascal/Puppet source
  f90: 'text/plain', // .f90 - Fortran 90 source
  f95: 'text/plain', // .f95 - Fortran 95 source
  f03: 'text/plain', // .f03 - Fortran 2003 source
  jl: 'text/plain', // .jl - Julia source
  m: 'text/plain', // .m - Objective-C/MATLAB source
  mm: 'text/plain', // .mm - Objective-C++ source
  ada: 'text/plain', // .ada - Ada source
  adb: 'text/plain', // .adb - Ada body
  ads: 'text/plain', // .ads - Ada spec
  cob: 'text/plain', // .cob - COBOL source
  cbl: 'text/plain', // .cbl - COBOL source
  tcl: 'text/plain', // .tcl - Tcl source
  awk: 'text/plain', // .awk - AWK script
  sed: 'text/plain', // .sed - Sed script
  odt: 'application/vnd.oasis.opendocument.text', // .odt - OpenDocument Text
  ods: 'application/vnd.oasis.opendocument.spreadsheet', // .ods - OpenDocument Spreadsheet
  odp: 'application/vnd.oasis.opendocument.presentation', // .odp - OpenDocument Presentation
  odg: 'application/vnd.oasis.opendocument.graphics', // .odg - OpenDocument Graphics
};

/** Maps image extensions to MIME types for formats browsers may not recognize */
export const imageTypeMapping: { [key: string]: string } = {
  heic: 'image/heic',
  heif: 'image/heif',
};

/** Normalizes non-standard MIME types that browsers may report to their canonical forms */
export const mimeTypeAliases: Readonly<Record<string, string>> = {
  'text/x-python-script': 'text/x-python',
};

/**
 * Reads `key` from a lookup table only when it is an own property, so inherited
 * `Object.prototype` members (`constructor`, `toString`, `__proto__`, ...) are
 * never mistaken for table entries.
 */
function ownMimeType(table: Readonly<Record<string, string>>, key: string): string | undefined {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}

/**
 * Infers the MIME type from a file's extension when the browser doesn't recognize it,
 * and normalizes known non-standard MIME type aliases to their canonical forms.
 *
 * Table lookups are restricted to own properties: a plain `table[key]` access also
 * resolves inherited members, which previously made inputs such as a `.constructor`
 * extension or a `toString` MIME type return a non-string value.
 *
 * @param fileName - The file name including its extension
 * @param currentType - The MIME type reported by the browser (may be empty string)
 * @returns The normalized or inferred MIME type; empty string if unresolvable
 */
export function inferMimeType(fileName: string, currentType: string): string {
  if (currentType) {
    return ownMimeType(mimeTypeAliases, currentType) ?? currentType;
  }

  // With no '.', the whole (lower-cased) name is used, which is how
  // `Dockerfile` / `Makefile` resolve through `codeTypeMapping`.
  const extension = fileName.split('.').pop()?.toLowerCase() ?? '';
  return (
    ownMimeType(codeTypeMapping, extension) ||
    ownMimeType(imageTypeMapping, extension) ||
    currentType
  );
}
|
|
|
|
/**
 * MIME type patterns for retrieval: one pattern for the accepted `text/*` subtypes
 * and one for JSON, PDF and OOXML word/presentation documents.
 */
export const retrievalMimeTypes = [
  /^(text\/(x-c|x-c\+\+|x-h|html|x-java|markdown|x-php|x-python|x-script\.python|x-ruby|x-tex|plain|vtt|xml))$/,
  /^(application\/(json|pdf|vnd\.openxmlformats-officedocument\.(wordprocessingml\.document|presentationml\.presentation)))$/,
];
|
|
|
|
/** Number of bytes in one megabyte (1024 × 1024). */
export const megabyte = 1024 ** 2;

/**
 * Converts a size in megabytes to bytes.
 * @param mb - Size in megabytes
 * @returns The equivalent number of bytes
 */
export const mbToBytes = (mb: number): number => {
  return megabyte * mb;
};
|
|
|
|
/** Default size limit: 512 MB expressed in bytes. */
const defaultSizeLimit = mbToBytes(512);

/** Default token limit used for `fileConfig.fileTokenLimit`. */
const defaultTokenLimit = 100000;

/** Endpoint file config object shared by the assistants, azureAssistants and agents entries. */
const assistantsFileConfig = {
  fileLimit: 10,
  fileSizeLimit: defaultSizeLimit,
  totalSizeLimit: defaultSizeLimit,
  supportedMimeTypes,
  disabled: false,
};
|
|
|
|
/**
 * Built-in base file configuration.
 *
 * - `endpoints`: per-endpoint limits; assistants, azureAssistants and agents share one
 *   config object, while anthropic and `default` each have their own object.
 * - size limits are stored in bytes; `fileTokenLimit` is a token count.
 * - `ocr` / `text` / `stt`: MIME type patterns for each feature section.
 * - `checkType`: tests a MIME type against a list of patterns.
 */
export const fileConfig = {
  endpoints: {
    [EModelEndpoint.assistants]: assistantsFileConfig,
    [EModelEndpoint.azureAssistants]: assistantsFileConfig,
    [EModelEndpoint.agents]: assistantsFileConfig,
    [EModelEndpoint.anthropic]: {
      fileLimit: 10,
      fileSizeLimit: defaultSizeLimit,
      totalSizeLimit: defaultSizeLimit,
      supportedMimeTypes,
      disabled: false,
    },
    default: {
      fileLimit: 10,
      fileSizeLimit: defaultSizeLimit,
      totalSizeLimit: defaultSizeLimit,
      supportedMimeTypes,
      disabled: false,
    },
  },
  serverFileSizeLimit: defaultSizeLimit,
  avatarSizeLimit: mbToBytes(2),
  fileTokenLimit: defaultTokenLimit,
  clientImageResize: {
    enabled: false,
    maxWidth: 1900,
    maxHeight: 1900,
    quality: 0.92,
  },
  ocr: {
    supportedMimeTypes: defaultOCRMimeTypes,
  },
  text: {
    supportedMimeTypes: defaultTextMimeTypes,
  },
  stt: {
    supportedMimeTypes: defaultSTTMimeTypes,
  },
  /**
   * Reports whether `fileType` matches at least one pattern in `supportedTypes`
   * (defaults to the module-level `supportedMimeTypes`).
   */
  checkType: function (fileType: string, supportedTypes: RegExp[] = supportedMimeTypes) {
    const matchesType = (pattern: RegExp) => pattern.test(fileType);
    return supportedTypes.some(matchesType);
  },
};
|
|
|
|
/**
 * Schema for an optional list of MIME type patterns. When the list is present,
 * every element must be either a string or a `RegExp`.
 */
const supportedMimeTypesSchema = z
  .array(z.any())
  .optional()
  .refine(
    (mimeTypes) =>
      // An absent list is valid; otherwise validate each entry.
      !mimeTypes ||
      mimeTypes.every((mimeType) => mimeType instanceof RegExp || typeof mimeType === 'string'),
    {
      message: 'Each mimeType must be a string or a RegExp object.',
    },
  );
|
|
|
|
/**
 * Schema for a single endpoint's file configuration. Every field is optional;
 * numeric limits must be non-negative.
 */
export const endpointFileConfigSchema = z.object({
  disabled: z.boolean().optional(),
  fileLimit: z.number().min(0).optional(),
  fileSizeLimit: z.number().min(0).optional(),
  totalSizeLimit: z.number().min(0).optional(),
  supportedMimeTypes: supportedMimeTypesSchema.optional(),
});
|
|
|
|
/**
 * Schema for the user-supplied (dynamic) file configuration. Every field is optional;
 * numeric values must be non-negative, `percentage` is capped at 100 and `quality` at 1.
 */
export const fileConfigSchema = z.object({
  endpoints: z.record(endpointFileConfigSchema).optional(),
  serverFileSizeLimit: z.number().min(0).optional(),
  avatarSizeLimit: z.number().min(0).optional(),
  fileTokenLimit: z.number().min(0).optional(),
  imageGeneration: z
    .object({
      percentage: z.number().min(0).max(100).optional(),
      px: z.number().min(0).optional(),
    })
    .optional(),
  clientImageResize: z
    .object({
      enabled: z.boolean().optional(),
      maxWidth: z.number().min(0).optional(),
      maxHeight: z.number().min(0).optional(),
      quality: z.number().min(0).max(1).optional(),
    })
    .optional(),
  ocr: z
    .object({
      supportedMimeTypes: supportedMimeTypesSchema.optional(),
    })
    .optional(),
  text: z
    .object({
      supportedMimeTypes: supportedMimeTypesSchema.optional(),
    })
    .optional(),
});

/** Static type inferred from `fileConfigSchema`. */
export type TFileConfig = z.infer<typeof fileConfigSchema>;
|
|
|
|
/**
 * Safely compiles string patterns into `RegExp` objects.
 *
 * Patterns that fail to compile are logged with `console.error` and skipped;
 * the order of the remaining patterns is preserved.
 *
 * @param patterns - Regular-expression source strings
 * @returns The successfully compiled expressions
 */
export const convertStringsToRegex = (patterns: string[]): RegExp[] => {
  const compiled: RegExp[] = [];
  patterns.forEach((pattern) => {
    try {
      compiled.push(new RegExp(pattern));
    } catch (error) {
      console.error(`Invalid regex pattern "${pattern}" skipped.`, error);
    }
  });
  return compiled;
};
|
|
|
|
/**
 * NOTE: This comment documents `getEndpointFileConfig`, which is declared after the
 * `mergeWithDefault` helper below.
 *
 * Gets the appropriate endpoint file configuration with standardized lookup logic.
 *
 * @param params - Object containing fileConfig, endpoint, and optional endpointType
 * @param params.fileConfig - The merged file configuration
 * @param params.endpoint - The endpoint name to look up
 * @param params.endpointType - Optional endpoint type for additional context
 * @returns The endpoint file configuration; the default configuration when no specific entry matches
 */
|
|
/**
 * Fills every field missing (null/undefined) from an endpoint config with the value
 * from the default config.
 *
 * For `supportedMimeTypes`, the fallback is the module-level `supportedMimeTypes`
 * list when `isDocumentSupportedProvider(endpoint)` is true, and
 * `defaultConfig.supportedMimeTypes` otherwise.
 *
 * @param endpointConfig - The endpoint-specific configuration (takes precedence)
 * @param defaultConfig - The configuration supplying fallback values
 * @param endpoint - Endpoint name used to select the MIME type fallback
 * @returns A new config object containing the five merged fields
 */
function mergeWithDefault(
  endpointConfig: EndpointFileConfig,
  defaultConfig: EndpointFileConfig,
  endpoint?: string | null,
): EndpointFileConfig {
  let fallbackMimeTypes = defaultConfig.supportedMimeTypes;
  if (isDocumentSupportedProvider(endpoint)) {
    /** Use comprehensive MIME types for document-supported providers */
    fallbackMimeTypes = supportedMimeTypes;
  }

  const merged: EndpointFileConfig = {
    disabled: endpointConfig.disabled ?? defaultConfig.disabled,
    fileLimit: endpointConfig.fileLimit ?? defaultConfig.fileLimit,
    fileSizeLimit: endpointConfig.fileSizeLimit ?? defaultConfig.fileSizeLimit,
    totalSizeLimit: endpointConfig.totalSizeLimit ?? defaultConfig.totalSizeLimit,
    supportedMimeTypes: endpointConfig.supportedMimeTypes ?? fallbackMimeTypes,
  };
  return merged;
}
|
|
|
|
/**
 * Resolves the file configuration that applies to an endpoint.
 *
 * Without `fileConfig.endpoints`, the built-in default endpoint config is returned.
 * Otherwise an effective default is computed (user `default` merged over the built-in
 * default) and the lookup proceeds as follows:
 *
 * Custom endpoints (`endpointType` is `custom`, or neither the normalized type nor the
 * non-empty normalized endpoint name is a standard endpoint):
 *   1. exact `endpoint` key, 2. non-standard key whose normalized name matches,
 *   3. generic `custom` entry, 4. `agents` entry, 5. effective default.
 *
 * Standard endpoints:
 *   1. `endpointType` key, 2. exact `endpoint` key, 3. normalized endpoint key,
 *   4. `agents` entry when the endpoint is an agents endpoint, 5. effective default.
 *
 * Every matched entry is completed with the effective default via `mergeWithDefault`.
 *
 * @param params.fileConfig - The merged file configuration
 * @param params.endpoint - The endpoint name to look up
 * @param params.endpointType - Optional endpoint type
 * @returns The resolved endpoint file configuration
 */
export function getEndpointFileConfig(params: {
  fileConfig?: FileConfig | null;
  endpoint?: string | null;
  endpointType?: string | null;
}): EndpointFileConfig {
  const { fileConfig: mergedFileConfig, endpoint, endpointType } = params;
  const baseDefaultConfig = fileConfig.endpoints.default;

  if (!mergedFileConfig?.endpoints) {
    return baseDefaultConfig;
  }

  const endpoints = mergedFileConfig.endpoints;

  /** Compute an effective default by merging user-configured default over the base default */
  const userDefaultConfig = endpoints.default;
  const defaultConfig = userDefaultConfig
    ? mergeWithDefault(userDefaultConfig, baseDefaultConfig, 'default')
    : baseDefaultConfig;

  const normalizedEndpoint = normalizeEndpointName(endpoint ?? '');
  const standardEndpoints = new Set([
    'default',
    EModelEndpoint.agents,
    EModelEndpoint.assistants,
    EModelEndpoint.azureAssistants,
    EModelEndpoint.openAI,
    EModelEndpoint.azureOpenAI,
    EModelEndpoint.anthropic,
    EModelEndpoint.google,
    EModelEndpoint.bedrock,
  ]);
  const normalizedEndpointType = normalizeEndpointName(endpointType ?? '');

  const typeIsStandard = standardEndpoints.has(normalizedEndpointType);
  const nameIsNonStandard =
    Boolean(normalizedEndpoint) && !standardEndpoints.has(normalizedEndpoint);
  const isCustomEndpoint =
    endpointType === EModelEndpoint.custom || (!typeIsStandard && nameIsNonStandard);

  if (isCustomEndpoint) {
    /** 1. Direct lookup by the endpoint name as given (normalized or not) */
    if (endpoint && endpoints[endpoint]) {
      return mergeWithDefault(endpoints[endpoint], defaultConfig, endpoint);
    }

    /** 2. Lookup by normalized name, ignoring standard endpoint keys */
    for (const key in endpoints) {
      if (standardEndpoints.has(key)) {
        continue;
      }
      if (normalizeEndpointName(key) === normalizedEndpoint) {
        return mergeWithDefault(endpoints[key], defaultConfig, key);
      }
    }

    /** 3. Generic 'custom' config, then 4. 'agents' (all custom endpoints are non-assistants) */
    if (endpoints[EModelEndpoint.custom]) {
      return mergeWithDefault(endpoints[EModelEndpoint.custom], defaultConfig, endpoint);
    }
    if (endpoints[EModelEndpoint.agents]) {
      return mergeWithDefault(endpoints[EModelEndpoint.agents], defaultConfig, endpoint);
    }

    /** 5. Nothing matched: effective default */
    return defaultConfig;
  }

  /** endpointType is checked first (most reliable for standard endpoints) */
  if (endpointType && endpoints[endpointType]) {
    return mergeWithDefault(endpoints[endpointType], defaultConfig, endpointType);
  }

  /** Then the endpoint name exactly as given */
  if (endpoint && endpoints[endpoint]) {
    return mergeWithDefault(endpoints[endpoint], defaultConfig, endpoint);
  }

  /** Then the normalized endpoint name */
  if (normalizedEndpoint && endpoints[normalizedEndpoint]) {
    return mergeWithDefault(endpoints[normalizedEndpoint], defaultConfig, normalizedEndpoint);
  }

  /** Fall back to the agents entry only when the endpoint is explicitly agents */
  const isAgents = isAgentsEndpoint(normalizedEndpointType || normalizedEndpoint);
  if (isAgents && endpoints[EModelEndpoint.agents]) {
    return mergeWithDefault(
      endpoints[EModelEndpoint.agents],
      defaultConfig,
      EModelEndpoint.agents,
    );
  }

  return defaultConfig;
}
|
|
|
|
/**
 * Produces a file configuration by layering a dynamic (user-supplied) configuration
 * over the built-in `fileConfig`.
 *
 * - `serverFileSizeLimit`, `avatarSizeLimit` and per-endpoint `fileSizeLimit` /
 *   `totalSizeLimit` from `dynamic` are passed through `mbToBytes` before being stored.
 * - `supportedMimeTypes` lists from `dynamic` are compiled with `convertStringsToRegex`.
 * - An endpoint with `disabled: true` gets zeroed limits and an empty MIME type list.
 * - The built-in `fileConfig` is not mutated: the endpoints map and each touched
 *   endpoint entry are shallow-copied before being modified.
 *
 * @param dynamic - The dynamic configuration; when undefined, a copy of the defaults is returned
 * @returns The merged file configuration
 */
export function mergeFileConfig(dynamic: z.infer<typeof fileConfigSchema> | undefined): FileConfig {
  const mergedConfig: FileConfig = {
    ...fileConfig,
    endpoints: { ...fileConfig.endpoints },
    ocr: {
      ...fileConfig.ocr,
      supportedMimeTypes: fileConfig.ocr?.supportedMimeTypes || [],
    },
    text: {
      ...fileConfig.text,
      supportedMimeTypes: fileConfig.text?.supportedMimeTypes || [],
    },
    stt: {
      ...fileConfig.stt,
      supportedMimeTypes: fileConfig.stt?.supportedMimeTypes || [],
    },
  };

  if (!dynamic) {
    return mergedConfig;
  }

  const { serverFileSizeLimit, avatarSizeLimit, fileTokenLimit, clientImageResize, ocr, text } =
    dynamic;

  if (serverFileSizeLimit !== undefined) {
    mergedConfig.serverFileSizeLimit = mbToBytes(serverFileSizeLimit);
  }
  if (avatarSizeLimit !== undefined) {
    mergedConfig.avatarSizeLimit = mbToBytes(avatarSizeLimit);
  }
  if (fileTokenLimit !== undefined) {
    mergedConfig.fileTokenLimit = fileTokenLimit;
  }

  // Merge clientImageResize configuration
  if (clientImageResize !== undefined) {
    mergedConfig.clientImageResize = { ...mergedConfig.clientImageResize, ...clientImageResize };
  }

  if (ocr !== undefined) {
    mergedConfig.ocr = { ...mergedConfig.ocr, ...ocr };
    if (ocr.supportedMimeTypes) {
      mergedConfig.ocr.supportedMimeTypes = convertStringsToRegex(ocr.supportedMimeTypes);
    }
  }

  if (text !== undefined) {
    mergedConfig.text = { ...mergedConfig.text, ...text };
    if (text.supportedMimeTypes) {
      mergedConfig.text.supportedMimeTypes = convertStringsToRegex(text.supportedMimeTypes);
    }
  }

  if (!dynamic.endpoints) {
    return mergedConfig;
  }

  const endpointOverrides = dynamic.endpoints as Record<string, EndpointFileConfig>;
  for (const key in endpointOverrides) {
    const dynamicEndpoint = endpointOverrides[key];

    /** Shallow-copy the base endpoint config (if any) so the shared defaults are not mutated */
    const existing = mergedConfig.endpoints[key];
    const mergedEndpoint: EndpointFileConfig = existing ? { ...existing } : {};
    mergedConfig.endpoints[key] = mergedEndpoint;

    if (dynamicEndpoint.disabled === true) {
      // A disabled endpoint accepts nothing: zero every limit and clear the MIME types.
      Object.assign(mergedEndpoint, {
        disabled: true,
        fileLimit: 0,
        fileSizeLimit: 0,
        totalSizeLimit: 0,
        supportedMimeTypes: [],
      });
      continue;
    }

    if (dynamicEndpoint.fileSizeLimit !== undefined) {
      mergedEndpoint.fileSizeLimit = mbToBytes(dynamicEndpoint.fileSizeLimit);
    }
    if (dynamicEndpoint.totalSizeLimit !== undefined) {
      mergedEndpoint.totalSizeLimit = mbToBytes(dynamicEndpoint.totalSizeLimit);
    }
    if (dynamicEndpoint.fileLimit !== undefined) {
      mergedEndpoint.fileLimit = dynamicEndpoint.fileLimit;
    }
    if (dynamicEndpoint.disabled !== undefined) {
      mergedEndpoint.disabled = dynamicEndpoint.disabled;
    }
    if (dynamicEndpoint.supportedMimeTypes) {
      mergedEndpoint.supportedMimeTypes = convertStringsToRegex(
        dynamicEndpoint.supportedMimeTypes as unknown as string[],
      );
    }
  }

  return mergedConfig;
}
|