LibreChat/packages/data-provider/src/file-config.ts
Pol Burkardt Freire 7e74165c3c
📖 feat: Add Native ODT Document Parser Support (#12303)
* fix: add ODT support to native document parser

* fix: replace execSync with jszip for ODT parsing

* docs: update documentParserMimeTypes comment to include odt

* fix: improve ODT XML extraction and add empty.odt fixture

- Scope extraction to <office:body> to exclude metadata/style nodes
- Map </text:p> and </text:h> closings to newlines, preserving paragraph
  structure instead of collapsing everything to a single line
- Handle <text:line-break/> as explicit newlines
- Strip remaining tags, normalize horizontal whitespace, cap consecutive
  blank lines at one
- Regenerate sample.odt as a two-paragraph fixture so the test exercises
  multi-paragraph output
- Add empty.odt fixture and test asserting 'No text found in document'

* fix: address review findings in ODT parser

- Use static `import JSZip from 'jszip'` instead of dynamic import;
  jszip is CommonJS-only with no ESM/Jest-isolation concern (F1)
- Decode the five standard XML entities after tag-stripping so
  documents with &, <, >, ", ' send correct text to the LLM (F2)
- Remove @types/jszip devDependency; jszip ships bundled declarations
  and @types/jszip is a stale 2020 stub that would shadow them (F3)
- Handle <text:tab/> → \t and <text:s .../> → ' ' before the generic
  tag stripper so tab-aligned and multi-space content is preserved (F4)
- Add sample-entities.odt fixture and test covering entity decoding,
  tab, and spacing-element handling (F5)
- Rename 'throws for empty odt' → 'throws for odt with no extractable
  text' to distinguish from a zero-byte/corrupt file case (F8)

* fix: add decompressed content size cap to odtToText (F6)

Reads uncompressed entry sizes from the JSZip internal metadata before
extracting any content. Throws if the total exceeds 50MB, preventing a
crafted ODT with a high-ratio compressed payload from exhausting heap.

Adds a corresponding test using a real DEFLATE-compressed ZIP (~51KB on
disk, 51MB uncompressed) to verify the guard fires before any extraction.

* fix: add java to codeTypeMapping for file upload support

.java files were rejected with "Unable to determine file type" because
browsers send an empty MIME type for them and codeTypeMapping had no
'java' entry for inferMimeType() to fall back on.

text/x-java was already present in all five validation lists
(fullMimeTypesList, codeInterpreterMimeTypesList, retrievalMimeTypesList,
textMimeTypes, retrievalMimeTypes), so mapping to it (not text/plain)
ensures .java uploads work for both File Search and Code Interpreter.

Closes #12307

* fix: address follow-up review findings (A-E)

A: regenerate package-lock.json after removing @types/jszip from
   package.json; without this npm ci was still installing the stale
   2020 type stubs and TypeScript was resolving against them
B: replace dynamic import('jszip') in the zip-bomb test with the same
   static import already used in production; jszip is CJS-only with no
   ESM/Jest isolation concern
C: document that the _data.uncompressedSize guard fails open if jszip
   renames the private field (accepted limitation, test would catch it)
D: rename 'preserves tabs' test to 'normalizes tab and spacing elements
   to spaces' since <text:tab> is collapsed to a space, not kept as \t
E: fix test.each([ formatting artifact (missing newline after '[')

---------

Co-authored-by: Danny Avila <danny@librechat.ai>
2026-03-19 15:49:52 -04:00

764 lines
25 KiB
TypeScript

import { z } from 'zod';
import type { EndpointFileConfig, FileConfig } from './types/files';
import { EModelEndpoint, isAgentsEndpoint, isDocumentSupportedProvider } from './schemas';
import { normalizeEndpointName } from './utils';
/**
 * Per-endpoint flag table keyed by `EModelEndpoint` member.
 * Every endpoint listed here is flagged `true`; endpoints not listed have no entry.
 */
export const supportsFiles = {
[EModelEndpoint.openAI]: true,
[EModelEndpoint.google]: true,
[EModelEndpoint.assistants]: true,
[EModelEndpoint.azureAssistants]: true,
[EModelEndpoint.agents]: true,
[EModelEndpoint.azureOpenAI]: true,
[EModelEndpoint.anthropic]: true,
[EModelEndpoint.custom]: true,
[EModelEndpoint.bedrock]: true,
};
/**
 * MIME type strings treated as Excel spreadsheets: `application/vnd.ms-excel`,
 * several non-canonical aliases of it, and the OOXML spreadsheet type.
 * The same nine subtypes are matched by the `excelMimeTypes` regex in this file,
 * and this list is spread into `fullMimeTypesList` and `codeInterpreterMimeTypesList`.
 */
export const excelFileTypes = [
'application/vnd.ms-excel',
'application/msexcel',
'application/x-msexcel',
'application/x-ms-excel',
'application/x-excel',
'application/x-dos_ms_excel',
'application/xls',
'application/x-xls',
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
];
/**
 * Broadest flat list of MIME type strings in this module: source code and text
 * formats, office/OpenDocument documents, images (including SVG), archives,
 * video and audio formats, plus every entry of `excelFileTypes`.
 */
export const fullMimeTypesList = [
'text/x-c',
'text/x-c++',
'application/csv',
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'text/html',
'text/x-java',
'application/json',
'text/markdown',
'application/pdf',
'text/x-php',
'application/vnd.openxmlformats-officedocument.presentationml.presentation',
'text/x-python',
'text/x-script.python',
'text/x-ruby',
'text/x-tex',
'text/plain',
'text/css',
'text/vtt',
'image/jpeg',
'text/javascript',
'image/gif',
'image/png',
'image/heic',
'image/heif',
'application/x-tar',
'application/x-sh',
'application/typescript',
'application/sql',
'application/yaml',
'application/vnd.coffeescript',
'application/xml',
'application/zip',
'application/x-parquet',
'application/vnd.oasis.opendocument.text',
'application/vnd.oasis.opendocument.spreadsheet',
'application/vnd.oasis.opendocument.presentation',
'application/vnd.oasis.opendocument.graphics',
'image/svg',
'image/svg+xml',
// Video formats
'video/mp4',
'video/avi',
'video/mov',
'video/wmv',
'video/flv',
'video/webm',
'video/mkv',
'video/m4v',
'video/3gp',
'video/ogv',
// Audio formats
'audio/mp3',
'audio/wav',
'audio/ogg',
'audio/m4a',
'audio/aac',
'audio/flac',
'audio/wma',
'audio/opus',
'audio/mpeg',
// Excel variants are maintained once in `excelFileTypes`
...excelFileTypes,
];
/**
 * Flat list of MIME type strings for the code interpreter.
 * Narrower than `fullMimeTypesList`: it carries no video, audio, OpenDocument
 * or SVG entries (among others), but still includes every `excelFileTypes` entry.
 */
export const codeInterpreterMimeTypesList = [
'text/x-c',
'text/x-c++',
'application/csv',
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'text/html',
'text/x-java',
'application/json',
'text/markdown',
'application/pdf',
'text/x-php',
'application/vnd.openxmlformats-officedocument.presentationml.presentation',
'text/x-python',
'text/x-script.python',
'text/x-ruby',
'text/x-tex',
'text/plain',
'text/css',
'image/jpeg',
'text/javascript',
'image/gif',
'image/png',
'image/heic',
'image/heif',
'application/x-tar',
'application/typescript',
'application/xml',
'application/zip',
'application/x-parquet',
// Excel variants are maintained once in `excelFileTypes`
...excelFileTypes,
];
/**
 * Flat list of MIME type strings for retrieval: text and source-code formats
 * plus JSON, PDF, docx and pptx. It contains no image, archive, spreadsheet
 * or media entries.
 */
export const retrievalMimeTypesList = [
'text/x-c',
'text/x-c++',
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'text/html',
'text/x-java',
'application/json',
'text/markdown',
'application/pdf',
'text/x-php',
'application/vnd.openxmlformats-officedocument.presentationml.presentation',
'text/x-python',
'text/x-script.python',
'text/x-ruby',
'text/x-tex',
'text/plain',
];
/** Case-insensitive match for names ending in `.jpg`, `.jpeg`, `.png`, `.gif`, `.webp`, `.heic` or `.heif`. */
export const imageExtRegex = /\.(jpg|jpeg|png|gif|webp|heic|heif)$/i;
/**
 * Document format identifiers accepted in a Bedrock document block.
 * @see https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_DocumentBlock.html
 */
export type BedrockDocumentFormat =
  | 'pdf'
  | 'csv'
  | 'doc'
  | 'docx'
  | 'xls'
  | 'xlsx'
  | 'html'
  | 'txt'
  | 'md';

/** Maps MIME types to Bedrock Converse API document format values */
export const bedrockDocumentFormats: Record<string, BedrockDocumentFormat> = {
  'application/pdf': 'pdf',
  'text/csv': 'csv',
  'application/csv': 'csv',
  'application/msword': 'doc',
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
  'application/vnd.ms-excel': 'xls',
  'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
  'text/html': 'html',
  'text/plain': 'txt',
  'text/markdown': 'md',
};

/**
 * Tells whether a MIME type has an entry in `bedrockDocumentFormats`.
 *
 * Only the table's own keys count. The `in` operator also reports members
 * inherited from `Object.prototype`, so strings such as `toString`,
 * `constructor` or `__proto__` used to be accepted as document types.
 *
 * @param mimeType - MIME type to check; `undefined`/`null` yields `false`
 * @returns `true` only when `mimeType` is a key of `bedrockDocumentFormats`
 */
export const isBedrockDocumentType = (mimeType?: string): boolean =>
  mimeType != null && Object.prototype.hasOwnProperty.call(bedrockDocumentFormats, mimeType);
/**
 * File extensions accepted by Bedrock document uploads (for input accept attributes).
 * A single comma-separated string that lists the dot-extensions first (including `.htm`)
 * followed by the MIME types that are the keys of `bedrockDocumentFormats`.
 */
export const bedrockDocumentExtensions =
'.pdf,.csv,.doc,.docx,.xls,.xlsx,.html,.htm,.txt,.md,application/pdf,text/csv,application/csv,application/msword,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,text/html,text/plain,text/markdown';
/** Whole-string match for the Excel MIME types (same nine subtypes as `excelFileTypes`). */
export const excelMimeTypes =
/^application\/(vnd\.ms-excel|msexcel|x-msexcel|x-ms-excel|x-excel|x-dos_ms_excel|xls|x-xls|vnd\.openxmlformats-officedocument\.spreadsheetml\.sheet)$/;
/** Whole-string match for the accepted `text/*` subtypes (source code, markup, CSV/TSV, VTT, XML, plain text). */
export const textMimeTypes =
/^(text\/(x-c|x-csharp|tab-separated-values|x-c\+\+|x-h|x-java|html|markdown|x-php|x-python|x-script\.python|x-ruby|x-tex|plain|css|vtt|javascript|csv|xml))$/;
/** Whole-string match for the accepted `application/*` subtypes, including OOXML and OpenDocument families. */
export const applicationMimeTypes =
/^(application\/(epub\+zip|csv|json|msword|pdf|x-tar|x-sh|typescript|sql|yaml|x-parquet|vnd\.apache\.parquet|vnd\.coffeescript|vnd\.openxmlformats-officedocument\.(wordprocessingml\.document|presentationml\.presentation|spreadsheetml\.sheet)|vnd\.oasis\.opendocument\.(text|spreadsheet|presentation|graphics)|xml|zip))$/;
/** Whole-string match for raster image types; SVG is not part of this pattern. */
export const imageMimeTypes = /^image\/(jpeg|gif|png|webp|heic|heif)$/;
/** Whole-string match for the accepted `audio/*` subtypes, including common aliases (`mpeg3`, `wave`, `x-wav`, ...). */
export const audioMimeTypes =
/^audio\/(mp3|mpeg|mpeg3|wav|wave|x-wav|ogg|vorbis|mp4|m4a|x-m4a|flac|x-flac|webm|aac|wma|opus)$/;
/** Whole-string match for the accepted `video/*` subtypes. */
export const videoMimeTypes = /^video\/(mp4|avi|mov|wmv|flv|webm|mkv|m4v|3gp|ogv)$/;
/**
 * Default patterns for `fileConfig.ocr.supportedMimeTypes`: raster images, Excel,
 * PDF, docx/pptx, `vnd.ms-word`/`vnd.ms-powerpoint`, EPUB and the OpenDocument family.
 */
export const defaultOCRMimeTypes = [
imageMimeTypes,
excelMimeTypes,
/^application\/pdf$/,
/^application\/vnd\.openxmlformats-officedocument\.(wordprocessingml\.document|presentationml\.presentation)$/,
/^application\/vnd\.ms-(word|powerpoint)$/,
/^application\/epub\+zip$/,
/^application\/vnd\.oasis\.opendocument\.(text|spreadsheet|presentation|graphics)$/,
];
/** MIME types handled by the built-in document parser (pdf, docx, excel variants, ods/odt) */
export const documentParserMimeTypes = [
excelMimeTypes,
/^application\/pdf$/,
/^application\/vnd\.openxmlformats-officedocument\.wordprocessingml\.document$/,
/^application\/vnd\.oasis\.opendocument\.spreadsheet$/,
/^application\/vnd\.oasis\.opendocument\.text$/,
];
/**
 * Default patterns for `fileConfig.text.supportedMimeTypes`: any `type/subtype` made only of
 * word characters, dots and hyphens. A subtype containing `+` (e.g. `image/svg+xml`) does not match.
 */
export const defaultTextMimeTypes = [/^[\w.-]+\/[\w.-]+$/];
/** Default patterns for `fileConfig.stt.supportedMimeTypes`: the audio types only. */
export const defaultSTTMimeTypes = [audioMimeTypes];
/**
 * Default set of accepted MIME type patterns: text, Excel, application, image,
 * video and audio types, plus SVG. It is the default argument of `fileConfig.checkType`
 * and the `supportedMimeTypes` of every built-in endpoint config in this file.
 */
export const supportedMimeTypes = [
textMimeTypes,
excelMimeTypes,
applicationMimeTypes,
imageMimeTypes,
videoMimeTypes,
audioMimeTypes,
/** Supported by LC Code Interpreter API */
/^image\/(svg|svg\+xml)$/,
];
/** MIME type patterns for the code interpreter: text, Excel, application and raster image types (no video, audio or SVG). */
export const codeInterpreterMimeTypes = [
textMimeTypes,
excelMimeTypes,
applicationMimeTypes,
imageMimeTypes,
];
/**
 * Maps a lower-case file extension (without the dot) to a MIME type.
 * `inferMimeType` consults this table first when no MIME type was reported for a file.
 * Most source-code and config extensions map to `text/plain`; a few have dedicated types.
 * The `dockerfile` and `makefile` keys are whole file names rather than extensions.
 */
export const codeTypeMapping: { [key: string]: string } = {
c: 'text/x-c', // .c - C source
cs: 'text/x-csharp', // .cs - C# source
cpp: 'text/x-c++', // .cpp - C++ source
h: 'text/x-h', // .h - C/C++ header
md: 'text/markdown', // .md - Markdown
php: 'text/x-php', // .php - PHP source
py: 'text/x-python', // .py - Python source
rb: 'text/x-ruby', // .rb - Ruby source
tex: 'text/x-tex', // .tex - LaTeX source
java: 'text/x-java', // .java - Java source
js: 'text/javascript', // .js - JavaScript source
sh: 'application/x-sh', // .sh - Shell script
ts: 'application/typescript', // .ts - TypeScript source
tar: 'application/x-tar', // .tar - Tar archive
zip: 'application/zip', // .zip - ZIP archive
txt: 'text/plain', // .txt - Plain text file
log: 'text/plain', // .log - Log file
csv: 'text/csv', // .csv - Comma-separated values
tsv: 'text/tab-separated-values', // .tsv - Tab-separated values
parquet: 'application/x-parquet', // .parquet - Apache Parquet columnar storage
json: 'application/json', // .json - JSON file
xml: 'application/xml', // .xml - XML file
html: 'text/html', // .html - HTML file
htm: 'text/html', // .htm - HTML file
css: 'text/css', // .css - CSS file
yml: 'application/yaml', // .yml - YAML
yaml: 'application/yaml', // .yaml - YAML
sql: 'application/sql', // .sql - SQL (IANA registered)
dart: 'text/plain', // .dart - Dart source
coffee: 'application/vnd.coffeescript', // .coffee - CoffeeScript (IANA registered)
go: 'text/plain', // .go - Go source
rs: 'text/plain', // .rs - Rust source
swift: 'text/plain', // .swift - Swift source
kt: 'text/plain', // .kt - Kotlin source
kts: 'text/plain', // .kts - Kotlin script
scala: 'text/plain', // .scala - Scala source
lua: 'text/plain', // .lua - Lua source
r: 'text/plain', // .r - R source
pl: 'text/plain', // .pl - Perl source
pm: 'text/plain', // .pm - Perl module
groovy: 'text/plain', // .groovy - Groovy source
gradle: 'text/plain', // .gradle - Gradle build script
clj: 'text/plain', // .clj - Clojure source
cljs: 'text/plain', // .cljs - ClojureScript source
cljc: 'text/plain', // .cljc - Clojure common source
elm: 'text/plain', // .elm - Elm source
erl: 'text/plain', // .erl - Erlang source
hrl: 'text/plain', // .hrl - Erlang header
ex: 'text/plain', // .ex - Elixir source
exs: 'text/plain', // .exs - Elixir script
hs: 'text/plain', // .hs - Haskell source
lhs: 'text/plain', // .lhs - Literate Haskell source
ml: 'text/plain', // .ml - OCaml source
mli: 'text/plain', // .mli - OCaml interface
fs: 'text/plain', // .fs - F# source
fsx: 'text/plain', // .fsx - F# script
lisp: 'text/plain', // .lisp - Lisp source
cl: 'text/plain', // .cl - Common Lisp source
scm: 'text/plain', // .scm - Scheme source
rkt: 'text/plain', // .rkt - Racket source
jsx: 'text/plain', // .jsx - React JSX
tsx: 'text/plain', // .tsx - React TSX
vue: 'text/plain', // .vue - Vue component
svelte: 'text/plain', // .svelte - Svelte component
astro: 'text/plain', // .astro - Astro component
scss: 'text/plain', // .scss - SCSS source
sass: 'text/plain', // .sass - Sass source
less: 'text/plain', // .less - Less source
styl: 'text/plain', // .styl - Stylus source
toml: 'text/plain', // .toml - TOML config
ini: 'text/plain', // .ini - INI config
cfg: 'text/plain', // .cfg - Config file
conf: 'text/plain', // .conf - Config file
env: 'text/plain', // .env - Environment file
properties: 'text/plain', // .properties - Java properties
graphql: 'text/plain', // .graphql - GraphQL schema/query
gql: 'text/plain', // .gql - GraphQL schema/query
proto: 'text/plain', // .proto - Protocol Buffers
dockerfile: 'text/plain', // Dockerfile
makefile: 'text/plain', // Makefile
cmake: 'text/plain', // .cmake - CMake script
rake: 'text/plain', // .rake - Rake task
gemspec: 'text/plain', // .gemspec - Ruby gem spec
bash: 'text/plain', // .bash - Bash script
zsh: 'text/plain', // .zsh - Zsh script
fish: 'text/plain', // .fish - Fish script
ps1: 'text/plain', // .ps1 - PowerShell script
psm1: 'text/plain', // .psm1 - PowerShell module
bat: 'text/plain', // .bat - Batch script
cmd: 'text/plain', // .cmd - Windows command script
asm: 'text/plain', // .asm - Assembly source
s: 'text/plain', // .s - Assembly source
v: 'text/plain', // .v - V or Verilog source
zig: 'text/plain', // .zig - Zig source
nim: 'text/plain', // .nim - Nim source
cr: 'text/plain', // .cr - Crystal source
d: 'text/plain', // .d - D source
pas: 'text/plain', // .pas - Pascal source
pp: 'text/plain', // .pp - Pascal/Puppet source
f90: 'text/plain', // .f90 - Fortran 90 source
f95: 'text/plain', // .f95 - Fortran 95 source
f03: 'text/plain', // .f03 - Fortran 2003 source
jl: 'text/plain', // .jl - Julia source
m: 'text/plain', // .m - Objective-C/MATLAB source
mm: 'text/plain', // .mm - Objective-C++ source
ada: 'text/plain', // .ada - Ada source
adb: 'text/plain', // .adb - Ada body
ads: 'text/plain', // .ads - Ada spec
cob: 'text/plain', // .cob - COBOL source
cbl: 'text/plain', // .cbl - COBOL source
tcl: 'text/plain', // .tcl - Tcl source
awk: 'text/plain', // .awk - AWK script
sed: 'text/plain', // .sed - Sed script
odt: 'application/vnd.oasis.opendocument.text', // .odt - OpenDocument Text
ods: 'application/vnd.oasis.opendocument.spreadsheet', // .ods - OpenDocument Spreadsheet
odp: 'application/vnd.oasis.opendocument.presentation', // .odp - OpenDocument Presentation
odg: 'application/vnd.oasis.opendocument.graphics', // .odg - OpenDocument Graphics
};
/**
 * Maps image extensions to MIME types for formats browsers may not recognize.
 * `inferMimeType` falls back to this table when `codeTypeMapping` has no entry for the extension.
 */
export const imageTypeMapping: { [key: string]: string } = {
heic: 'image/heic',
heif: 'image/heif',
};
/**
 * Normalizes non-standard MIME types that browsers may report to their canonical forms.
 * `inferMimeType` applies this table only when a non-empty MIME type was reported.
 */
export const mimeTypeAliases: Readonly<Record<string, string>> = {
'text/x-python-script': 'text/x-python',
};
/**
 * Infers the MIME type from a file's extension when the browser doesn't recognize it,
 * and normalizes known non-standard MIME type aliases to their canonical forms.
 *
 * All table lookups consider own keys only. A plain bracket lookup would also return
 * members inherited from `Object.prototype`, so a name such as `notes.constructor`
 * used to yield a function instead of a string.
 *
 * @param fileName - The file name including its extension
 * @param currentType - The MIME type reported by the browser (may be empty string)
 * @returns The normalized or inferred MIME type; empty string if unresolvable
 */
export function inferMimeType(fileName: string, currentType: string): string {
  /** Own-property lookup: never resolves inherited members such as `constructor` */
  const lookup = (table: Readonly<Record<string, string>>, key: string): string | undefined =>
    Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;

  if (currentType) {
    return lookup(mimeTypeAliases, currentType) ?? currentType;
  }
  // A name without any dot is used whole, which is how the `dockerfile` and
  // `makefile` keys of `codeTypeMapping` can match.
  const extension = fileName.split('.').pop()?.toLowerCase() ?? '';
  return lookup(codeTypeMapping, extension) || lookup(imageTypeMapping, extension) || currentType;
}
/**
 * MIME type patterns for retrieval: selected `text/*` subtypes (source code, HTML,
 * Markdown, plain text, VTT, XML) and, under `application/*`, JSON, PDF, docx and pptx.
 */
export const retrievalMimeTypes = [
/^(text\/(x-c|x-c\+\+|x-h|html|x-java|markdown|x-php|x-python|x-script\.python|x-ruby|x-tex|plain|vtt|xml))$/,
/^(application\/(json|pdf|vnd\.openxmlformats-officedocument\.(wordprocessingml\.document|presentationml\.presentation)))$/,
];
/** Number of bytes in one megabyte (1024 squared). */
export const megabyte = 1024 ** 2;

/**
 * Converts a size given in megabytes to bytes.
 * @param mb - Size in megabytes
 * @returns The same size in bytes
 */
export const mbToBytes = (mb: number): number => {
  return megabyte * mb;
};

/** Built-in per-file and total upload size limit: 512 MB, in bytes. */
const defaultSizeLimit = mbToBytes(512);
/** Built-in value for `fileTokenLimit`. */
const defaultTokenLimit = 100000;
/**
 * Built-in endpoint config shared (same object) by the `assistants`, `azureAssistants`
 * and `agents` entries of `fileConfig.endpoints`: up to 10 files, 512 MB per file and
 * in total, every pattern in `supportedMimeTypes` accepted, uploads enabled.
 */
const assistantsFileConfig = {
fileLimit: 10,
fileSizeLimit: defaultSizeLimit,
totalSizeLimit: defaultSizeLimit,
supportedMimeTypes,
disabled: false,
};
/**
 * Built-in file configuration. `mergeFileConfig` layers a dynamic configuration over it,
 * and `getEndpointFileConfig` falls back to `endpoints.default` from here.
 * All size limits are stored in bytes.
 */
export const fileConfig = {
endpoints: {
// These three entries reference the same `assistantsFileConfig` object.
[EModelEndpoint.assistants]: assistantsFileConfig,
[EModelEndpoint.azureAssistants]: assistantsFileConfig,
[EModelEndpoint.agents]: assistantsFileConfig,
[EModelEndpoint.anthropic]: {
fileLimit: 10,
fileSizeLimit: defaultSizeLimit,
totalSizeLimit: defaultSizeLimit,
supportedMimeTypes,
disabled: false,
},
// Used when no endpoint-specific entry applies.
default: {
fileLimit: 10,
fileSizeLimit: defaultSizeLimit,
totalSizeLimit: defaultSizeLimit,
supportedMimeTypes,
disabled: false,
},
},
serverFileSizeLimit: defaultSizeLimit,
avatarSizeLimit: mbToBytes(2),
fileTokenLimit: defaultTokenLimit,
// Client-side image resizing is off by default.
clientImageResize: {
enabled: false,
maxWidth: 1900,
maxHeight: 1900,
quality: 0.92,
},
ocr: {
supportedMimeTypes: defaultOCRMimeTypes,
},
text: {
supportedMimeTypes: defaultTextMimeTypes,
},
stt: {
supportedMimeTypes: defaultSTTMimeTypes,
},
/**
 * Returns true when `fileType` matches at least one pattern in `supportedTypes`
 * (defaults to the module-level `supportedMimeTypes`).
 */
checkType: function (fileType: string, supportedTypes: RegExp[] = supportedMimeTypes) {
return supportedTypes.some((regex) => regex.test(fileType));
},
};
/**
 * Schema for an optional list of MIME type matchers.
 * An absent list is valid; a present list is valid only when every entry is
 * either a string or a RegExp instance.
 */
const supportedMimeTypesSchema = z
  .array(z.any())
  .optional()
  .refine(
    (mimeTypes) =>
      !mimeTypes ||
      mimeTypes.every((entry) => typeof entry === 'string' || entry instanceof RegExp),
    { message: 'Each mimeType must be a string or a RegExp object.' },
  );
/**
 * Schema for one endpoint's file settings in a dynamic configuration. Every field is optional.
 * `mergeFileConfig` passes `fileSizeLimit` and `totalSizeLimit` through `mbToBytes`, so they
 * are expressed in megabytes here; `fileLimit` is copied as-is.
 */
export const endpointFileConfigSchema = z.object({
disabled: z.boolean().optional(),
fileLimit: z.number().min(0).optional(),
fileSizeLimit: z.number().min(0).optional(),
totalSizeLimit: z.number().min(0).optional(),
supportedMimeTypes: supportedMimeTypesSchema.optional(),
});
/**
 * Schema for the dynamic file configuration accepted by `mergeFileConfig`. Every field is optional.
 * `serverFileSizeLimit` and `avatarSizeLimit` are converted with `mbToBytes` during the merge,
 * so they are expressed in megabytes here.
 * NOTE(review): there is no `stt` key in this schema although `fileConfig` has an `stt` section,
 * and `mergeFileConfig` never reads `imageGeneration` — confirm both are intentional.
 */
export const fileConfigSchema = z.object({
// Keyed by endpoint name; each value follows `endpointFileConfigSchema`.
endpoints: z.record(endpointFileConfigSchema).optional(),
serverFileSizeLimit: z.number().min(0).optional(),
avatarSizeLimit: z.number().min(0).optional(),
fileTokenLimit: z.number().min(0).optional(),
imageGeneration: z
.object({
percentage: z.number().min(0).max(100).optional(),
px: z.number().min(0).optional(),
})
.optional(),
clientImageResize: z
.object({
enabled: z.boolean().optional(),
maxWidth: z.number().min(0).optional(),
maxHeight: z.number().min(0).optional(),
// Constrained to the 0..1 range
quality: z.number().min(0).max(1).optional(),
})
.optional(),
ocr: z
.object({
supportedMimeTypes: supportedMimeTypesSchema.optional(),
})
.optional(),
text: z
.object({
supportedMimeTypes: supportedMimeTypesSchema.optional(),
})
.optional(),
});
/** Static type of a value accepted by `fileConfigSchema`. */
export type TFileConfig = z.infer<typeof fileConfigSchema>;
/**
 * Compiles pattern strings into RegExp objects, keeping input order.
 * A pattern that fails to compile is reported with `console.error` and left out
 * of the result instead of aborting the whole conversion.
 *
 * @param patterns - Regular expression sources
 * @returns One RegExp per valid pattern
 */
export const convertStringsToRegex = (patterns: string[]): RegExp[] => {
  const compiled: RegExp[] = [];
  patterns.forEach((pattern) => {
    try {
      compiled.push(new RegExp(pattern));
    } catch (error) {
      console.error(`Invalid regex pattern "${pattern}" skipped.`, error);
    }
  });
  return compiled;
};
/**
* Documentation for `getEndpointFileConfig`, which is declared after `mergeWithDefault` below.
* Gets the appropriate endpoint file configuration with standardized lookup logic.
*
* @param params - Object containing fileConfig, endpoint, and optional endpointType
* @param params.fileConfig - The merged file configuration
* @param params.endpoint - The endpoint name to look up
* @param params.endpointType - Optional endpoint type used as additional lookup context
* @returns The endpoint file configuration; the default configuration when nothing more specific matches
*/
/**
 * Completes an endpoint config by taking every field it leaves unset
 * (`null`/`undefined`) from a fallback config.
 *
 * The fallback for `supportedMimeTypes` depends on the endpoint: document-supported
 * providers get the module-level `supportedMimeTypes` (which includes video and audio
 * patterns); all others get `defaultConfig.supportedMimeTypes`.
 *
 * @param endpointConfig - Possibly partial endpoint configuration
 * @param defaultConfig - Configuration supplying the missing fields
 * @param endpoint - Endpoint name passed to `isDocumentSupportedProvider`
 * @returns A new config object with all five fields resolved
 */
function mergeWithDefault(
  endpointConfig: EndpointFileConfig,
  defaultConfig: EndpointFileConfig,
  endpoint?: string | null,
): EndpointFileConfig {
  let fallbackMimeTypes: EndpointFileConfig['supportedMimeTypes'] =
    defaultConfig.supportedMimeTypes;
  if (isDocumentSupportedProvider(endpoint)) {
    fallbackMimeTypes = supportedMimeTypes;
  }

  const {
    disabled,
    fileLimit,
    fileSizeLimit,
    totalSizeLimit,
    supportedMimeTypes: ownMimeTypes,
  } = endpointConfig;

  return {
    disabled: disabled ?? defaultConfig.disabled,
    fileLimit: fileLimit ?? defaultConfig.fileLimit,
    fileSizeLimit: fileSizeLimit ?? defaultConfig.fileSizeLimit,
    totalSizeLimit: totalSizeLimit ?? defaultConfig.totalSizeLimit,
    supportedMimeTypes: ownMimeTypes ?? fallbackMimeTypes,
  };
}
/**
 * Resolves the file configuration that applies to an endpoint.
 *
 * Custom endpoints are looked up by exact key, then by normalized name among
 * non-standard keys, then via the generic `custom` entry, then the `agents` entry.
 * Standard endpoints are looked up by `endpointType`, the raw endpoint name, the
 * normalized endpoint name, and finally the `agents` entry (agents endpoints only).
 * Every match is completed with `mergeWithDefault`; when nothing matches, the
 * effective default (user `default` merged over the built-in default) is returned.
 *
 * @param params.fileConfig - Merged file configuration; without `endpoints` the built-in default is returned
 * @param params.endpoint - Endpoint name to look up
 * @param params.endpointType - Endpoint type; `custom` forces the custom lookup path
 * @returns The resolved endpoint configuration
 */
export function getEndpointFileConfig(params: {
  fileConfig?: FileConfig | null;
  endpoint?: string | null;
  endpointType?: string | null;
}): EndpointFileConfig {
  const { endpoint, endpointType } = params;
  const endpointConfigs = params.fileConfig?.endpoints;
  if (!endpointConfigs) {
    return fileConfig.endpoints.default;
  }

  /** User-configured `default`, if any, layered over the built-in default */
  const builtInDefault = fileConfig.endpoints.default;
  const configuredDefault = endpointConfigs.default;
  const fallback = configuredDefault
    ? mergeWithDefault(configuredDefault, builtInDefault, 'default')
    : builtInDefault;

  const endpointName = normalizeEndpointName(endpoint ?? '');
  const standardEndpoints = new Set([
    'default',
    EModelEndpoint.agents,
    EModelEndpoint.assistants,
    EModelEndpoint.azureAssistants,
    EModelEndpoint.openAI,
    EModelEndpoint.azureOpenAI,
    EModelEndpoint.anthropic,
    EModelEndpoint.google,
    EModelEndpoint.bedrock,
  ]);
  const endpointTypeName = normalizeEndpointName(endpointType ?? '');

  /** Custom when declared so, or when neither the type nor the name is a standard endpoint */
  const treatAsCustom =
    endpointType === EModelEndpoint.custom ||
    (Boolean(endpointName) &&
      !standardEndpoints.has(endpointTypeName) &&
      !standardEndpoints.has(endpointName));

  if (treatAsCustom) {
    /** 1. Exact key as given by the caller */
    const exactMatch = endpoint ? endpointConfigs[endpoint] : undefined;
    if (exactMatch) {
      return mergeWithDefault(exactMatch, fallback, endpoint);
    }

    /** 2. First non-standard key whose normalized form equals the normalized endpoint */
    for (const key in endpointConfigs) {
      if (standardEndpoints.has(key) || normalizeEndpointName(key) !== endpointName) {
        continue;
      }
      return mergeWithDefault(endpointConfigs[key], fallback, key);
    }

    /** 3. Generic `custom` entry, 4. `agents` entry, 5. effective default */
    const genericConfig =
      endpointConfigs[EModelEndpoint.custom] || endpointConfigs[EModelEndpoint.agents];
    return genericConfig ? mergeWithDefault(genericConfig, fallback, endpoint) : fallback;
  }

  /** Standard endpoints: endpointType first, then the raw name, then the normalized name */
  const lookupKeys: Array<string | null | undefined> = [endpointType, endpoint, endpointName];
  for (const key of lookupKeys) {
    const match = key ? endpointConfigs[key] : undefined;
    if (match) {
      return mergeWithDefault(match, fallback, key);
    }
  }

  /** The `agents` entry applies only when the endpoint itself is an agents endpoint */
  const isAgents = isAgentsEndpoint(endpointTypeName || endpointName);
  const agentsConfig = endpointConfigs[EModelEndpoint.agents];
  if (isAgents && agentsConfig) {
    return mergeWithDefault(agentsConfig, fallback, EModelEndpoint.agents);
  }

  return fallback;
}
/**
 * Builds the effective file configuration by layering a dynamic configuration
 * over the built-in `fileConfig`.
 *
 * Size limits in `dynamic` are converted with `mbToBytes`, and MIME type
 * patterns are compiled with `convertStringsToRegex`. The built-in `fileConfig`
 * is not modified: the top-level sections are copied up front and every endpoint
 * entry is shallow-copied before it is changed.
 *
 * @param dynamic - Dynamic configuration, or `undefined` to get a copy of the defaults
 * @returns The merged configuration
 */
export function mergeFileConfig(dynamic: z.infer<typeof fileConfigSchema> | undefined): FileConfig {
  const result: FileConfig = {
    ...fileConfig,
    endpoints: { ...fileConfig.endpoints },
    ocr: { ...fileConfig.ocr, supportedMimeTypes: fileConfig.ocr?.supportedMimeTypes || [] },
    text: { ...fileConfig.text, supportedMimeTypes: fileConfig.text?.supportedMimeTypes || [] },
    stt: { ...fileConfig.stt, supportedMimeTypes: fileConfig.stt?.supportedMimeTypes || [] },
  };
  if (!dynamic) {
    return result;
  }

  const { serverFileSizeLimit, avatarSizeLimit, fileTokenLimit, clientImageResize, ocr, text } =
    dynamic;

  if (serverFileSizeLimit !== undefined) {
    result.serverFileSizeLimit = mbToBytes(serverFileSizeLimit);
  }
  if (avatarSizeLimit !== undefined) {
    result.avatarSizeLimit = mbToBytes(avatarSizeLimit);
  }
  if (fileTokenLimit !== undefined) {
    result.fileTokenLimit = fileTokenLimit;
  }

  /** Individual resize settings override the defaults one by one */
  if (clientImageResize !== undefined) {
    result.clientImageResize = { ...result.clientImageResize, ...clientImageResize };
  }

  if (ocr !== undefined) {
    result.ocr = { ...result.ocr, ...ocr };
    if (ocr.supportedMimeTypes) {
      result.ocr.supportedMimeTypes = convertStringsToRegex(ocr.supportedMimeTypes);
    }
  }
  if (text !== undefined) {
    result.text = { ...result.text, ...text };
    if (text.supportedMimeTypes) {
      result.text.supportedMimeTypes = convertStringsToRegex(text.supportedMimeTypes);
    }
  }

  const endpointOverrides = dynamic.endpoints as Record<string, EndpointFileConfig> | undefined;
  if (!endpointOverrides) {
    return result;
  }

  for (const key in endpointOverrides) {
    const override = endpointOverrides[key];

    /** Work on a copy so the built-in endpoint objects are never mutated */
    const existing = result.endpoints[key];
    const target: EndpointFileConfig = existing ? { ...existing } : {};
    result.endpoints[key] = target;

    /** A disabled endpoint is zeroed out; its other overrides are ignored */
    if (override.disabled === true) {
      Object.assign(target, {
        disabled: true,
        fileLimit: 0,
        fileSizeLimit: 0,
        totalSizeLimit: 0,
        supportedMimeTypes: [],
      });
      continue;
    }

    if (override.fileSizeLimit !== undefined) {
      target.fileSizeLimit = mbToBytes(override.fileSizeLimit);
    }
    if (override.totalSizeLimit !== undefined) {
      target.totalSizeLimit = mbToBytes(override.totalSizeLimit);
    }
    if (override.fileLimit !== undefined) {
      target.fileLimit = override.fileLimit;
    }
    if (override.disabled !== undefined) {
      target.disabled = override.disabled;
    }
    if (override.supportedMimeTypes) {
      target.supportedMimeTypes = convertStringsToRegex(
        override.supportedMimeTypes as unknown as string[],
      );
    }
  }

  return result;
}