diff --git a/api/server/utils/sendEmail.js b/api/server/utils/sendEmail.js index ee64b209f..432a571ff 100644 --- a/api/server/utils/sendEmail.js +++ b/api/server/utils/sendEmail.js @@ -1,11 +1,10 @@ -const fs = require('fs'); const path = require('path'); const axios = require('axios'); const FormData = require('form-data'); const nodemailer = require('nodemailer'); const handlebars = require('handlebars'); const { logger } = require('@librechat/data-schemas'); -const { logAxiosError, isEnabled } = require('@librechat/api'); +const { logAxiosError, isEnabled, readFileAsString } = require('@librechat/api'); /** * Sends an email using Mailgun API. @@ -93,8 +92,7 @@ const sendEmailViaSMTP = async ({ transporterOptions, mailOptions }) => { */ const sendEmail = async ({ email, subject, payload, template, throwError = true }) => { try { - // Read and compile the email template - const source = fs.readFileSync(path.join(__dirname, 'emails', template), 'utf8'); + const { content: source } = await readFileAsString(path.join(__dirname, 'emails', template)); const compiledTemplate = handlebars.compile(source); const html = compiledTemplate(payload); diff --git a/packages/api/src/files/mistral/crud.ts b/packages/api/src/files/mistral/crud.ts index dda29216f..e0ef69ab3 100644 --- a/packages/api/src/files/mistral/crud.ts +++ b/packages/api/src/files/mistral/crud.ts @@ -22,6 +22,7 @@ import type { OCRImage, } from '~/types'; import { logAxiosError, createAxiosInstance } from '~/utils/axios'; +import { readFileAsBuffer } from '~/utils/files'; import { loadServiceKey } from '~/utils/key'; const axios = createAxiosInstance(); @@ -464,7 +465,9 @@ export const uploadAzureMistralOCR = async ( const { apiKey, baseURL } = await loadAuthConfig(context); const model = getModelConfig(context.req.config?.ocr); - const buffer = fs.readFileSync(context.file.path); + const { content: buffer } = await readFileAsBuffer(context.file.path, { + fileSize: context.file.size, + }); const base64 = 
buffer.toString('base64'); /** Uses actual mimetype of the file, 'image/jpeg' as fallback since it seems to be accepted regardless of mismatch */ const base64Prefix = `data:${context.file.mimetype || 'image/jpeg'};base64,`; @@ -691,7 +694,9 @@ export const uploadGoogleVertexMistralOCR = async ( const { serviceAccount, accessToken } = await loadGoogleAuthConfig(); const model = getModelConfig(context.req.config?.ocr); - const buffer = fs.readFileSync(context.file.path); + const { content: buffer } = await readFileAsBuffer(context.file.path, { + fileSize: context.file.size, + }); const base64 = buffer.toString('base64'); const base64Prefix = `data:${context.file.mimetype || 'application/pdf'};base64,`; diff --git a/packages/api/src/files/text.ts b/packages/api/src/files/text.ts index 3635beaa0..06e781bb5 100644 --- a/packages/api/src/files/text.ts +++ b/packages/api/src/files/text.ts @@ -1,12 +1,11 @@ import axios from 'axios'; import FormData from 'form-data'; import { createReadStream } from 'fs'; -import { stat, readFile } from 'fs/promises'; import { logger } from '@librechat/data-schemas'; import { FileSources } from 'librechat-data-provider'; import type { Request as ServerRequest } from 'express'; +import { logAxiosError, readFileAsString } from '~/utils'; import { generateShortLivedToken } from '~/crypto/jwt'; -import { logAxiosError } from '~/utils'; /** * Attempts to parse text using RAG API, falls back to native text parsing @@ -103,26 +102,10 @@ export async function parseTextNative(file: Express.Multer.File): Promise<{ bytes: number; source: string; }> { - const bytes = file.size || (await stat(file.path)).size; - if (bytes > 10 * 1024 * 1024) { - const chunks: string[] = []; - const stream = createReadStream(file.path, { - encoding: 'utf8', - highWaterMark: 64 * 1024, - }); + const { content: text, bytes } = await readFileAsString(file.path, { + fileSize: file.size, + }); - for await (const chunk of stream) { - chunks.push(chunk); - } - - return { - 
/** Default size (10MB) above which files are read through a stream instead of a single read. */
const DEFAULT_STREAM_THRESHOLD = 10 * 1024 * 1024;

/** Default chunk size (64KB) used when a file is read through a stream. */
const DEFAULT_HIGH_WATER_MARK = 64 * 1024;

/**
 * Options for reading files
 */
export interface ReadFileOptions {
  /** Text encoding used to decode the file. Default: 'utf8' */
  encoding?: BufferEncoding;
  /** Size threshold in bytes. Files larger than this will be streamed. Default: 10MB */
  streamThreshold?: number;
  /** Size of chunks when streaming. Default: 64KB */
  highWaterMark?: number;
  /**
   * File size in bytes if known (e.g. from multer). Avoids an extra stat() call.
   * A missing, zero, negative or non-finite value is treated as unknown.
   */
  fileSize?: number;
}

/**
 * Result from reading a file
 */
export interface ReadFileResult<T> {
  /** The file contents. */
  content: T;
  /** The file size in bytes (caller-supplied when known, otherwise taken from stat()). */
  bytes: number;
}

/**
 * Resolves the size used for the stream/direct decision and reported back to the caller.
 *
 * A caller-provided size is only trusted when it is a positive finite number. Anything
 * else falls back to stat(), which matches the `file.size || stat()` behavior the callers
 * had before these helpers existed and keeps `bytes` from being reported as 0 for a
 * non-empty file.
 */
async function resolveFileSize(filePath: string, fileSize?: number): Promise<number> {
  if (typeof fileSize === 'number' && Number.isFinite(fileSize) && fileSize > 0) {
    return fileSize;
  }
  return (await stat(filePath)).size;
}

/**
 * Reads a file asynchronously as a string.
 *
 * Files larger than `streamThreshold` are read in `highWaterMark`-sized chunks rather than
 * with one large read. Note that the complete content is still assembled in memory before
 * it is returned.
 *
 * @param filePath - Path to the file to read
 * @param options - Options for reading the file
 * @returns Promise resolving to the file contents and size
 * @throws Error if the file cannot be read
 */
export async function readFileAsString(
  filePath: string,
  options: ReadFileOptions = {},
): Promise<ReadFileResult<string>> {
  const {
    encoding = 'utf8',
    streamThreshold = DEFAULT_STREAM_THRESHOLD,
    highWaterMark = DEFAULT_HIGH_WATER_MARK,
    fileSize,
  } = options;

  const bytes = await resolveFileSize(filePath, fileSize);

  if (bytes > streamThreshold) {
    const chunks: string[] = [];
    // Passing `encoding` makes the stream emit decoded strings, so multi-byte characters
    // that straddle a chunk boundary are decoded correctly.
    const stream = createReadStream(filePath, { encoding, highWaterMark });

    for await (const chunk of stream) {
      chunks.push(chunk as string);
    }

    return { content: chunks.join(''), bytes };
  }

  const content = await readFile(filePath, encoding);
  return { content, bytes };
}

/**
 * Reads a file asynchronously as a Buffer.
 *
 * Files larger than `streamThreshold` are read in `highWaterMark`-sized chunks and then
 * concatenated; the complete content is still held in memory when returned.
 *
 * @param filePath - Path to the file to read
 * @param options - Options for reading the file
 * @returns Promise resolving to the file contents and size
 * @throws Error if the file cannot be read
 */
export async function readFileAsBuffer(
  filePath: string,
  options: Omit<ReadFileOptions, 'encoding'> = {},
): Promise<ReadFileResult<Buffer>> {
  const {
    streamThreshold = DEFAULT_STREAM_THRESHOLD,
    highWaterMark = DEFAULT_HIGH_WATER_MARK,
    fileSize,
  } = options;

  const bytes = await resolveFileSize(filePath, fileSize);

  if (bytes > streamThreshold) {
    const chunks: Buffer[] = [];
    // No encoding is set, so the stream yields raw Buffer chunks.
    const stream = createReadStream(filePath, { highWaterMark });

    for await (const chunk of stream) {
      chunks.push(chunk as Buffer);
    }

    return { content: Buffer.concat(chunks), bytes };
  }

  const content = await readFile(filePath);
  return { content, bytes };
}

/**
 * Reads a JSON file asynchronously
 *
 * The file is always decoded as UTF-8. A leading byte-order mark is removed before
 * parsing, because JSON.parse rejects it.
 *
 * @param filePath - Path to the JSON file to read
 * @param options - Options for reading the file
 * @returns Promise resolving to the parsed JSON value (not validated against `T`)
 * @throws Error if the file cannot be read or parsed
 */
export async function readJsonFile<T = unknown>(
  filePath: string,
  options: Omit<ReadFileOptions, 'encoding'> = {},
): Promise<T> {
  const { content } = await readFileAsString(filePath, { ...options, encoding: 'utf8' });
  const json = content.charCodeAt(0) === 0xfeff ? content.slice(1) : content;
  return JSON.parse(json) as T;
}