refactor: replace synchronous file reading with asynchronous methods for improved performance and memory management

Danny Avila 2025-09-20 09:48:06 -04:00
parent 671f19bb7f
commit a1476cab79
5 changed files with 136 additions and 29 deletions

@@ -1,11 +1,10 @@
-const fs = require('fs');
 const path = require('path');
 const axios = require('axios');
 const FormData = require('form-data');
 const nodemailer = require('nodemailer');
 const handlebars = require('handlebars');
 const { logger } = require('@librechat/data-schemas');
-const { logAxiosError, isEnabled } = require('@librechat/api');
+const { logAxiosError, isEnabled, readFileAsString } = require('@librechat/api');

 /**
  * Sends an email using Mailgun API.
@@ -93,8 +92,7 @@ const sendEmailViaSMTP = async ({ transporterOptions, mailOptions }) => {
  */
 const sendEmail = async ({ email, subject, payload, template, throwError = true }) => {
   try {
-    // Read and compile the email template
-    const source = fs.readFileSync(path.join(__dirname, 'emails', template), 'utf8');
+    const { content: source } = await readFileAsString(path.join(__dirname, 'emails', template));
     const compiledTemplate = handlebars.compile(source);
     const html = compiledTemplate(payload);
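
The hunk above swaps a blocking fs.readFileSync for the awaited helper while leaving the Handlebars flow untouched. A minimal sketch of the resulting pattern, assuming the same emails directory layout (renderEmail is a hypothetical name, not part of the commit):

import path from 'path';
import handlebars from 'handlebars';
import { readFileAsString } from '@librechat/api';

/** Hypothetical helper, not part of the commit: render a template off the event loop */
async function renderEmail(template: string, payload: Record<string, unknown>): Promise<string> {
  // readFileAsString resolves to { content, bytes }; only the content is needed here
  const { content: source } = await readFileAsString(path.join(__dirname, 'emails', template));
  return handlebars.compile(source)(payload);
}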

@@ -22,6 +22,7 @@ import type {
   OCRImage,
 } from '~/types';
 import { logAxiosError, createAxiosInstance } from '~/utils/axios';
+import { readFileAsBuffer } from '~/utils/files';
 import { loadServiceKey } from '~/utils/key';

 const axios = createAxiosInstance();
@@ -464,7 +465,9 @@ export const uploadAzureMistralOCR = async (
   const { apiKey, baseURL } = await loadAuthConfig(context);
   const model = getModelConfig(context.req.config?.ocr);

-  const buffer = fs.readFileSync(context.file.path);
+  const { content: buffer } = await readFileAsBuffer(context.file.path, {
+    fileSize: context.file.size,
+  });
   const base64 = buffer.toString('base64');
   /** Uses actual mimetype of the file, 'image/jpeg' as fallback since it seems to be accepted regardless of mismatch */
   const base64Prefix = `data:${context.file.mimetype || 'image/jpeg'};base64,`;
@@ -691,7 +694,9 @@ export const uploadGoogleVertexMistralOCR = async (
   const { serviceAccount, accessToken } = await loadGoogleAuthConfig();
   const model = getModelConfig(context.req.config?.ocr);

-  const buffer = fs.readFileSync(context.file.path);
+  const { content: buffer } = await readFileAsBuffer(context.file.path, {
+    fileSize: context.file.size,
+  });
   const base64 = buffer.toString('base64');
   const base64Prefix = `data:${context.file.mimetype || 'application/pdf'};base64,`;
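
Both OCR paths now share the same read-then-encode shape, and passing multer's known size via fileSize lets the helper skip its stat() call. A minimal sketch of that pattern (toDataUri and the inline file shape are illustrative, not from the commit):

import { readFileAsBuffer } from '~/utils/files';

/** Illustrative only: turn an uploaded file into a base64 data URI for an OCR payload */
async function toDataUri(file: { path: string; size: number; mimetype?: string }): Promise<string> {
  // The fileSize hint avoids an extra stat(); oversized files are streamed rather than read whole
  const { content: buffer } = await readFileAsBuffer(file.path, { fileSize: file.size });
  return `data:${file.mimetype || 'application/pdf'};base64,${buffer.toString('base64')}`;
}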

@@ -1,12 +1,11 @@
 import axios from 'axios';
 import FormData from 'form-data';
 import { createReadStream } from 'fs';
-import { stat, readFile } from 'fs/promises';
 import { logger } from '@librechat/data-schemas';
 import { FileSources } from 'librechat-data-provider';
 import type { Request as ServerRequest } from 'express';
+import { logAxiosError, readFileAsString } from '~/utils';
 import { generateShortLivedToken } from '~/crypto/jwt';
-import { logAxiosError } from '~/utils';

 /**
  * Attempts to parse text using RAG API, falls back to native text parsing
@@ -103,26 +102,10 @@ export async function parseTextNative(file: Express.Multer.File): Promise<{
   bytes: number;
   source: string;
 }> {
-  const bytes = file.size || (await stat(file.path)).size;
-  if (bytes > 10 * 1024 * 1024) {
-    const chunks: string[] = [];
-    const stream = createReadStream(file.path, {
-      encoding: 'utf8',
-      highWaterMark: 64 * 1024,
-    });
+  const { content: text, bytes } = await readFileAsString(file.path, {
+    fileSize: file.size,
+  });
-    for await (const chunk of stream) {
-      chunks.push(chunk);
-    }
-    return {
-      text: chunks.join(''),
-      bytes,
-      source: FileSources.text,
-    };
-  }
-  const text = await readFile(file.path, 'utf8');
   return {
     text,
     bytes,
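
With the size check, streaming branch, and fallback read all folded into readFileAsString, parseTextNative reduces to a single call plus its return shape. A sketch of the condensed post-commit flow (the structural file type stands in for Express.Multer.File to keep the snippet self-contained):

import { FileSources } from 'librechat-data-provider';
import { readFileAsString } from '~/utils';

/** Sketch of the condensed native text parse after this commit */
async function parseTextNativeSketch(file: { path: string; size: number }) {
  const { content: text, bytes } = await readFileAsString(file.path, {
    fileSize: file.size, // multer already measured the upload
  });
  return { text, bytes, source: FileSources.text };
}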

@@ -1,5 +1,7 @@
 import path from 'path';
 import crypto from 'node:crypto';
+import { createReadStream } from 'fs';
+import { readFile, stat } from 'fs/promises';

 /**
  * Sanitize a filename by removing any directory components, replacing non-alphanumeric characters
@@ -31,3 +33,122 @@ export function sanitizeFilename(inputName: string): string {
   return name;
 }

+/**
+ * Options for reading files
+ */
+export interface ReadFileOptions {
+  encoding?: BufferEncoding;
+  /** Size threshold in bytes. Files larger than this will be streamed. Default: 10MB */
+  streamThreshold?: number;
+  /** Size of chunks when streaming. Default: 64KB */
+  highWaterMark?: number;
+  /** File size in bytes if known (e.g. from multer). Avoids extra stat() call. */
+  fileSize?: number;
+}
+
+/**
+ * Result from reading a file
+ */
+export interface ReadFileResult<T> {
+  content: T;
+  bytes: number;
+}
+
+/**
+ * Reads a file asynchronously. Uses streaming for large files to avoid memory issues.
+ *
+ * @param filePath - Path to the file to read
+ * @param options - Options for reading the file
+ * @returns Promise resolving to the file contents and size
+ * @throws Error if the file cannot be read
+ */
+export async function readFileAsString(
+  filePath: string,
+  options: ReadFileOptions = {},
+): Promise<ReadFileResult<string>> {
+  const {
+    encoding = 'utf8',
+    streamThreshold = 10 * 1024 * 1024, // 10MB
+    highWaterMark = 64 * 1024, // 64KB
+    fileSize,
+  } = options;
+
+  // Get file size if not provided
+  const bytes = fileSize ?? (await stat(filePath)).size;
+
+  // For large files, use streaming to avoid memory issues
+  if (bytes > streamThreshold) {
+    const chunks: string[] = [];
+    const stream = createReadStream(filePath, {
+      encoding,
+      highWaterMark,
+    });
+    for await (const chunk of stream) {
+      chunks.push(chunk as string);
+    }
+    return { content: chunks.join(''), bytes };
+  }
+
+  // For smaller files, read directly
+  const content = await readFile(filePath, encoding);
+  return { content, bytes };
+}
+
+/**
+ * Reads a file as a Buffer asynchronously. Uses streaming for large files.
+ *
+ * @param filePath - Path to the file to read
+ * @param options - Options for reading the file
+ * @returns Promise resolving to the file contents and size
+ * @throws Error if the file cannot be read
+ */
+export async function readFileAsBuffer(
+  filePath: string,
+  options: Omit<ReadFileOptions, 'encoding'> = {},
+): Promise<ReadFileResult<Buffer>> {
+  const {
+    streamThreshold = 10 * 1024 * 1024, // 10MB
+    highWaterMark = 64 * 1024, // 64KB
+    fileSize,
+  } = options;
+
+  // Get file size if not provided
+  const bytes = fileSize ?? (await stat(filePath)).size;
+
+  // For large files, use streaming to avoid memory issues
+  if (bytes > streamThreshold) {
+    const chunks: Buffer[] = [];
+    const stream = createReadStream(filePath, {
+      highWaterMark,
+    });
+    for await (const chunk of stream) {
+      chunks.push(chunk as Buffer);
+    }
+    return { content: Buffer.concat(chunks), bytes };
+  }
+
+  // For smaller files, read directly
+  const content = await readFile(filePath);
+  return { content, bytes };
+}
+
+/**
+ * Reads a JSON file asynchronously
+ *
+ * @param filePath - Path to the JSON file to read
+ * @param options - Options for reading the file
+ * @returns Promise resolving to the parsed JSON object
+ * @throws Error if the file cannot be read or parsed
+ */
+export async function readJsonFile<T = unknown>(
+  filePath: string,
+  options: Omit<ReadFileOptions, 'encoding'> = {},
+): Promise<T> {
+  const { content } = await readFileAsString(filePath, { ...options, encoding: 'utf8' });
+  return JSON.parse(content);
+}
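
The defaults (10MB threshold, 64KB chunks) can be overridden per call. A usage sketch with hypothetical paths, forcing the streaming branch for anything over 1MB and loading typed JSON in one step:

import { readFileAsString, readJsonFile } from '~/utils/files';

async function demo(): Promise<void> {
  // Lower the threshold so a multi-megabyte log is streamed in 128KB chunks
  const { content, bytes } = await readFileAsString('/tmp/big.log', {
    streamThreshold: 1024 * 1024,
    highWaterMark: 128 * 1024,
  });
  console.log(`read ${bytes} bytes as ${content.length} chars`);

  // readJsonFile = readFileAsString + JSON.parse, typed via the generic parameter
  const pkg = await readJsonFile<{ version?: string }>('/tmp/package.json');
  console.log(pkg.version);
}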

@ -1,7 +1,7 @@
import fs from 'fs';
import path from 'path';
import axios from 'axios';
import { logger } from '@librechat/data-schemas';
import { readFileAsString } from './files';
export interface GoogleServiceKey {
type?: string;
@@ -63,7 +63,7 @@ export async function loadServiceKey(keyPath: string): Promise<GoogleServiceKey
   // It's a file path
   try {
     const absolutePath = path.isAbsolute(keyPath) ? keyPath : path.resolve(keyPath);
-    const fileContent = fs.readFileSync(absolutePath, 'utf8');
+    const { content: fileContent } = await readFileAsString(absolutePath);
     serviceKey = JSON.parse(fileContent);
   } catch (error) {
     logger.error(`Failed to load service key from file: ${keyPath}`, error);
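
Since fileContent is immediately passed to JSON.parse, the new readJsonFile helper could collapse this into one call. A sketch of that alternative, not what the commit ships:

import { readJsonFile } from './files';

/** Sketch only: read + parse a service key in one step.
 *  GoogleServiceKey is the interface declared earlier in this file. */
async function loadKeySketch(absolutePath: string): Promise<GoogleServiceKey> {
  return readJsonFile<GoogleServiceKey>(absolutePath);
}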