mirror of https://github.com/danny-avila/LibreChat.git
synced 2025-09-22 06:00:56 +02:00

refactor: replace synchronous file reading with asynchronous methods for improved performance and memory management

This commit is contained in:
parent 671f19bb7f
commit a1476cab79

5 changed files with 136 additions and 29 deletions
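Each of the five files follows the same before/after pattern: a blocking fs.readFileSync call is replaced by an awaited helper that resolves to { content, bytes }. A minimal sketch of that pattern, assuming readFileAsString is consumed from the public @librechat/api export as the email utility does below; the function and path names here are placeholders, not code from the commit:

import { readFileAsString } from '@librechat/api';

// Illustrative only: shows the shape of the replacement.
async function loadTemplate(templatePath: string): Promise<string> {
  // Before: const source = fs.readFileSync(templatePath, 'utf8'); (blocks the event loop)
  const { content, bytes } = await readFileAsString(templatePath);
  console.log(`read ${bytes} bytes asynchronously`);
  return content;
}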
@@ -1,11 +1,10 @@
-const fs = require('fs');
 const path = require('path');
 const axios = require('axios');
 const FormData = require('form-data');
 const nodemailer = require('nodemailer');
 const handlebars = require('handlebars');
 const { logger } = require('@librechat/data-schemas');
-const { logAxiosError, isEnabled } = require('@librechat/api');
+const { logAxiosError, isEnabled, readFileAsString } = require('@librechat/api');

 /**
  * Sends an email using Mailgun API.
@@ -93,8 +92,7 @@ const sendEmailViaSMTP = async ({ transporterOptions, mailOptions }) => {
  */
 const sendEmail = async ({ email, subject, payload, template, throwError = true }) => {
   try {
-    // Read and compile the email template
-    const source = fs.readFileSync(path.join(__dirname, 'emails', template), 'utf8');
+    const { content: source } = await readFileAsString(path.join(__dirname, 'emails', template));
     const compiledTemplate = handlebars.compile(source);
     const html = compiledTemplate(payload);

@@ -22,6 +22,7 @@ import type {
   OCRImage,
 } from '~/types';
 import { logAxiosError, createAxiosInstance } from '~/utils/axios';
+import { readFileAsBuffer } from '~/utils/files';
 import { loadServiceKey } from '~/utils/key';

 const axios = createAxiosInstance();
@@ -464,7 +465,9 @@ export const uploadAzureMistralOCR = async (
   const { apiKey, baseURL } = await loadAuthConfig(context);
   const model = getModelConfig(context.req.config?.ocr);

-  const buffer = fs.readFileSync(context.file.path);
+  const { content: buffer } = await readFileAsBuffer(context.file.path, {
+    fileSize: context.file.size,
+  });
   const base64 = buffer.toString('base64');
   /** Uses actual mimetype of the file, 'image/jpeg' as fallback since it seems to be accepted regardless of mismatch */
   const base64Prefix = `data:${context.file.mimetype || 'image/jpeg'};base64,`;
@@ -691,7 +694,9 @@ export const uploadGoogleVertexMistralOCR = async (
   const { serviceAccount, accessToken } = await loadGoogleAuthConfig();
   const model = getModelConfig(context.req.config?.ocr);

-  const buffer = fs.readFileSync(context.file.path);
+  const { content: buffer } = await readFileAsBuffer(context.file.path, {
+    fileSize: context.file.size,
+  });
   const base64 = buffer.toString('base64');
   const base64Prefix = `data:${context.file.mimetype || 'application/pdf'};base64,`;

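Both OCR uploaders reduce to the same flow: read the upload as a Buffer, base64-encode it, and prepend a data-URI prefix derived from the mimetype. A hedged sketch of that flow using the new helper; the toDataUri wrapper and its defaults are illustrative, only readFileAsBuffer and its fileSize option come from the commit:

import { readFileAsBuffer } from '~/utils/files';

// Illustrative helper, not part of the commit.
async function toDataUri(filePath: string, size?: number, mimetype = 'image/jpeg'): Promise<string> {
  // Passing fileSize (already known from multer) lets the helper skip an extra stat() call.
  const { content: buffer } = await readFileAsBuffer(filePath, { fileSize: size });
  return `data:${mimetype};base64,${buffer.toString('base64')}`;
}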
@@ -1,12 +1,11 @@
 import axios from 'axios';
 import FormData from 'form-data';
 import { createReadStream } from 'fs';
-import { stat, readFile } from 'fs/promises';
 import { logger } from '@librechat/data-schemas';
 import { FileSources } from 'librechat-data-provider';
 import type { Request as ServerRequest } from 'express';
+import { logAxiosError, readFileAsString } from '~/utils';
 import { generateShortLivedToken } from '~/crypto/jwt';
-import { logAxiosError } from '~/utils';

 /**
  * Attempts to parse text using RAG API, falls back to native text parsing
@@ -103,26 +102,10 @@ export async function parseTextNative(file: Express.Multer.File): Promise<{
   bytes: number;
   source: string;
 }> {
-  const bytes = file.size || (await stat(file.path)).size;
-  if (bytes > 10 * 1024 * 1024) {
-    const chunks: string[] = [];
-    const stream = createReadStream(file.path, {
-      encoding: 'utf8',
-      highWaterMark: 64 * 1024,
-    });
+  const { content: text, bytes } = await readFileAsString(file.path, {
+    fileSize: file.size,
+  });

-    for await (const chunk of stream) {
-      chunks.push(chunk);
-    }
-
-    return {
-      text: chunks.join(''),
-      bytes,
-      source: FileSources.text,
-    };
-  }
-
-  const text = await readFile(file.path, 'utf8');
   return {
     text,
     bytes,

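Callers of parseTextNative are unaffected by the rewrite: the 10MB stream-or-read decision now lives inside readFileAsString, and the return shape stays { text, bytes, source }. A small illustrative caller, assuming a multer-style upload handler (the handler itself is not part of the commit):

// Illustrative caller, not part of the commit.
async function handleTextUpload(file: Express.Multer.File) {
  const { text, bytes, source } = await parseTextNative(file);
  return { preview: text.slice(0, 200), bytes, source };
}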
@@ -1,5 +1,7 @@
 import path from 'path';
 import crypto from 'node:crypto';
+import { createReadStream } from 'fs';
+import { readFile, stat } from 'fs/promises';

 /**
  * Sanitize a filename by removing any directory components, replacing non-alphanumeric characters
@@ -31,3 +33,122 @@ export function sanitizeFilename(inputName: string): string {

   return name;
 }
+
+/**
+ * Options for reading files
+ */
+export interface ReadFileOptions {
+  encoding?: BufferEncoding;
+  /** Size threshold in bytes. Files larger than this will be streamed. Default: 10MB */
+  streamThreshold?: number;
+  /** Size of chunks when streaming. Default: 64KB */
+  highWaterMark?: number;
+  /** File size in bytes if known (e.g. from multer). Avoids extra stat() call. */
+  fileSize?: number;
+}
+
+/**
+ * Result from reading a file
+ */
+export interface ReadFileResult<T> {
+  content: T;
+  bytes: number;
+}
+
+/**
+ * Reads a file asynchronously. Uses streaming for large files to avoid memory issues.
+ *
+ * @param filePath - Path to the file to read
+ * @param options - Options for reading the file
+ * @returns Promise resolving to the file contents and size
+ * @throws Error if the file cannot be read
+ */
+export async function readFileAsString(
+  filePath: string,
+  options: ReadFileOptions = {},
+): Promise<ReadFileResult<string>> {
+  const {
+    encoding = 'utf8',
+    streamThreshold = 10 * 1024 * 1024, // 10MB
+    highWaterMark = 64 * 1024, // 64KB
+    fileSize,
+  } = options;
+
+  // Get file size if not provided
+  const bytes = fileSize ?? (await stat(filePath)).size;
+
+  // For large files, use streaming to avoid memory issues
+  if (bytes > streamThreshold) {
+    const chunks: string[] = [];
+    const stream = createReadStream(filePath, {
+      encoding,
+      highWaterMark,
+    });
+
+    for await (const chunk of stream) {
+      chunks.push(chunk as string);
+    }
+
+    return { content: chunks.join(''), bytes };
+  }
+
+  // For smaller files, read directly
+  const content = await readFile(filePath, encoding);
+  return { content, bytes };
+}
+
+/**
+ * Reads a file as a Buffer asynchronously. Uses streaming for large files.
+ *
+ * @param filePath - Path to the file to read
+ * @param options - Options for reading the file
+ * @returns Promise resolving to the file contents and size
+ * @throws Error if the file cannot be read
+ */
+export async function readFileAsBuffer(
+  filePath: string,
+  options: Omit<ReadFileOptions, 'encoding'> = {},
+): Promise<ReadFileResult<Buffer>> {
+  const {
+    streamThreshold = 10 * 1024 * 1024, // 10MB
+    highWaterMark = 64 * 1024, // 64KB
+    fileSize,
+  } = options;
+
+  // Get file size if not provided
+  const bytes = fileSize ?? (await stat(filePath)).size;
+
+  // For large files, use streaming to avoid memory issues
+  if (bytes > streamThreshold) {
+    const chunks: Buffer[] = [];
+    const stream = createReadStream(filePath, {
+      highWaterMark,
+    });
+
+    for await (const chunk of stream) {
+      chunks.push(chunk as Buffer);
+    }
+
+    return { content: Buffer.concat(chunks), bytes };
+  }
+
+  // For smaller files, read directly
+  const content = await readFile(filePath);
+  return { content, bytes };
+}
+
+/**
+ * Reads a JSON file asynchronously
+ *
+ * @param filePath - Path to the JSON file to read
+ * @param options - Options for reading the file
+ * @returns Promise resolving to the parsed JSON object
+ * @throws Error if the file cannot be read or parsed
+ */
+export async function readJsonFile<T = unknown>(
+  filePath: string,
+  options: Omit<ReadFileOptions, 'encoding'> = {},
+): Promise<T> {
+  const { content } = await readFileAsString(filePath, { ...options, encoding: 'utf8' });
+  return JSON.parse(content);
+}

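Together the three helpers cover text, binary, and JSON reads behind one size-aware code path: files at or below the 10MB streamThreshold go through fs/promises readFile, larger ones are streamed in 64KB chunks. A short hedged usage sketch; the file names are placeholders and the import path mirrors the one used by the OCR module:

import { readFileAsString, readFileAsBuffer, readJsonFile } from '~/utils/files';

// Illustrative usage, not part of the commit.
async function demo() {
  // Text: small files are read directly, large ones are streamed transparently.
  const { content: text, bytes } = await readFileAsString('./notes.txt');

  // Binary: pass fileSize when it is already known (e.g. from multer) to avoid a stat() call.
  const { content: image } = await readFileAsBuffer('./scan.png', { fileSize: 4321 });

  // JSON: thin wrapper over readFileAsString plus JSON.parse.
  const pkg = await readJsonFile<{ name?: string }>('./package.json');

  return { bytes, imageBytes: image.length, name: pkg.name };
}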
@@ -1,7 +1,7 @@
-import fs from 'fs';
 import path from 'path';
 import axios from 'axios';
 import { logger } from '@librechat/data-schemas';
+import { readFileAsString } from './files';

 export interface GoogleServiceKey {
   type?: string;
@@ -63,7 +63,7 @@ export async function loadServiceKey(keyPath: string): Promise<GoogleServiceKey
     // It's a file path
     try {
       const absolutePath = path.isAbsolute(keyPath) ? keyPath : path.resolve(keyPath);
-      const fileContent = fs.readFileSync(absolutePath, 'utf8');
+      const { content: fileContent } = await readFileAsString(absolutePath);
       serviceKey = JSON.parse(fileContent);
     } catch (error) {
       logger.error(`Failed to load service key from file: ${keyPath}`, error);