📂 refactor: File Read Operations (#9747)

* fix: axios response logging for text parsing, remove console logging, remove jsdoc
* refactor: error logging in logAxiosError function to handle various error types with type guards
* refactor: enhance text parsing with improved error handling and async file reading
* refactor: replace synchronous file reading with asynchronous methods for improved performance and memory management
* ci: update tests
2026-04-05 07:17:18 +02:00 · 2025-09-20 10:17:24 -04:00 · 2025-09-20 10:17:24 -04:00 · 2489670f54
commit 2489670f54
parent 0352067da2
10 changed files with 692 additions and 83 deletions
--- a/packages/api/src/utils/tests/files.test.ts
+++ b/packages/api/src/utils/tests/files.test.ts
@ -0,0 +1,414 @@
+import { createReadStream } from 'fs';
+import { readFile, stat } from 'fs/promises';
+import { Readable } from 'stream';
+import { readFileAsString, readFileAsBuffer, readJsonFile } from '../files';
+
+jest.mock('fs');
+jest.mock('fs/promises');
+
+describe('File utilities', () => {
+  const mockFilePath = '/test/file.txt';
+  const smallContent = 'Hello, World!';
+  const largeContent = 'x'.repeat(11 * 1024 * 1024); // 11MB of 'x'
+
+  beforeEach(() => {
+    jest.clearAllMocks();
+  });
+
+  describe('readFileAsString', () => {
+    it('should read small files directly without streaming', async () => {
+      const fileSize = Buffer.byteLength(smallContent);
+
+      (stat as jest.Mock).mockResolvedValue({ size: fileSize });
+      (readFile as jest.Mock).mockResolvedValue(smallContent);
+
+      const result = await readFileAsString(mockFilePath);
+
+      expect(result).toEqual({
+        content: smallContent,
+        bytes: fileSize,
+      });
+      expect(stat).toHaveBeenCalledWith(mockFilePath);
+      expect(readFile).toHaveBeenCalledWith(mockFilePath, 'utf8');
+      expect(createReadStream).not.toHaveBeenCalled();
+    });
+
+    it('should use provided fileSize to avoid stat call', async () => {
+      const fileSize = Buffer.byteLength(smallContent);
+
+      (readFile as jest.Mock).mockResolvedValue(smallContent);
+
+      const result = await readFileAsString(mockFilePath, { fileSize });
+
+      expect(result).toEqual({
+        content: smallContent,
+        bytes: fileSize,
+      });
+      expect(stat).not.toHaveBeenCalled();
+      expect(readFile).toHaveBeenCalledWith(mockFilePath, 'utf8');
+    });
+
+    it('should stream large files', async () => {
+      const fileSize = Buffer.byteLength(largeContent);
+
+      (stat as jest.Mock).mockResolvedValue({ size: fileSize });
+
+      // Mock stream that emits the content as three string chunks, then ends
+      const chunks = [
+        largeContent.substring(0, 5000000),
+        largeContent.substring(5000000, 10000000),
+        largeContent.substring(10000000),
+      ];
+
+      const mockStream = new Readable({
+        read() {
+          if (chunks.length > 0) {
+            this.push(chunks.shift());
+          } else {
+            this.push(null); // pushing null signals end of stream
+          }
+        },
+      });
+
+      (createReadStream as jest.Mock).mockReturnValue(mockStream);
+
+      const result = await readFileAsString(mockFilePath);
+
+      expect(result).toEqual({
+        content: largeContent,
+        bytes: fileSize,
+      });
+      expect(stat).toHaveBeenCalledWith(mockFilePath);
+      expect(createReadStream).toHaveBeenCalledWith(mockFilePath, {
+        encoding: 'utf8',
+        highWaterMark: 64 * 1024,
+      });
+      expect(readFile).not.toHaveBeenCalled();
+    });
+
+    it('should use custom encoding', async () => {
+      const fileSize = 100;
+
+      (stat as jest.Mock).mockResolvedValue({ size: fileSize });
+      (readFile as jest.Mock).mockResolvedValue(smallContent);
+
+      await readFileAsString(mockFilePath, { encoding: 'latin1' });
+
+      expect(readFile).toHaveBeenCalledWith(mockFilePath, 'latin1');
+    });
+
+    it('should respect custom stream threshold', async () => {
+      const customThreshold = 1024; // 1KB
+      const fileSize = 2048; // 2KB, above the custom threshold so the stream path runs
+
+      (stat as jest.Mock).mockResolvedValue({ size: fileSize });
+
+      const mockStream = new Readable({
+        read() {
+          this.push('test content');
+          this.push(null);
+        },
+      });
+
+      (createReadStream as jest.Mock).mockReturnValue(mockStream);
+
+      await readFileAsString(mockFilePath, { streamThreshold: customThreshold });
+
+      expect(createReadStream).toHaveBeenCalled();
+      expect(readFile).not.toHaveBeenCalled();
+    });
+
+    it('should handle empty files', async () => {
+      const fileSize = 0;
+
+      (stat as jest.Mock).mockResolvedValue({ size: fileSize });
+      (readFile as jest.Mock).mockResolvedValue('');
+
+      const result = await readFileAsString(mockFilePath);
+
+      expect(result).toEqual({
+        content: '',
+        bytes: 0,
+      });
+    });
+
+    it('should propagate read errors', async () => {
+      const error = new Error('File not found');
+
+      (stat as jest.Mock).mockResolvedValue({ size: 100 });
+      (readFile as jest.Mock).mockRejectedValue(error);
+
+      await expect(readFileAsString(mockFilePath)).rejects.toThrow('File not found');
+    });
+
+    it('should propagate stat errors when fileSize not provided', async () => {
+      const error = new Error('Permission denied');
+
+      (stat as jest.Mock).mockRejectedValue(error);
+
+      await expect(readFileAsString(mockFilePath)).rejects.toThrow('Permission denied');
+    });
+
+    it('should propagate stream errors', async () => {
+      const fileSize = 11 * 1024 * 1024; // 11MB, above the default 10MB threshold so the stream path runs
+
+      (stat as jest.Mock).mockResolvedValue({ size: fileSize });
+
+      const mockStream = new Readable({
+        read() {
+          this.emit('error', new Error('Stream error'));
+        },
+      });
+
+      (createReadStream as jest.Mock).mockReturnValue(mockStream);
+
+      await expect(readFileAsString(mockFilePath)).rejects.toThrow('Stream error');
+    });
+  });
+
+  describe('readFileAsBuffer', () => {
+    const smallBuffer = Buffer.from(smallContent);
+    const largeBuffer = Buffer.from(largeContent);
+
+    it('should read small files directly without streaming', async () => {
+      const fileSize = smallBuffer.length;
+
+      (stat as jest.Mock).mockResolvedValue({ size: fileSize });
+      (readFile as jest.Mock).mockResolvedValue(smallBuffer);
+
+      const result = await readFileAsBuffer(mockFilePath);
+
+      expect(result).toEqual({
+        content: smallBuffer,
+        bytes: fileSize,
+      });
+      expect(stat).toHaveBeenCalledWith(mockFilePath);
+      expect(readFile).toHaveBeenCalledWith(mockFilePath);
+      expect(createReadStream).not.toHaveBeenCalled();
+    });
+
+    it('should use provided fileSize to avoid stat call', async () => {
+      const fileSize = smallBuffer.length;
+
+      (readFile as jest.Mock).mockResolvedValue(smallBuffer);
+
+      const result = await readFileAsBuffer(mockFilePath, { fileSize });
+
+      expect(result).toEqual({
+        content: smallBuffer,
+        bytes: fileSize,
+      });
+      expect(stat).not.toHaveBeenCalled();
+    });
+
+    it('should stream large files', async () => {
+      const fileSize = largeBuffer.length;
+
+      (stat as jest.Mock).mockResolvedValue({ size: fileSize });
+
+      // Split the large buffer into three chunks that the mock stream emits in order
+      const chunk1 = largeBuffer.slice(0, 5000000);
+      const chunk2 = largeBuffer.slice(5000000, 10000000);
+      const chunk3 = largeBuffer.slice(10000000);
+
+      const chunks = [chunk1, chunk2, chunk3];
+
+      const mockStream = new Readable({
+        read() {
+          if (chunks.length > 0) {
+            this.push(chunks.shift());
+          } else {
+            this.push(null);
+          }
+        },
+      });
+
+      (createReadStream as jest.Mock).mockReturnValue(mockStream);
+
+      const result = await readFileAsBuffer(mockFilePath);
+
+      expect(result.bytes).toBe(fileSize);
+      expect(Buffer.compare(result.content, largeBuffer)).toBe(0);
+      expect(createReadStream).toHaveBeenCalledWith(mockFilePath, {
+        highWaterMark: 64 * 1024,
+      });
+      expect(readFile).not.toHaveBeenCalled();
+    });
+
+    it('should respect custom highWaterMark', async () => {
+      const fileSize = 11 * 1024 * 1024; // 11MB, above the default 10MB threshold so createReadStream is used
+      const customHighWaterMark = 128 * 1024; // 128KB instead of the default 64KB
+
+      (stat as jest.Mock).mockResolvedValue({ size: fileSize });
+
+      const mockStream = new Readable({
+        read() {
+          this.push(Buffer.from('test'));
+          this.push(null);
+        },
+      });
+
+      (createReadStream as jest.Mock).mockReturnValue(mockStream);
+
+      await readFileAsBuffer(mockFilePath, { highWaterMark: customHighWaterMark });
+
+      expect(createReadStream).toHaveBeenCalledWith(mockFilePath, {
+        highWaterMark: customHighWaterMark,
+      });
+    });
+
+    it('should handle empty buffer files', async () => {
+      const emptyBuffer = Buffer.alloc(0);
+
+      (stat as jest.Mock).mockResolvedValue({ size: 0 });
+      (readFile as jest.Mock).mockResolvedValue(emptyBuffer);
+
+      const result = await readFileAsBuffer(mockFilePath);
+
+      expect(result).toEqual({
+        content: emptyBuffer,
+        bytes: 0,
+      });
+    });
+
+    it('should propagate errors', async () => {
+      const error = new Error('Access denied');
+
+      (stat as jest.Mock).mockResolvedValue({ size: 100 });
+      (readFile as jest.Mock).mockRejectedValue(error);
+
+      await expect(readFileAsBuffer(mockFilePath)).rejects.toThrow('Access denied');
+    });
+  });
+
+  describe('readJsonFile', () => {
+    const validJson = { name: 'test', value: 123, nested: { key: 'value' } };
+    const jsonString = JSON.stringify(validJson);
+
+    it('should parse valid JSON files', async () => {
+      (stat as jest.Mock).mockResolvedValue({ size: jsonString.length });
+      (readFile as jest.Mock).mockResolvedValue(jsonString);
+
+      const result = await readJsonFile(mockFilePath);
+
+      expect(result).toEqual(validJson);
+      expect(readFile).toHaveBeenCalledWith(mockFilePath, 'utf8');
+    });
+
+    it('should parse JSON with provided fileSize', async () => {
+      const fileSize = jsonString.length;
+
+      (readFile as jest.Mock).mockResolvedValue(jsonString);
+
+      const result = await readJsonFile(mockFilePath, { fileSize });
+
+      expect(result).toEqual(validJson);
+      expect(stat).not.toHaveBeenCalled();
+    });
+
+    it('should handle JSON arrays', async () => {
+      const jsonArray = [1, 2, 3, { key: 'value' }];
+      const arrayString = JSON.stringify(jsonArray);
+
+      (stat as jest.Mock).mockResolvedValue({ size: arrayString.length });
+      (readFile as jest.Mock).mockResolvedValue(arrayString);
+
+      const result = await readJsonFile(mockFilePath);
+
+      expect(result).toEqual(jsonArray);
+    });
+
+    it('should throw on invalid JSON', async () => {
+      const invalidJson = '{ invalid json }';
+
+      (stat as jest.Mock).mockResolvedValue({ size: invalidJson.length });
+      (readFile as jest.Mock).mockResolvedValue(invalidJson);
+
+      await expect(readJsonFile(mockFilePath)).rejects.toThrow();
+    });
+
+    it('should throw on empty file', async () => {
+      (stat as jest.Mock).mockResolvedValue({ size: 0 });
+      (readFile as jest.Mock).mockResolvedValue('');
+
+      await expect(readJsonFile(mockFilePath)).rejects.toThrow();
+    });
+
+    it('should handle large JSON files with streaming', async () => {
+      const largeJson = { data: 'x'.repeat(11 * 1024 * 1024) }; // >10MB, so the default threshold triggers streaming
+      const largeJsonString = JSON.stringify(largeJson);
+      const fileSize = largeJsonString.length;
+
+      (stat as jest.Mock).mockResolvedValue({ size: fileSize });
+
+      // Slice the JSON text into chunks for the mock stream to emit in order
+      const chunks: string[] = [];
+      let offset = 0;
+      const chunkSize = 5 * 1024 * 1024; // 5MB chunks
+
+      while (offset < largeJsonString.length) {
+        chunks.push(largeJsonString.slice(offset, offset + chunkSize));
+        offset += chunkSize;
+      }
+
+      const mockStream = new Readable({
+        read() {
+          if (chunks.length > 0) {
+            this.push(chunks.shift());
+          } else {
+            this.push(null);
+          }
+        },
+      });
+
+      (createReadStream as jest.Mock).mockReturnValue(mockStream);
+
+      const result = await readJsonFile(mockFilePath);
+
+      expect(result).toEqual(largeJson);
+      expect(createReadStream).toHaveBeenCalled();
+      expect(readFile).not.toHaveBeenCalled();
+    });
+
+    it('should use custom stream threshold', async () => {
+      const customThreshold = 100;
+      const json = { test: 'x'.repeat(200) };
+      const jsonStr = JSON.stringify(json);
+      const fileSize = jsonStr.length;
+
+      (stat as jest.Mock).mockResolvedValue({ size: fileSize });
+
+      const mockStream = new Readable({
+        read() {
+          this.push(jsonStr);
+          this.push(null);
+        },
+      });
+
+      (createReadStream as jest.Mock).mockReturnValue(mockStream);
+
+      await readJsonFile(mockFilePath, { streamThreshold: customThreshold });
+
+      expect(createReadStream).toHaveBeenCalled();
+    });
+
+    it('should preserve type with generics', async () => {
+      interface TestType {
+        id: number;
+        name: string;
+      }
+
+      const typedJson: TestType = { id: 1, name: 'test' };
+      const jsonString = JSON.stringify(typedJson);
+
+      (stat as jest.Mock).mockResolvedValue({ size: jsonString.length });
+      (readFile as jest.Mock).mockResolvedValue(jsonString);
+
+      const result = await readJsonFile<TestType>(mockFilePath);
+
+      expect(result).toEqual(typedJson);
+      expect(result.id).toBe(1);
+      expect(result.name).toBe('test');
+    });
+  });
+});
--- a/packages/api/src/utils/axios.ts
+++ b/packages/api/src/utils/axios.ts
@ -9,12 +9,25 @@ import type { AxiosInstance, AxiosProxyConfig, AxiosError } from 'axios';
 * @param options.error - The Axios error object.
 * @returns The log message.
 */
-export const logAxiosError = ({ message, error }: { message: string; error: AxiosError }) => {
+export const logAxiosError = ({
+  message,
+  error,
+}: {
+  message: string;
+  error: AxiosError | Error | unknown;
+}) => {
  let logMessage = message;
  try {
-    const stack = error.stack || 'No stack trace available';
+    const stack =
+      error != null
+        ? (error as Error | AxiosError)?.stack || 'No stack trace available'
+        : 'No stack trace available';
+    const errorMessage =
+      error != null
+        ? (error as Error | AxiosError)?.message || 'No error message available'
+        : 'No error message available';

-    if (error.response?.status) {
+    if (axios.isAxiosError(error) && error.response && error.response?.status) {
      const { status, headers, data } = error.response;
      logMessage = `${message} The server responded with status ${status}: ${error.message}`;
      logger.error(logMessage, {
@ -23,18 +36,18 @@ export const logAxiosError = ({ message, error }: { message: string; error: Axio
        data,
        stack,
      });
-    } else if (error.request) {
+    } else if (axios.isAxiosError(error) && error.request) {
      const { method, url } = error.config || {};
      logMessage = `${message} No response received for ${method ? method.toUpperCase() : ''} ${url || ''}: ${error.message}`;
      logger.error(logMessage, {
        requestInfo: { method, url },
        stack,
      });
-    } else if (error?.message?.includes("Cannot read properties of undefined (reading 'status')")) {
-      logMessage = `${message} It appears the request timed out or was unsuccessful: ${error.message}`;
+    } else if (errorMessage?.includes("Cannot read properties of undefined (reading 'status')")) {
+      logMessage = `${message} It appears the request timed out or was unsuccessful: ${errorMessage}`;
      logger.error(logMessage, { stack });
    } else {
-      logMessage = `${message} An error occurred while setting up the request: ${error.message}`;
+      logMessage = `${message} An error occurred while setting up the request: ${errorMessage}`;
      logger.error(logMessage, { stack });
    }
  } catch (err: unknown) {
--- a/packages/api/src/utils/files.ts
+++ b/packages/api/src/utils/files.ts
@ -1,5 +1,7 @@
 import path from 'path';
 import crypto from 'node:crypto';
+import { createReadStream } from 'fs';
+import { readFile, stat } from 'fs/promises';

 /**
 * Sanitize a filename by removing any directory components, replacing non-alphanumeric characters
@ -31,3 +33,122 @@ export function sanitizeFilename(inputName: string): string {

  return name;
 }
+
+/**
+ * Options accepted by the file-reading helpers in this module.
+ *
+ * `encoding` is the text encoding used by `readFileAsString` (default `'utf8'`);
+ * `readFileAsBuffer` and `readJsonFile` omit it from their option types.
+ *
+ * `fileSize` is trusted as given: it decides whether the file is streamed and is
+ * returned as `bytes`, without being checked against the file on disk.
+ */
+export interface ReadFileOptions {
+  encoding?: BufferEncoding;
+  /** Size threshold in bytes. Files strictly larger than this are read via a stream. Default: 10MB */
+  streamThreshold?: number;
+  /** `highWaterMark` passed to `createReadStream` when streaming. Default: 64KB */
+  highWaterMark?: number;
+  /** File size in bytes if known (e.g. from multer). Avoids extra stat() call. */
+  fileSize?: number;
+}
+
+/**
+ * Result from reading a file.
+ *
+ * `content` holds the file contents (a string or a Buffer, depending on the helper).
+ * `bytes` is the file size in bytes taken from the caller-supplied `fileSize` or from
+ * `stat()`; it is not measured from the content that was read.
+ *
+ * @template T - Type of the returned content
+ */
+export interface ReadFileResult<T> {
+  content: T;
+  bytes: number;
+}
+
+/**
+ * Reads a file asynchronously and returns its contents as a decoded string.
+ *
+ * The size comes from `options.fileSize` when provided, otherwise from `stat()`.
+ * Files strictly larger than `streamThreshold` are read through `createReadStream`
+ * and the chunks are joined; all other files are read with a single `readFile` call.
+ * Either way the complete content is returned as one in-memory string.
+ *
+ * @param filePath - Path to the file to read
+ * @param options - Options for reading the file
+ * @returns Promise resolving to the file contents and size
+ * @throws Error if the file cannot be read; errors from `stat`, `readFile` and the
+ * stream are not caught here and reject the returned promise
+ */
+export async function readFileAsString(
+  filePath: string,
+  options: ReadFileOptions = {},
+): Promise<ReadFileResult<string>> {
+  const {
+    encoding = 'utf8',
+    streamThreshold = 10 * 1024 * 1024, // 10MB
+    highWaterMark = 64 * 1024, // 64KB
+    fileSize,
+  } = options;
+
+  // Prefer the caller-supplied size (0 is kept); only call stat() when it is null/undefined
+  const bytes = fileSize ?? (await stat(filePath)).size;
+
+  // Strictly above the threshold: read through a stream and collect the decoded chunks
+  if (bytes > streamThreshold) {
+    const chunks: string[] = [];
+    const stream = createReadStream(filePath, {
+      encoding,
+      highWaterMark,
+    });
+
+    for await (const chunk of stream) {
+      chunks.push(chunk as string);
+    }
+
+    return { content: chunks.join(''), bytes };
+  }
+
+  // At or below the threshold: a single readFile call with the requested encoding
+  const content = await readFile(filePath, encoding);
+  return { content, bytes };
+}
+
+/**
+ * Reads a file asynchronously and returns its contents as a Buffer.
+ *
+ * The size comes from `options.fileSize` when provided, otherwise from `stat()`.
+ * Files strictly larger than `streamThreshold` are read through `createReadStream`
+ * (no encoding is passed) and the chunks are combined with `Buffer.concat`;
+ * all other files are read with a single `readFile` call.
+ *
+ * @param filePath - Path to the file to read
+ * @param options - Options for reading the file (`encoding` is not accepted)
+ * @returns Promise resolving to the file contents and size
+ * @throws Error if the file cannot be read
+ */
+export async function readFileAsBuffer(
+  filePath: string,
+  options: Omit<ReadFileOptions, 'encoding'> = {},
+): Promise<ReadFileResult<Buffer>> {
+  const {
+    streamThreshold = 10 * 1024 * 1024, // 10MB
+    highWaterMark = 64 * 1024, // 64KB
+    fileSize,
+  } = options;
+
+  // Prefer the caller-supplied size (0 is kept); only call stat() when it is null/undefined
+  const bytes = fileSize ?? (await stat(filePath)).size;
+
+  // Strictly above the threshold: read through a stream and collect the chunks
+  if (bytes > streamThreshold) {
+    const chunks: Buffer[] = [];
+    const stream = createReadStream(filePath, {
+      highWaterMark,
+    });
+
+    for await (const chunk of stream) {
+      chunks.push(chunk as Buffer);
+    }
+
+    return { content: Buffer.concat(chunks), bytes };
+  }
+
+  // At or below the threshold: a single readFile call without an encoding
+  const content = await readFile(filePath);
+  return { content, bytes };
+}
+
+/**
+ * Reads a JSON file asynchronously and parses it.
+ *
+ * The file is read with `readFileAsString` using `'utf8'` encoding, so the same
+ * `streamThreshold`, `highWaterMark` and `fileSize` options apply. The parsed value
+ * is returned typed as `T`; its shape is not validated at runtime.
+ *
+ * @param filePath - Path to the JSON file to read
+ * @param options - Options for reading the file
+ * @returns Promise resolving to the parsed JSON value
+ * @throws Error if the file cannot be read or parsed
+ */
+export async function readJsonFile<T = unknown>(
+  filePath: string,
+  options: Omit<ReadFileOptions, 'encoding'> = {},
+): Promise<T> {
+  const { content } = await readFileAsString(filePath, { ...options, encoding: 'utf8' });
+  return JSON.parse(content);
+}
--- a/packages/api/src/utils/key.test.ts
+++ b/packages/api/src/utils/key.test.ts
@ -1,6 +1,6 @@
-import fs from 'fs';
 import path from 'path';
 import axios from 'axios';
+import { readFileAsString } from './files';
 import { loadServiceKey } from './key';

 jest.mock('fs');
@ -11,6 +11,10 @@ jest.mock('@librechat/data-schemas', () => ({
  },
 }));

+jest.mock('./files', () => ({
+  readFileAsString: jest.fn(),
+}));
+
 describe('loadServiceKey', () => {
  const mockServiceKey = {
    type: 'service_account',
@ -49,10 +53,13 @@ describe('loadServiceKey', () => {

  it('should load from file path', async () => {
    const filePath = '/path/to/service-key.json';
-    (fs.readFileSync as jest.Mock).mockReturnValue(JSON.stringify(mockServiceKey));
+    (readFileAsString as jest.Mock).mockResolvedValue({
+      content: JSON.stringify(mockServiceKey),
+      bytes: JSON.stringify(mockServiceKey).length,
+    });

    const result = await loadServiceKey(filePath);
-    expect(fs.readFileSync).toHaveBeenCalledWith(path.resolve(filePath), 'utf8');
+    expect(readFileAsString).toHaveBeenCalledWith(path.resolve(filePath));
    expect(result).toEqual(mockServiceKey);
  });

@ -73,9 +80,7 @@ describe('loadServiceKey', () => {

  it('should handle file read errors', async () => {
    const filePath = '/path/to/nonexistent.json';
-    (fs.readFileSync as jest.Mock).mockImplementation(() => {
-      throw new Error('File not found');
-    });
+    (readFileAsString as jest.Mock).mockRejectedValue(new Error('File not found'));

    const result = await loadServiceKey(filePath);
    expect(result).toBeNull();
--- a/packages/api/src/utils/key.ts
+++ b/packages/api/src/utils/key.ts
@ -1,7 +1,7 @@
-import fs from 'fs';
 import path from 'path';
 import axios from 'axios';
 import { logger } from '@librechat/data-schemas';
+import { readFileAsString } from './files';

 export interface GoogleServiceKey {
  type?: string;
@ -63,7 +63,7 @@ export async function loadServiceKey(keyPath: string): Promise<GoogleServiceKey
    // It's a file path
    try {
      const absolutePath = path.isAbsolute(keyPath) ? keyPath : path.resolve(keyPath);
-      const fileContent = fs.readFileSync(absolutePath, 'utf8');
+      const { content: fileContent } = await readFileAsString(absolutePath);
      serviceKey = JSON.parse(fileContent);
    } catch (error) {
      logger.error(`Failed to load service key from file: ${keyPath}`, error);