📂 refactor: File Read Operations (#9747)

* fix: axios response logging for text parsing, remove console logging, remove jsdoc

* refactor: error logging in logAxiosError function to handle various error types with type guards

* refactor: enhance text parsing with improved error handling and async file reading

* refactor: replace synchronous file reading with asynchronous methods for improved performance and memory management

* ci: update tests
This commit is contained in:
Danny Avila 2025-09-20 10:17:24 -04:00 committed by GitHub
parent 0352067da2
commit 2489670f54
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 692 additions and 83 deletions

View file

@ -0,0 +1,414 @@
import { createReadStream } from 'fs';
import { readFile, stat } from 'fs/promises';
import { Readable } from 'stream';
import { readFileAsString, readFileAsBuffer, readJsonFile } from '../files';
jest.mock('fs');
jest.mock('fs/promises');
// Unit tests for the async file helpers readFileAsString, readFileAsBuffer and readJsonFile.
// `fs` and `fs/promises` are jest-mocked in this file, so these cases exercise only the
// branching logic (direct read vs. streaming) and the arguments passed to the fs APIs.
describe('File utilities', () => {
const mockFilePath = '/test/file.txt';
const smallContent = 'Hello, World!';
// Larger than the helpers' 10MB default stream threshold, so it triggers the streaming path.
const largeContent = 'x'.repeat(11 * 1024 * 1024); // 11MB of 'x'
beforeEach(() => {
// Reset recorded mock calls so `toHaveBeenCalled` assertions only see the current test.
// NOTE(review): per Jest docs clearAllMocks does not reset mock implementations — confirm
// no case relies on an implementation installed by an earlier test.
jest.clearAllMocks();
});
describe('readFileAsString', () => {
// Below the threshold: size comes from stat(), content from a single readFile() call.
it('should read small files directly without streaming', async () => {
const fileSize = Buffer.byteLength(smallContent);
(stat as jest.Mock).mockResolvedValue({ size: fileSize });
(readFile as jest.Mock).mockResolvedValue(smallContent);
const result = await readFileAsString(mockFilePath);
expect(result).toEqual({
content: smallContent,
bytes: fileSize,
});
expect(stat).toHaveBeenCalledWith(mockFilePath);
expect(readFile).toHaveBeenCalledWith(mockFilePath, 'utf8');
expect(createReadStream).not.toHaveBeenCalled();
});
// A caller-supplied fileSize must short-circuit the stat() lookup.
it('should use provided fileSize to avoid stat call', async () => {
const fileSize = Buffer.byteLength(smallContent);
(readFile as jest.Mock).mockResolvedValue(smallContent);
const result = await readFileAsString(mockFilePath, { fileSize });
expect(result).toEqual({
content: smallContent,
bytes: fileSize,
});
expect(stat).not.toHaveBeenCalled();
expect(readFile).toHaveBeenCalledWith(mockFilePath, 'utf8');
});
// Above the threshold: content is assembled from stream chunks and readFile() is never used.
it('should stream large files', async () => {
const fileSize = Buffer.byteLength(largeContent);
(stat as jest.Mock).mockResolvedValue({ size: fileSize });
// Create a mock readable stream
const chunks = [
largeContent.substring(0, 5000000),
largeContent.substring(5000000, 10000000),
largeContent.substring(10000000),
];
const mockStream = new Readable({
read() {
if (chunks.length > 0) {
this.push(chunks.shift());
} else {
this.push(null); // End stream
}
},
});
(createReadStream as jest.Mock).mockReturnValue(mockStream);
const result = await readFileAsString(mockFilePath);
expect(result).toEqual({
content: largeContent,
bytes: fileSize,
});
expect(stat).toHaveBeenCalledWith(mockFilePath);
// Pins the default stream options: utf8 decoding and 64KB chunks.
expect(createReadStream).toHaveBeenCalledWith(mockFilePath, {
encoding: 'utf8',
highWaterMark: 64 * 1024,
});
expect(readFile).not.toHaveBeenCalled();
});
it('should use custom encoding', async () => {
const fileSize = 100;
(stat as jest.Mock).mockResolvedValue({ size: fileSize });
(readFile as jest.Mock).mockResolvedValue(smallContent);
await readFileAsString(mockFilePath, { encoding: 'latin1' });
expect(readFile).toHaveBeenCalledWith(mockFilePath, 'latin1');
});
// A 2KB file with a 1KB threshold must take the streaming path.
it('should respect custom stream threshold', async () => {
const customThreshold = 1024; // 1KB
const fileSize = 2048; // 2KB
(stat as jest.Mock).mockResolvedValue({ size: fileSize });
const mockStream = new Readable({
read() {
this.push('test content');
this.push(null);
},
});
(createReadStream as jest.Mock).mockReturnValue(mockStream);
await readFileAsString(mockFilePath, { streamThreshold: customThreshold });
expect(createReadStream).toHaveBeenCalled();
expect(readFile).not.toHaveBeenCalled();
});
it('should handle empty files', async () => {
const fileSize = 0;
(stat as jest.Mock).mockResolvedValue({ size: fileSize });
(readFile as jest.Mock).mockResolvedValue('');
const result = await readFileAsString(mockFilePath);
expect(result).toEqual({
content: '',
bytes: 0,
});
});
// Error propagation: failures from readFile(), stat() and the stream all reject the promise.
it('should propagate read errors', async () => {
const error = new Error('File not found');
(stat as jest.Mock).mockResolvedValue({ size: 100 });
(readFile as jest.Mock).mockRejectedValue(error);
await expect(readFileAsString(mockFilePath)).rejects.toThrow('File not found');
});
it('should propagate stat errors when fileSize not provided', async () => {
const error = new Error('Permission denied');
(stat as jest.Mock).mockRejectedValue(error);
await expect(readFileAsString(mockFilePath)).rejects.toThrow('Permission denied');
});
it('should propagate stream errors', async () => {
const fileSize = 11 * 1024 * 1024; // 11MB
(stat as jest.Mock).mockResolvedValue({ size: fileSize });
const mockStream = new Readable({
read() {
this.emit('error', new Error('Stream error'));
},
});
(createReadStream as jest.Mock).mockReturnValue(mockStream);
await expect(readFileAsString(mockFilePath)).rejects.toThrow('Stream error');
});
});
describe('readFileAsBuffer', () => {
const smallBuffer = Buffer.from(smallContent);
const largeBuffer = Buffer.from(largeContent);
// Below the threshold: readFile() is called with no encoding argument.
it('should read small files directly without streaming', async () => {
const fileSize = smallBuffer.length;
(stat as jest.Mock).mockResolvedValue({ size: fileSize });
(readFile as jest.Mock).mockResolvedValue(smallBuffer);
const result = await readFileAsBuffer(mockFilePath);
expect(result).toEqual({
content: smallBuffer,
bytes: fileSize,
});
expect(stat).toHaveBeenCalledWith(mockFilePath);
expect(readFile).toHaveBeenCalledWith(mockFilePath);
expect(createReadStream).not.toHaveBeenCalled();
});
it('should use provided fileSize to avoid stat call', async () => {
const fileSize = smallBuffer.length;
(readFile as jest.Mock).mockResolvedValue(smallBuffer);
const result = await readFileAsBuffer(mockFilePath, { fileSize });
expect(result).toEqual({
content: smallBuffer,
bytes: fileSize,
});
expect(stat).not.toHaveBeenCalled();
});
// Above the threshold: the streamed chunks must concatenate back to the original bytes.
it('should stream large files', async () => {
const fileSize = largeBuffer.length;
(stat as jest.Mock).mockResolvedValue({ size: fileSize });
// Split large buffer into chunks
const chunk1 = largeBuffer.slice(0, 5000000);
const chunk2 = largeBuffer.slice(5000000, 10000000);
const chunk3 = largeBuffer.slice(10000000);
const chunks = [chunk1, chunk2, chunk3];
const mockStream = new Readable({
read() {
if (chunks.length > 0) {
this.push(chunks.shift());
} else {
this.push(null);
}
},
});
(createReadStream as jest.Mock).mockReturnValue(mockStream);
const result = await readFileAsBuffer(mockFilePath);
expect(result.bytes).toBe(fileSize);
expect(Buffer.compare(result.content, largeBuffer)).toBe(0);
// Buffer streaming passes only highWaterMark (no encoding) to createReadStream.
expect(createReadStream).toHaveBeenCalledWith(mockFilePath, {
highWaterMark: 64 * 1024,
});
expect(readFile).not.toHaveBeenCalled();
});
it('should respect custom highWaterMark', async () => {
const fileSize = 11 * 1024 * 1024; // 11MB
const customHighWaterMark = 128 * 1024; // 128KB
(stat as jest.Mock).mockResolvedValue({ size: fileSize });
const mockStream = new Readable({
read() {
this.push(Buffer.from('test'));
this.push(null);
},
});
(createReadStream as jest.Mock).mockReturnValue(mockStream);
await readFileAsBuffer(mockFilePath, { highWaterMark: customHighWaterMark });
expect(createReadStream).toHaveBeenCalledWith(mockFilePath, {
highWaterMark: customHighWaterMark,
});
});
it('should handle empty buffer files', async () => {
const emptyBuffer = Buffer.alloc(0);
(stat as jest.Mock).mockResolvedValue({ size: 0 });
(readFile as jest.Mock).mockResolvedValue(emptyBuffer);
const result = await readFileAsBuffer(mockFilePath);
expect(result).toEqual({
content: emptyBuffer,
bytes: 0,
});
});
it('should propagate errors', async () => {
const error = new Error('Access denied');
(stat as jest.Mock).mockResolvedValue({ size: 100 });
(readFile as jest.Mock).mockRejectedValue(error);
await expect(readFileAsBuffer(mockFilePath)).rejects.toThrow('Access denied');
});
});
describe('readJsonFile', () => {
const validJson = { name: 'test', value: 123, nested: { key: 'value' } };
const jsonString = JSON.stringify(validJson);
// The JSON helper always reads as utf8, which the readFile() call-shape assertion pins.
it('should parse valid JSON files', async () => {
(stat as jest.Mock).mockResolvedValue({ size: jsonString.length });
(readFile as jest.Mock).mockResolvedValue(jsonString);
const result = await readJsonFile(mockFilePath);
expect(result).toEqual(validJson);
expect(readFile).toHaveBeenCalledWith(mockFilePath, 'utf8');
});
it('should parse JSON with provided fileSize', async () => {
const fileSize = jsonString.length;
(readFile as jest.Mock).mockResolvedValue(jsonString);
const result = await readJsonFile(mockFilePath, { fileSize });
expect(result).toEqual(validJson);
expect(stat).not.toHaveBeenCalled();
});
it('should handle JSON arrays', async () => {
const jsonArray = [1, 2, 3, { key: 'value' }];
const arrayString = JSON.stringify(jsonArray);
(stat as jest.Mock).mockResolvedValue({ size: arrayString.length });
(readFile as jest.Mock).mockResolvedValue(arrayString);
const result = await readJsonFile(mockFilePath);
expect(result).toEqual(jsonArray);
});
// Parse failures (malformed or empty content) surface as rejections.
it('should throw on invalid JSON', async () => {
const invalidJson = '{ invalid json }';
(stat as jest.Mock).mockResolvedValue({ size: invalidJson.length });
(readFile as jest.Mock).mockResolvedValue(invalidJson);
await expect(readJsonFile(mockFilePath)).rejects.toThrow();
});
it('should throw on empty file', async () => {
(stat as jest.Mock).mockResolvedValue({ size: 0 });
(readFile as jest.Mock).mockResolvedValue('');
await expect(readJsonFile(mockFilePath)).rejects.toThrow();
});
// A JSON document over the default threshold is read through the stream and still parses.
it('should handle large JSON files with streaming', async () => {
const largeJson = { data: 'x'.repeat(11 * 1024 * 1024) }; // >10MB
const largeJsonString = JSON.stringify(largeJson);
const fileSize = largeJsonString.length;
(stat as jest.Mock).mockResolvedValue({ size: fileSize });
// Create chunks for streaming
const chunks: string[] = [];
let offset = 0;
const chunkSize = 5 * 1024 * 1024; // 5MB chunks
while (offset < largeJsonString.length) {
chunks.push(largeJsonString.slice(offset, offset + chunkSize));
offset += chunkSize;
}
const mockStream = new Readable({
read() {
if (chunks.length > 0) {
this.push(chunks.shift());
} else {
this.push(null);
}
},
});
(createReadStream as jest.Mock).mockReturnValue(mockStream);
const result = await readJsonFile(mockFilePath);
expect(result).toEqual(largeJson);
expect(createReadStream).toHaveBeenCalled();
expect(readFile).not.toHaveBeenCalled();
});
it('should use custom stream threshold', async () => {
const customThreshold = 100;
const json = { test: 'x'.repeat(200) };
const jsonStr = JSON.stringify(json);
const fileSize = jsonStr.length;
(stat as jest.Mock).mockResolvedValue({ size: fileSize });
const mockStream = new Readable({
read() {
this.push(jsonStr);
this.push(null);
},
});
(createReadStream as jest.Mock).mockReturnValue(mockStream);
await readJsonFile(mockFilePath, { streamThreshold: customThreshold });
expect(createReadStream).toHaveBeenCalled();
});
// Mostly a compile-time check: the generic parameter types the result, so `result.id`
// and `result.name` are accessible without casts.
it('should preserve type with generics', async () => {
interface TestType {
id: number;
name: string;
}
const typedJson: TestType = { id: 1, name: 'test' };
const jsonString = JSON.stringify(typedJson);
(stat as jest.Mock).mockResolvedValue({ size: jsonString.length });
(readFile as jest.Mock).mockResolvedValue(jsonString);
const result = await readJsonFile<TestType>(mockFilePath);
expect(result).toEqual(typedJson);
expect(result.id).toBe(1);
expect(result.name).toBe('test');
});
});
});

View file

@ -9,12 +9,25 @@ import type { AxiosInstance, AxiosProxyConfig, AxiosError } from 'axios';
* @param options.error - The Axios error object.
* @returns The log message.
*/
export const logAxiosError = ({ message, error }: { message: string; error: AxiosError }) => {
export const logAxiosError = ({
message,
error,
}: {
message: string;
error: AxiosError | Error | unknown;
}) => {
let logMessage = message;
try {
const stack = error.stack || 'No stack trace available';
const stack =
error != null
? (error as Error | AxiosError)?.stack || 'No stack trace available'
: 'No stack trace available';
const errorMessage =
error != null
? (error as Error | AxiosError)?.message || 'No error message available'
: 'No error message available';
if (error.response?.status) {
if (axios.isAxiosError(error) && error.response && error.response?.status) {
const { status, headers, data } = error.response;
logMessage = `${message} The server responded with status ${status}: ${error.message}`;
logger.error(logMessage, {
@ -23,18 +36,18 @@ export const logAxiosError = ({ message, error }: { message: string; error: Axio
data,
stack,
});
} else if (error.request) {
} else if (axios.isAxiosError(error) && error.request) {
const { method, url } = error.config || {};
logMessage = `${message} No response received for ${method ? method.toUpperCase() : ''} ${url || ''}: ${error.message}`;
logger.error(logMessage, {
requestInfo: { method, url },
stack,
});
} else if (error?.message?.includes("Cannot read properties of undefined (reading 'status')")) {
logMessage = `${message} It appears the request timed out or was unsuccessful: ${error.message}`;
} else if (errorMessage?.includes("Cannot read properties of undefined (reading 'status')")) {
logMessage = `${message} It appears the request timed out or was unsuccessful: ${errorMessage}`;
logger.error(logMessage, { stack });
} else {
logMessage = `${message} An error occurred while setting up the request: ${error.message}`;
logMessage = `${message} An error occurred while setting up the request: ${errorMessage}`;
logger.error(logMessage, { stack });
}
} catch (err: unknown) {

View file

@ -1,5 +1,7 @@
import path from 'path';
import crypto from 'node:crypto';
import { createReadStream } from 'fs';
import { readFile, stat } from 'fs/promises';
/**
* Sanitize a filename by removing any directory components, replacing non-alphanumeric characters
@ -31,3 +33,122 @@ export function sanitizeFilename(inputName: string): string {
return name;
}
/**
 * Options accepted by the file-reading helpers in this module
 * (`readFileAsString`, `readFileAsBuffer`, `readJsonFile`). Every field is optional.
 */
export interface ReadFileOptions {
/** Text encoding used when reading as a string. `readFileAsString` defaults to 'utf8'. */
encoding?: BufferEncoding;
/** Size threshold in bytes. Files strictly larger than this are streamed. Default: 10MB */
streamThreshold?: number;
/** Chunk size in bytes passed to the read stream when streaming. Default: 64KB */
highWaterMark?: number;
/** File size in bytes if known (e.g. from multer). Avoids an extra stat() call. */
fileSize?: number;
}
/**
 * Result from reading a file.
 *
 * `T` is the content representation: `string` for `readFileAsString`,
 * `Buffer` for `readFileAsBuffer`.
 */
export interface ReadFileResult<T> {
/** The file contents. */
content: T;
/** File size in bytes: the caller-supplied `fileSize` when given, otherwise the size reported by stat(). */
bytes: number;
}
/**
 * Loads a file's text content without blocking the event loop.
 *
 * Files no larger than `streamThreshold` are fetched with a single `readFile` call;
 * anything bigger is consumed chunk by chunk through a read stream and stitched together.
 *
 * @param filePath - Location of the file on disk
 * @param options - Encoding, streaming threshold, chunk size and optional known file size
 * @returns Promise resolving to the decoded text plus the file size in bytes
 * @throws Error if the size lookup, the direct read, or the stream fails
 */
export async function readFileAsString(
  filePath: string,
  options: ReadFileOptions = {},
): Promise<ReadFileResult<string>> {
  const {
    encoding = 'utf8',
    streamThreshold = 10 * 1024 * 1024, // 10MB
    highWaterMark = 64 * 1024, // 64KB
    fileSize,
  } = options;

  // A caller-supplied size wins; only hit the filesystem when none was given.
  const bytes = fileSize != null ? fileSize : (await stat(filePath)).size;
  const exceedsThreshold = bytes > streamThreshold;

  // Common case first: small enough to pull in with one read.
  if (!exceedsThreshold) {
    const text = await readFile(filePath, encoding);
    return { content: text, bytes };
  }

  // Large file: gather decoded chunks from the stream, then join them once at the end.
  const pieces: string[] = [];
  const reader = createReadStream(filePath, { encoding, highWaterMark });
  for await (const piece of reader) {
    pieces.push(piece as string);
  }
  return { content: pieces.join(''), bytes };
}
/**
 * Loads a file's raw bytes without blocking the event loop.
 *
 * Files no larger than `streamThreshold` are fetched with a single `readFile` call;
 * anything bigger is consumed through a read stream and the chunks are concatenated.
 *
 * @param filePath - Location of the file on disk
 * @param options - Streaming threshold, chunk size and optional known file size
 * @returns Promise resolving to the file's bytes plus the file size in bytes
 * @throws Error if the size lookup, the direct read, or the stream fails
 */
export async function readFileAsBuffer(
  filePath: string,
  options: Omit<ReadFileOptions, 'encoding'> = {},
): Promise<ReadFileResult<Buffer>> {
  const {
    streamThreshold = 10 * 1024 * 1024, // 10MB
    highWaterMark = 64 * 1024, // 64KB
    fileSize,
  } = options;

  // A caller-supplied size wins; only hit the filesystem when none was given.
  const bytes = fileSize != null ? fileSize : (await stat(filePath)).size;
  const exceedsThreshold = bytes > streamThreshold;

  // Common case first: small enough to pull in with one read.
  if (!exceedsThreshold) {
    const data = await readFile(filePath);
    return { content: data, bytes };
  }

  // Large file: collect raw chunks from the stream and merge them once at the end.
  const parts: Buffer[] = [];
  const reader = createReadStream(filePath, { highWaterMark });
  for await (const part of reader) {
    parts.push(part as Buffer);
  }
  return { content: Buffer.concat(parts), bytes };
}
/**
 * Reads a JSON file asynchronously and parses its contents.
 *
 * The file is always decoded as UTF-8 via `readFileAsString`, so large files are
 * streamed according to the same `streamThreshold` / `highWaterMark` options.
 * A leading byte order mark (U+FEFF) is removed before parsing: Node keeps the BOM
 * when decoding UTF-8 and `JSON.parse` rejects it, which would otherwise make
 * BOM-prefixed but valid JSON files fail to load.
 *
 * @param filePath - Path to the JSON file to read
 * @param options - Read options (`streamThreshold`, `highWaterMark`, `fileSize`); encoding is fixed to 'utf8'
 * @returns Promise resolving to the parsed JSON value, typed as `T` (no runtime validation is performed)
 * @throws Error if the file cannot be read, or SyntaxError if its content is not valid JSON (including an empty file)
 */
export async function readJsonFile<T = unknown>(
  filePath: string,
  options: Omit<ReadFileOptions, 'encoding'> = {},
): Promise<T> {
  const { content } = await readFileAsString(filePath, { ...options, encoding: 'utf8' });
  // Drop a UTF-8 BOM if present; for empty content charCodeAt(0) is NaN, so it is left as-is.
  const text = content.charCodeAt(0) === 0xfeff ? content.slice(1) : content;
  return JSON.parse(text);
}

View file

@ -1,6 +1,6 @@
import fs from 'fs';
import path from 'path';
import axios from 'axios';
import { readFileAsString } from './files';
import { loadServiceKey } from './key';
jest.mock('fs');
@ -11,6 +11,10 @@ jest.mock('@librechat/data-schemas', () => ({
},
}));
jest.mock('./files', () => ({
readFileAsString: jest.fn(),
}));
describe('loadServiceKey', () => {
const mockServiceKey = {
type: 'service_account',
@ -49,10 +53,13 @@ describe('loadServiceKey', () => {
it('should load from file path', async () => {
const filePath = '/path/to/service-key.json';
(fs.readFileSync as jest.Mock).mockReturnValue(JSON.stringify(mockServiceKey));
(readFileAsString as jest.Mock).mockResolvedValue({
content: JSON.stringify(mockServiceKey),
bytes: JSON.stringify(mockServiceKey).length,
});
const result = await loadServiceKey(filePath);
expect(fs.readFileSync).toHaveBeenCalledWith(path.resolve(filePath), 'utf8');
expect(readFileAsString).toHaveBeenCalledWith(path.resolve(filePath));
expect(result).toEqual(mockServiceKey);
});
@ -73,9 +80,7 @@ describe('loadServiceKey', () => {
it('should handle file read errors', async () => {
const filePath = '/path/to/nonexistent.json';
(fs.readFileSync as jest.Mock).mockImplementation(() => {
throw new Error('File not found');
});
(readFileAsString as jest.Mock).mockRejectedValue(new Error('File not found'));
const result = await loadServiceKey(filePath);
expect(result).toBeNull();

View file

@ -1,7 +1,7 @@
import fs from 'fs';
import path from 'path';
import axios from 'axios';
import { logger } from '@librechat/data-schemas';
import { readFileAsString } from './files';
export interface GoogleServiceKey {
type?: string;
@ -63,7 +63,7 @@ export async function loadServiceKey(keyPath: string): Promise<GoogleServiceKey
// It's a file path
try {
const absolutePath = path.isAbsolute(keyPath) ? keyPath : path.resolve(keyPath);
const fileContent = fs.readFileSync(absolutePath, 'utf8');
const { content: fileContent } = await readFileAsString(absolutePath);
serviceKey = JSON.parse(fileContent);
} catch (error) {
logger.error(`Failed to load service key from file: ${keyPath}`, error);