2025-08-27 03:44:39 -04:00
|
|
|
import axios from 'axios';
|
|
|
|
|
import FormData from 'form-data';
|
2025-09-20 10:17:24 -04:00
|
|
|
import { createReadStream } from 'fs';
|
2025-08-27 03:44:39 -04:00
|
|
|
import { logger } from '@librechat/data-schemas';
|
|
|
|
|
import { FileSources } from 'librechat-data-provider';
|
|
|
|
|
import type { Request as ServerRequest } from 'express';
|
2025-09-20 10:17:24 -04:00
|
|
|
import { logAxiosError, readFileAsString } from '~/utils';
|
2025-08-27 03:44:39 -04:00
|
|
|
import { generateShortLivedToken } from '~/crypto/jwt';
|
|
|
|
|
|
|
|
|
|
/**
 * Extracts text from an uploaded file, preferring the RAG API and falling back
 * to {@link parseTextNative} whenever the RAG API cannot be used.
 *
 * The native fallback is taken when:
 * - `process.env.RAG_API_URL` is not set,
 * - the request carries no user ID,
 * - the `/health` probe fails or throws,
 * - the `/text` upload throws or its response carries no string `text` field.
 *
 * @param params - The parameters object
 * @param params.req - The Express request object; only `req.user.id` is read
 * @param params.file - The uploaded file; its `path` is streamed to the RAG API
 * @param params.file_id - The file ID sent alongside the upload
 * @returns The parsed text, its UTF-8 byte length, and `FileSources.text` as the
 * source. On fallback, whatever `parseTextNative` resolves to.
 */
export async function parseText({
  req,
  file,
  file_id,
}: {
  req: Pick<ServerRequest, 'user'> & {
    user?: { id: string };
  };
  file: Express.Multer.File;
  file_id: string;
}): Promise<{ text: string; bytes: number; source: string }> {
  if (!process.env.RAG_API_URL) {
    logger.debug('[parseText] RAG_API_URL not defined, falling back to native text parsing');
    return parseTextNative(file);
  }

  const userId = req.user?.id;
  if (!userId) {
    logger.debug('[parseText] No user ID provided, falling back to native text parsing');
    return parseTextNative(file);
  }

  try {
    const healthResponse = await axios.get(`${process.env.RAG_API_URL}/health`, {
      timeout: 10000,
    });
    // Deliberately lenient: either an 'OK' status text or a 200 status code is
    // accepted; fall back only when neither signals success.
    if (healthResponse?.statusText !== 'OK' && healthResponse?.status !== 200) {
      logger.debug('[parseText] RAG API health check failed, falling back to native parsing');
      return parseTextNative(file);
    }
  } catch (healthError) {
    logAxiosError({
      message: '[parseText] RAG API health check failed, falling back to native parsing:',
      error: healthError,
    });
    return parseTextNative(file);
  }

  // Declared outside the try so the `finally` below can always release it.
  let fileStream: ReturnType<typeof createReadStream> | undefined;

  try {
    const jwtToken = generateShortLivedToken(userId);
    const formData = new FormData();
    formData.append('file_id', file_id);
    fileStream = createReadStream(file.path);
    formData.append('file', fileStream);

    const formHeaders = formData.getHeaders();

    const response = await axios.post(`${process.env.RAG_API_URL}/text`, formData, {
      headers: {
        Authorization: `Bearer ${jwtToken}`,
        accept: 'application/json',
        ...formHeaders,
      },
      timeout: 30000,
    });

    logger.debug(`[parseText] RAG API completed successfully (${response.status})`);

    // `response.data` is untyped; optional chaining tolerates null or primitive
    // bodies, and the typeof check rejects a missing or non-string `text` before
    // it reaches Buffer.byteLength.
    const parsedText: unknown = response.data?.text;
    if (typeof parsedText !== 'string') {
      throw new Error('RAG API did not return parsed text');
    }

    return {
      text: parsedText,
      bytes: Buffer.byteLength(parsedText, 'utf8'),
      source: FileSources.text,
    };
  } catch (error) {
    logAxiosError({
      message: '[parseText] RAG API text parsing failed, falling back to native parsing',
      error,
    });
    // The native fallback re-reads the file by path, independent of the stream.
    return parseTextNative(file);
  } finally {
    // If the request failed before the upload stream was fully read, the stream
    // may still hold an open file descriptor. destroy() releases it and is a
    // no-op when the stream has already closed.
    fileStream?.destroy();
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Native text parsing fallback used when the RAG API is unavailable.
 *
 * Delegates the read to `readFileAsString`, passing the file's path and its
 * reported size, and labels the result with `FileSources.text`. Only plain text
 * reading happens here; complex formats are left to the RAG API.
 *
 * @param file - The uploaded file; `file.path` and `file.size` are read
 * @returns The file content as `text`, the `bytes` count reported by
 * `readFileAsString`, and `FileSources.text` as the `source`
 */
export async function parseTextNative(file: Express.Multer.File): Promise<{
  text: string;
  bytes: number;
  source: string;
}> {
  const readResult = await readFileAsString(file.path, { fileSize: file.size });

  return {
    text: readResult.content,
    bytes: readResult.bytes,
    source: FileSources.text,
  };
}
|