From 40b3bc9b840c9a5eec50debdaca15bd55ab3844d Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Sun, 22 Mar 2026 12:32:12 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=A7=20feat:=20Unified=20file=20experie?= =?UTF-8?q?nce=20=E2=80=94=20schema,=20deferred=20upload,=20lazy=20provisi?= =?UTF-8?q?oning?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 fixes for the unified file experience: - Add code env file staleness detection via batch session checks (checkSessionsAlive) — groups files by session_id, one API call per session, skips files updated within 6h safe window - Parallelize file provisioning across files using Promise.allSettled - Surface provisioning failures as warnings on InitializedAgent - Fix temp file path safety (use file_id + extension, not raw filename) - Fix inconsistent return types (normalize to [] instead of undefined) - Wire checkSessionsAlive through initialize.js → initialize.ts → primeResources --- .../services/Endpoints/agents/initialize.js | 8 +- api/server/services/Files/provision.js | 143 ++++++++++++++- packages/api/src/agents/initialize.ts | 13 +- packages/api/src/agents/resources.ts | 171 ++++++++++++------ 4 files changed, 273 insertions(+), 62 deletions(-) diff --git a/api/server/services/Endpoints/agents/initialize.js b/api/server/services/Endpoints/agents/initialize.js index 1fd326709a..7a8b13874e 100644 --- a/api/server/services/Endpoints/agents/initialize.js +++ b/api/server/services/Endpoints/agents/initialize.js @@ -23,7 +23,11 @@ const { } = require('~/server/controllers/agents/callbacks'); const { loadAgentTools, loadToolsForExecution } = require('~/server/services/ToolService'); const { filterFilesByAgentAccess } = require('~/server/services/Files/permissions'); -const { provisionToCodeEnv, provisionToVectorDB } = require('~/server/services/Files/provision'); +const { + provisionToCodeEnv, + provisionToVectorDB, + checkSessionsAlive, +} = require('~/server/services/Files/provision'); const { getModelsConfig } = require('~/server/controllers/ModelController'); const { checkPermission } = require('~/server/services/PermissionService'); const AgentClient = require('~/server/controllers/agents/client'); @@ -219,6 +223,7 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => { filterFilesByAgentAccess, provisionToCodeEnv, provisionToVectorDB, + checkSessionsAlive, }, ); @@ -302,6 +307,7 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => { filterFilesByAgentAccess, provisionToCodeEnv, provisionToVectorDB, + checkSessionsAlive, }, ); diff --git a/api/server/services/Files/provision.js b/api/server/services/Files/provision.js index e70c9abc03..176275be5b 100644 --- a/api/server/services/Files/provision.js +++ b/api/server/services/Files/provision.js @@ -1,11 +1,21 @@ const fs = require('fs'); -const { EnvVar } = require('@librechat/agents'); +const path = require('path'); +const os = require('os'); +const { EnvVar, getCodeBaseURL } = require('@librechat/agents'); +const { + logAxiosError, + createAxiosInstance, + codeServerHttpAgent, + codeServerHttpsAgent, +} = require('@librechat/api'); const { logger } = require('@librechat/data-schemas'); const { FileSources } = require('librechat-data-provider'); const { loadAuthValues } = require('~/server/services/Tools/credentials'); const { getStrategyFunctions } = require('./strategies'); const { updateFile } = require('~/models'); +const axios = createAxiosInstance(); + /** * Provisions a file to the code execution environment. * Gets a read stream from our storage and uploads to the code env. @@ -78,9 +88,7 @@ async function provisionToVectorDB({ req, file, entity_id }) { // The uploadVectors function expects a file-like object with a `path` property for fs.createReadStream. // Since we're provisioning from storage (not a multer upload), we need to stream to a temp file first. - const os = require('os'); - const path = require('path'); - const tmpPath = path.join(os.tmpdir(), `provision-${file.file_id}-${file.filename}`); + const tmpPath = path.join(os.tmpdir(), `provision-${file.file_id}${path.extname(file.filename)}`); try { const stream = await getDownloadStream(req, file.filepath); @@ -129,7 +137,134 @@ async function provisionToVectorDB({ req, file, entity_id }) { } } +/** + * Check if a single code env file is still alive by querying its session. + * + * @param {object} params + * @param {import('librechat-data-provider').TFile} params.file - File with metadata.fileIdentifier + * @param {string} params.apiKey - CODE_API_KEY + * @returns {Promise} true if the file is still accessible in the code env + */ +async function checkCodeEnvFileAlive({ file, apiKey }) { + if (!file.metadata?.fileIdentifier) { + return false; + } + + try { + const baseURL = getCodeBaseURL(); + const [filePath, queryString] = file.metadata.fileIdentifier.split('?'); + const session_id = filePath.split('/')[0]; + + let queryParams = {}; + if (queryString) { + queryParams = Object.fromEntries(new URLSearchParams(queryString).entries()); + } + + const response = await axios({ + method: 'get', + url: `${baseURL}/files/${session_id}`, + params: { detail: 'summary', ...queryParams }, + headers: { + 'User-Agent': 'LibreChat/1.0', + 'X-API-Key': apiKey, + }, + httpAgent: codeServerHttpAgent, + httpsAgent: codeServerHttpsAgent, + timeout: 5000, + }); + + const found = response.data?.some((f) => f.name?.startsWith(filePath)); + return !!found; + } catch (error) { + logAxiosError({ + message: `[checkCodeEnvFileAlive] Error checking file "${file.filename}": ${error.message}`, + error, + }); + return false; + } +} + +/** + * Batch-check code env file liveness by session_id. + * Groups files by session, makes one API call per session. + * + * @param {object} params + * @param {import('librechat-data-provider').TFile[]} params.files - Files with metadata.fileIdentifier + * @param {string} params.userId - User ID for loading CODE_API_KEY + * @param {number} [params.staleSafeWindowMs=21600000] - Skip check if file updated within this window (default 6h) + * @returns {Promise>} Set of file_ids that are confirmed alive + */ +async function checkSessionsAlive({ files, userId, staleSafeWindowMs = 6 * 60 * 60 * 1000 }) { + const result = await loadAuthValues({ userId, authFields: [EnvVar.CODE_API_KEY] }); + const apiKey = result[EnvVar.CODE_API_KEY]; + const aliveFileIds = new Set(); + const now = Date.now(); + + // Group files by session_id, skip recently-updated files (fast pre-filter) + /** @type {Map>} */ + const sessionGroups = new Map(); + + for (const file of files) { + if (!file.metadata?.fileIdentifier) { + continue; + } + + const updatedAt = file.updatedAt ? new Date(file.updatedAt).getTime() : 0; + if (now - updatedAt < staleSafeWindowMs) { + aliveFileIds.add(file.file_id); + continue; + } + + const [filePath] = file.metadata.fileIdentifier.split('?'); + const session_id = filePath.split('/')[0]; + + if (!sessionGroups.has(session_id)) { + sessionGroups.set(session_id, []); + } + sessionGroups.get(session_id).push({ file_id: file.file_id, filePath }); + } + + // One API call per session (in parallel) + const baseURL = getCodeBaseURL(); + const sessionChecks = Array.from(sessionGroups.entries()).map( + async ([session_id, fileEntries]) => { + try { + const response = await axios({ + method: 'get', + url: `${baseURL}/files/${session_id}`, + params: { detail: 'summary' }, + headers: { + 'User-Agent': 'LibreChat/1.0', + 'X-API-Key': apiKey, + }, + httpAgent: codeServerHttpAgent, + httpsAgent: codeServerHttpsAgent, + timeout: 5000, + }); + + const remoteFiles = response.data ?? []; + for (const { file_id, filePath } of fileEntries) { + if (remoteFiles.some((f) => f.name?.startsWith(filePath))) { + aliveFileIds.add(file_id); + } + } + } catch (error) { + logAxiosError({ + message: `[checkSessionsAlive] Error checking session "${session_id}": ${error.message}`, + error, + }); + // All files in this session treated as expired + } + }, + ); + + await Promise.allSettled(sessionChecks); + return aliveFileIds; +} + module.exports = { provisionToCodeEnv, provisionToVectorDB, + checkCodeEnvFileAlive, + checkSessionsAlive, }; diff --git a/packages/api/src/agents/initialize.ts b/packages/api/src/agents/initialize.ts index 0d07cc59dc..d1d4f5ad78 100644 --- a/packages/api/src/agents/initialize.ts +++ b/packages/api/src/agents/initialize.ts @@ -35,6 +35,7 @@ import type { TFilterFilesByAgentAccess, TProvisionToCodeEnv, TProvisionToVectorDB, + TCheckSessionsAlive, } from './resources'; /** @@ -70,6 +71,8 @@ export type InitializedAgent = Agent & { actionsEnabled?: boolean; /** Maximum characters allowed in a single tool result before truncation. */ maxToolResultChars?: number; + /** Warnings from lazy file provisioning (e.g., failed uploads) */ + provisionWarnings?: string[]; }; /** @@ -151,6 +154,8 @@ export interface InitializeAgentDbMethods extends EndpointDbMethods { provisionToCodeEnv?: TProvisionToCodeEnv; /** Optional: provision a file to the vector DB for file_search */ provisionToVectorDB?: TProvisionToVectorDB; + /** Optional: batch-check code env file liveness */ + checkSessionsAlive?: TCheckSessionsAlive; } /** @@ -292,7 +297,11 @@ export async function initializeAgent( }); } - const { attachments: primedAttachments, tool_resources } = await primeResources({ + const { + attachments: primedAttachments, + tool_resources, + warnings: provisionWarnings, + } = await primeResources({ req: req as never, getFiles: db.getFiles as never, filterFiles: db.filterFilesByAgentAccess, @@ -306,6 +315,7 @@ export async function initializeAgent( enabledToolResources: toolResourceSet, provisionToCodeEnv: db.provisionToCodeEnv, provisionToVectorDB: db.provisionToVectorDB, + checkSessionsAlive: db.checkSessionsAlive, }); const { @@ -463,6 +473,7 @@ export async function initializeAgent( useLegacyContent: !!options.useLegacyContent, tools: (tools ?? []) as GenericTool[] & string[], maxToolResultChars: maxToolResultCharsResolved, + provisionWarnings: provisionWarnings.length > 0 ? provisionWarnings : undefined, maxContextTokens: maxContextTokens != null && maxContextTokens > 0 ? maxContextTokens diff --git a/packages/api/src/agents/resources.ts b/packages/api/src/agents/resources.ts index 581d2d8f07..04b10fb267 100644 --- a/packages/api/src/agents/resources.ts +++ b/packages/api/src/agents/resources.ts @@ -25,6 +25,17 @@ export type TProvisionToVectorDB = (params: { entity_id?: string; }) => Promise<{ embedded: boolean }>; +/** + * Function type for batch-checking code env file liveness. + * Groups files by session, makes one API call per session. + * @returns Set of file_ids that are confirmed alive + */ +export type TCheckSessionsAlive = (params: { + files: TFile[]; + userId: string; + staleSafeWindowMs?: number; +}) => Promise>; + /** * Function type for retrieving files from the database * @param filter - MongoDB filter query for files @@ -185,6 +196,7 @@ export const primeResources = async ({ enabledToolResources, provisionToCodeEnv, provisionToVectorDB, + checkSessionsAlive, }: { req: ServerRequest & { user?: IUser }; appConfig?: AppConfig; @@ -200,9 +212,12 @@ export const primeResources = async ({ provisionToCodeEnv?: TProvisionToCodeEnv; /** Optional callback to provision a file to the vector DB for file_search */ provisionToVectorDB?: TProvisionToVectorDB; + /** Optional callback to batch-check code env file liveness by session */ + checkSessionsAlive?: TCheckSessionsAlive; }): Promise<{ - attachments: Array | undefined; + attachments: Array; tool_resources: AgentToolResources | undefined; + warnings: string[]; }> => { try { /** @@ -310,7 +325,7 @@ export const primeResources = async ({ } if (!_attachments) { - return { attachments: attachments.length > 0 ? attachments : undefined, tool_resources }; + return { attachments, tool_resources, warnings: [] }; } const files = await _attachments; @@ -342,6 +357,8 @@ export const primeResources = async ({ * agent's enabled tool resources, provision them now (at chat-request start). * This handles files uploaded via the unified upload flow (no tool_resource chosen at upload time). */ + const warnings: string[] = []; + if (enabledToolResources && enabledToolResources.size > 0 && attachments.length > 0) { const needsCodeEnv = enabledToolResources.has(EToolResources.execute_code) && provisionToCodeEnv != null; @@ -349,76 +366,117 @@ export const primeResources = async ({ enabledToolResources.has(EToolResources.file_search) && provisionToVectorDB != null; if (needsCodeEnv || needsVectorDB) { - for (const file of attachments) { - if (!file?.file_id) { - continue; + // Batch staleness check: verify code env files are still alive + let aliveFileIds: Set = new Set(); + if (needsCodeEnv && checkSessionsAlive && req.user?.id) { + const filesWithIdentifiers = attachments.filter( + (f) => f?.metadata?.fileIdentifier && f.file_id, + ); + if (filesWithIdentifiers.length > 0) { + aliveFileIds = await checkSessionsAlive({ + files: filesWithIdentifiers as TFile[], + userId: req.user.id, + }); } + } - // Skip images for file_search (not supported) - const isImage = file.type?.startsWith('image') ?? false; - - // Provision to code env if needed and not already provisioned - if ( - needsCodeEnv && - !file.metadata?.fileIdentifier && - !processedResourceFiles.has(`${EToolResources.execute_code}:${file.file_id}`) - ) { - try { - const fileIdentifier = await provisionToCodeEnv({ - req: req as ServerRequest & { user?: IUser }, - file, - entity_id: agentId, - }); - // Update the file object in-place so categorization picks it up - file.metadata = { ...file.metadata, fileIdentifier }; - addFileToResource({ - file, - resourceType: EToolResources.execute_code, - tool_resources, - processedResourceFiles, - }); - } catch (error) { - logger.error( - `[primeResources] Failed to provision file "${file.filename}" to code env`, - error, - ); + // Provision files in parallel + const provisionResults = await Promise.allSettled( + attachments.map(async (file) => { + if (!file?.file_id) { + return; } - } - // Provision to vector DB if needed and not already provisioned - if ( - needsVectorDB && - !isImage && - file.embedded !== true && - !processedResourceFiles.has(`${EToolResources.file_search}:${file.file_id}`) - ) { - try { - const result = await provisionToVectorDB({ - req: req as ServerRequest & { user?: IUser }, - file, - entity_id: agentId, - }); - if (result.embedded) { - file.embedded = true; + const isImage = file.type?.startsWith('image') ?? false; + const typedReq = req as ServerRequest & { user?: IUser }; + + // Code env provisioning (with staleness check) + if ( + needsCodeEnv && + !processedResourceFiles.has(`${EToolResources.execute_code}:${file.file_id}`) + ) { + const hasFileIdentifier = !!file.metadata?.fileIdentifier; + const isStale = hasFileIdentifier && !aliveFileIds.has(file.file_id); + const needsProvision = !hasFileIdentifier || isStale; + + if (needsProvision) { + if (isStale) { + logger.info( + `[primeResources] Code env file expired for "${file.filename}" (${file.file_id}), re-provisioning`, + ); + file.metadata = { ...file.metadata, fileIdentifier: undefined }; + } + + try { + const fileIdentifier = await provisionToCodeEnv({ + req: typedReq, + file, + entity_id: agentId, + }); + file.metadata = { ...file.metadata, fileIdentifier }; + addFileToResource({ + file, + resourceType: EToolResources.execute_code, + tool_resources, + processedResourceFiles, + }); + } catch (error) { + const msg = `Failed to provision "${file.filename}" to code env`; + logger.error(`[primeResources] ${msg}`, error); + warnings.push(msg); + } + } else { + // File is alive, ensure it's categorized addFileToResource({ file, - resourceType: EToolResources.file_search, + resourceType: EToolResources.execute_code, tool_resources, processedResourceFiles, }); } - } catch (error) { - logger.error( - `[primeResources] Failed to provision file "${file.filename}" to vector DB`, - error, - ); } + + // Vector DB provisioning + if ( + needsVectorDB && + !isImage && + file.embedded !== true && + !processedResourceFiles.has(`${EToolResources.file_search}:${file.file_id}`) + ) { + try { + const result = await provisionToVectorDB({ + req: typedReq, + file, + entity_id: agentId, + }); + if (result.embedded) { + file.embedded = true; + addFileToResource({ + file, + resourceType: EToolResources.file_search, + tool_resources, + processedResourceFiles, + }); + } + } catch (error) { + const msg = `Failed to provision "${file.filename}" to vector DB`; + logger.error(`[primeResources] ${msg}`, error); + warnings.push(msg); + } + } + }), + ); + + // Log any unexpected rejections from Promise.allSettled + for (const result of provisionResults) { + if (result.status === 'rejected') { + logger.error('[primeResources] Unexpected provisioning rejection', result.reason); } } } } - return { attachments: attachments.length > 0 ? attachments : [], tool_resources }; + return { attachments, tool_resources, warnings }; } catch (error) { logger.error('Error priming resources', error); @@ -438,6 +496,7 @@ export const primeResources = async ({ return { attachments: safeAttachments, tool_resources: _tool_resources, + warnings: [], }; } };