LibreChat/api/server/services/Files/provision.js
Danny Avila bdf4ab6043 🔧 feat: Unified file experience — schema, deferred upload, lazy provisioning
Introduces the foundation for a unified file upload experience where users
upload files once without choosing a tool_resource upfront. Files are stored
in the configured storage strategy and lazily provisioned to tool environments
(execute_code, file_search) at chat-request time based on agent capabilities.

Phase 1 - Schema + Server-Side Unified Upload:
- Add FileInteractionMode enum (text/provider/deferred/legacy) to fileConfigSchema
- Add defaultFileInteraction field to EndpointFileConfig and FileConfig types
- Update mergeFileConfig/mergeWithDefault to propagate the new field
- Modify processAgentFileUpload to support uploads without tool_resource
  using effectiveToolResource resolved from config (default: deferred)

Phase 2 - Lazy Provisioning + Multi-Resource Support:
- Create provision.js with provisionToCodeEnv and provisionToVectorDB
- Extend primeResources with lazy provisioning step that provisions
  deferred files to enabled tool environments at chat-request start
- Remove early returns in categorizeFileForToolResources so files can
  exist in multiple tool_resources simultaneously
- Wire provisioning callbacks through initializeAgent dependency injection
2026-03-21 18:07:14 -04:00

135 lines
4.2 KiB
JavaScript

const fs = require('fs');
const { EnvVar } = require('@librechat/agents');
const { logger } = require('@librechat/data-schemas');
const { FileSources } = require('librechat-data-provider');
const { loadAuthValues } = require('~/server/services/Tools/credentials');
const { getStrategyFunctions } = require('./strategies');
const { updateFile } = require('~/models');
/**
* Provisions a file to the code execution environment.
* Gets a read stream from our storage and uploads to the code env.
*
* @param {object} params
* @param {object} params.req - Express request object (needs req.user.id)
* @param {import('librechat-data-provider').TFile} params.file - The file record from DB
* @param {string} [params.entity_id] - Optional entity ID (agent_id)
* @returns {Promise<string>} The fileIdentifier from the code env
*/
async function provisionToCodeEnv({ req, file, entity_id = '' }) {
const { getDownloadStream } = getStrategyFunctions(file.source);
if (!getDownloadStream) {
throw new Error(
`Cannot provision file "${file.filename}" to code env: storage source "${file.source}" does not support download streams`,
);
}
const { handleFileUpload: uploadCodeEnvFile } = getStrategyFunctions(FileSources.execute_code);
const result = await loadAuthValues({ userId: req.user.id, authFields: [EnvVar.CODE_API_KEY] });
const stream = await getDownloadStream(req, file.filepath);
const fileIdentifier = await uploadCodeEnvFile({
req,
stream,
filename: file.filename,
apiKey: result[EnvVar.CODE_API_KEY],
entity_id,
});
const updatedMetadata = {
...file.metadata,
fileIdentifier,
};
await updateFile({
file_id: file.file_id,
metadata: updatedMetadata,
});
logger.debug(
`[provisionToCodeEnv] Provisioned file "${file.filename}" (${file.file_id}) to code env`,
);
return fileIdentifier;
}
/**
* Provisions a file to the vector DB for file_search/RAG.
* Gets the file from our storage and uploads vectors/embeddings.
*
* @param {object} params
* @param {object} params.req - Express request object
* @param {import('librechat-data-provider').TFile} params.file - The file record from DB
* @param {string} [params.entity_id] - Optional entity ID (agent_id)
* @returns {Promise<{ embedded: boolean }>} Embedding result
*/
async function provisionToVectorDB({ req, file, entity_id }) {
if (!process.env.RAG_API_URL) {
logger.warn('[provisionToVectorDB] RAG_API_URL not defined, skipping vector provisioning');
return { embedded: false };
}
const { getDownloadStream } = getStrategyFunctions(file.source);
if (!getDownloadStream) {
throw new Error(
`Cannot provision file "${file.filename}" to vector DB: storage source "${file.source}" does not support download streams`,
);
}
// The uploadVectors function expects a file-like object with a `path` property for fs.createReadStream.
// Since we're provisioning from storage (not a multer upload), we need to stream to a temp file first.
const os = require('os');
const path = require('path');
const tmpPath = path.join(os.tmpdir(), `provision-${file.file_id}-${file.filename}`);
try {
const stream = await getDownloadStream(req, file.filepath);
await new Promise((resolve, reject) => {
const writeStream = fs.createWriteStream(tmpPath);
stream.pipe(writeStream);
writeStream.on('finish', resolve);
writeStream.on('error', reject);
stream.on('error', reject);
});
const { uploadVectors } = require('./VectorDB/crud');
const tempFile = {
path: tmpPath,
originalname: file.filename,
mimetype: file.type,
size: file.bytes,
};
const embeddingResult = await uploadVectors({
req,
file: tempFile,
file_id: file.file_id,
entity_id,
});
const embedded = embeddingResult?.embedded ?? false;
await updateFile({
file_id: file.file_id,
embedded,
});
logger.debug(
`[provisionToVectorDB] Provisioned file "${file.filename}" (${file.file_id}) to vector DB, embedded=${embedded}`,
);
return { embedded };
} finally {
// Clean up temp file
try {
fs.unlinkSync(tmpPath);
} catch {
// Ignore cleanup errors
}
}
}
module.exports = {
provisionToCodeEnv,
provisionToVectorDB,
};