From bc039cea2912cede0d825e742e03eb37a8950d41 Mon Sep 17 00:00:00 2001 From: Ruben Talstra Date: Mon, 31 Mar 2025 19:44:20 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=A7=20fix:=20Azure=20Blob=20Integratio?= =?UTF-8?q?n=20and=20File=20Source=20References=20(#6575)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🔧 fix: Update file source references to include 'azure_blob' for correct service initialization * 🔧 fix: Add Azure Blob Storage Emulator entries to .gitignore * fix: Update file source references to include 'azure_blob' for correct service initialization * fix: Refactor Azure Blob Storage functions to use environment variables for access control and container name, fix deletion improper logging and improper params * fix: Add basePath determination for agent file uploads based on MIME type * fix: Implement file streaming to Azure Blob Storage to optimize memory usage during uploads (non-images) * fix: Update SourceIcon to include 'azure_blob' class and adjust model setting in useSelectorEffects for assistants * chore: import order --------- Co-authored-by: Danny Avila --- .gitignore | 4 + api/server/services/AppService.js | 2 +- api/server/services/Files/Azure/crud.js | 99 +++++++++++++++---- api/server/services/Files/images/encode.js | 2 +- api/server/services/Files/process.js | 5 +- api/server/services/Files/strategies.js | 2 + .../Chat/Input/Files/SourceIcon.tsx | 3 +- .../src/hooks/Endpoint/useSelectorEffects.ts | 2 +- packages/data-provider/src/types/files.ts | 1 + 9 files changed, 93 insertions(+), 27 deletions(-) diff --git a/.gitignore b/.gitignore index c4477db92..bd3b596c8 100644 --- a/.gitignore +++ b/.gitignore @@ -37,6 +37,10 @@ client/public/main.js client/public/main.js.map client/public/main.js.LICENSE.txt +# Azure Blob Storage Emulator (Azurite) +__azurite** +__blobstorage__/**/* + # Dependency directorys # Deployed apps should consider commenting these lines out: # see https://npmjs.org/doc/faq.html#Should-I-check-my-node_modules-folder-into-git diff --git a/api/server/services/AppService.js b/api/server/services/AppService.js index baead9744..871599712 100644 --- a/api/server/services/AppService.js +++ b/api/server/services/AppService.js @@ -52,7 +52,7 @@ const AppService = async (app) => { if (fileStrategy === FileSources.firebase) { initializeFirebase(); - } else if (fileStrategy === FileSources.azure) { + } else if (fileStrategy === FileSources.azure_blob) { initializeAzureBlobService(); } else if (fileStrategy === FileSources.s3) { initializeS3(); diff --git a/api/server/services/Files/Azure/crud.js b/api/server/services/Files/Azure/crud.js index 638da34b2..cb52de831 100644 --- a/api/server/services/Files/Azure/crud.js +++ b/api/server/services/Files/Azure/crud.js @@ -1,11 +1,13 @@ const fs = require('fs'); const path = require('path'); +const mime = require('mime'); const axios = require('axios'); const fetch = require('node-fetch'); const { logger } = require('~/config'); const { getAzureContainerClient } = require('./initialize'); const defaultBasePath = 'images'; +const { AZURE_STORAGE_PUBLIC_ACCESS = 'true', AZURE_CONTAINER_NAME = 'files' } = process.env; /** * Uploads a buffer to Azure Blob Storage. @@ -29,10 +31,9 @@ async function saveBufferToAzure({ }) { try { const containerClient = getAzureContainerClient(containerName); + const access = AZURE_STORAGE_PUBLIC_ACCESS?.toLowerCase() === 'true' ? 'blob' : undefined; // Create the container if it doesn't exist. This is done per operation. - await containerClient.createIfNotExists({ - access: process.env.AZURE_STORAGE_PUBLIC_ACCESS ? 'blob' : undefined, - }); + await containerClient.createIfNotExists({ access }); const blobPath = `${basePath}/${userId}/${fileName}`; const blockBlobClient = containerClient.getBlockBlobClient(blobPath); await blockBlobClient.uploadData(buffer); @@ -97,25 +98,21 @@ async function getAzureURL({ fileName, basePath = defaultBasePath, userId, conta * Deletes a blob from Azure Blob Storage. * * @param {Object} params - * @param {string} params.fileName - The name of the file. - * @param {string} [params.basePath='images'] - The base folder where the file is stored. - * @param {string} params.userId - The user's id. - * @param {string} [params.containerName] - The Azure Blob container name. + * @param {ServerRequest} params.req - The Express request object. + * @param {MongoFile} params.file - The file object. */ -async function deleteFileFromAzure({ - fileName, - basePath = defaultBasePath, - userId, - containerName, -}) { +async function deleteFileFromAzure(req, file) { try { - const containerClient = getAzureContainerClient(containerName); - const blobPath = `${basePath}/${userId}/${fileName}`; + const containerClient = getAzureContainerClient(AZURE_CONTAINER_NAME); + const blobPath = file.filepath.split(`${AZURE_CONTAINER_NAME}/`)[1]; + if (!blobPath.includes(req.user.id)) { + throw new Error('User ID not found in blob path'); + } const blockBlobClient = containerClient.getBlockBlobClient(blobPath); await blockBlobClient.delete(); logger.debug('[deleteFileFromAzure] Blob deleted successfully from Azure Blob Storage'); } catch (error) { - logger.error('[deleteFileFromAzure] Error deleting blob:', error.message); + logger.error('[deleteFileFromAzure] Error deleting blob:', error); if (error.statusCode === 404) { return; } @@ -123,6 +120,65 @@ async function deleteFileFromAzure({ } } +/** + * Streams a file from disk directly to Azure Blob Storage without loading + * the entire file into memory. + * + * @param {Object} params + * @param {string} params.userId - The user's id. + * @param {string} params.filePath - The local file path to upload. + * @param {string} params.fileName - The name of the file in Azure. + * @param {string} [params.basePath='images'] - The base folder within the container. + * @param {string} [params.containerName] - The Azure Blob container name. + * @returns {Promise} The URL of the uploaded blob. + */ +async function streamFileToAzure({ + userId, + filePath, + fileName, + basePath = defaultBasePath, + containerName, +}) { + try { + const containerClient = getAzureContainerClient(containerName); + const access = AZURE_STORAGE_PUBLIC_ACCESS?.toLowerCase() === 'true' ? 'blob' : undefined; + + // Create the container if it doesn't exist + await containerClient.createIfNotExists({ access }); + + const blobPath = `${basePath}/${userId}/${fileName}`; + const blockBlobClient = containerClient.getBlockBlobClient(blobPath); + + // Get file size for proper content length + const stats = await fs.promises.stat(filePath); + + // Create read stream from the file + const fileStream = fs.createReadStream(filePath); + + const blobContentType = mime.getType(fileName); + await blockBlobClient.uploadStream( + fileStream, + undefined, // Use default concurrency (5) + undefined, // Use default buffer size (8MB) + { + blobHTTPHeaders: { + blobContentType, + }, + onProgress: (progress) => { + logger.debug( + `[streamFileToAzure] Upload progress: ${progress.loadedBytes} bytes of ${stats.size}`, + ); + }, + }, + ); + + return blockBlobClient.url; + } catch (error) { + logger.error('[streamFileToAzure] Error streaming file:', error); + throw error; + } +} + /** * Uploads a file from the local file system to Azure Blob Storage. * @@ -146,18 +202,19 @@ async function uploadFileToAzure({ }) { try { const inputFilePath = file.path; - const inputBuffer = await fs.promises.readFile(inputFilePath); - const bytes = Buffer.byteLength(inputBuffer); + const stats = await fs.promises.stat(inputFilePath); + const bytes = stats.size; const userId = req.user.id; const fileName = `${file_id}__${path.basename(inputFilePath)}`; - const fileURL = await saveBufferToAzure({ + + const fileURL = await streamFileToAzure({ userId, - buffer: inputBuffer, + filePath: inputFilePath, fileName, basePath, containerName, }); - await fs.promises.unlink(inputFilePath); + return { filepath: fileURL, bytes }; } catch (error) { logger.error('[uploadFileToAzure] Error uploading file:', error); diff --git a/api/server/services/Files/images/encode.js b/api/server/services/Files/images/encode.js index 759e36378..85d651397 100644 --- a/api/server/services/Files/images/encode.js +++ b/api/server/services/Files/images/encode.js @@ -37,7 +37,7 @@ const base64Only = new Set([ EModelEndpoint.bedrock, ]); -const blobStorageSources = new Set([FileSources.azure, FileSources.s3]); +const blobStorageSources = new Set([FileSources.azure_blob, FileSources.s3]); /** * Encodes and formats the given files. diff --git a/api/server/services/Files/process.js b/api/server/services/Files/process.js index 78a4976e2..384955dab 100644 --- a/api/server/services/Files/process.js +++ b/api/server/services/Files/process.js @@ -492,7 +492,7 @@ const processAgentFileUpload = async ({ req, res, metadata }) => { let fileInfoMetadata; const entity_id = messageAttachment === true ? undefined : agent_id; - + const basePath = mime.getType(file.originalname)?.startsWith('image') ? 'images' : 'uploads'; if (tool_resource === EToolResources.execute_code) { const isCodeEnabled = await checkCapability(req, AgentCapabilities.execute_code); if (!isCodeEnabled) { @@ -532,7 +532,7 @@ const processAgentFileUpload = async ({ req, res, metadata }) => { images, filename, filepath: ocrFileURL, - } = await handleFileUpload({ req, file, file_id, entity_id: agent_id }); + } = await handleFileUpload({ req, file, file_id, entity_id: agent_id, basePath }); const fileInfo = removeNullishValues({ text, @@ -582,6 +582,7 @@ const processAgentFileUpload = async ({ req, res, metadata }) => { file, file_id, entity_id, + basePath, }); let filepath = _filepath; diff --git a/api/server/services/Files/strategies.js b/api/server/services/Files/strategies.js index d05ea0372..c6cfe7706 100644 --- a/api/server/services/Files/strategies.js +++ b/api/server/services/Files/strategies.js @@ -211,6 +211,8 @@ const getStrategyFunctions = (fileSource) => { } else if (fileSource === FileSources.openai) { return openAIStrategy(); } else if (fileSource === FileSources.azure) { + return openAIStrategy(); + } else if (fileSource === FileSources.azure_blob) { return azureStrategy(); } else if (fileSource === FileSources.vectordb) { return vectorStrategy(); diff --git a/client/src/components/Chat/Input/Files/SourceIcon.tsx b/client/src/components/Chat/Input/Files/SourceIcon.tsx index c3b2a4423..677de452f 100644 --- a/client/src/components/Chat/Input/Files/SourceIcon.tsx +++ b/client/src/components/Chat/Input/Files/SourceIcon.tsx @@ -10,7 +10,8 @@ const sourceToEndpoint = { const sourceToClassname = { [FileSources.openai]: 'bg-white/75 dark:bg-black/65', - [FileSources.azure]: 'azure-bg-color opacity-85', + [FileSources.azure]: 'azure-bg-color', + [FileSources.azure_blob]: 'azure-bg-color', [FileSources.execute_code]: 'bg-black text-white opacity-85', [FileSources.text]: 'bg-blue-500 dark:bg-blue-900 opacity-85 text-white', [FileSources.vectordb]: 'bg-yellow-700 dark:bg-yellow-900 opacity-85 text-white', diff --git a/client/src/hooks/Endpoint/useSelectorEffects.ts b/client/src/hooks/Endpoint/useSelectorEffects.ts index b9a245f2f..ecfc51c30 100644 --- a/client/src/hooks/Endpoint/useSelectorEffects.ts +++ b/client/src/hooks/Endpoint/useSelectorEffects.ts @@ -61,7 +61,7 @@ export default function useSelectorEffects({ } const assistant = assistantsMap?.[endpoint ?? '']?.[assistant_id]; if (assistant !== undefined) { - setOption('model')(''); + setOption('model')(assistant.model); setOption('assistant_id')(assistant_id); } } diff --git a/packages/data-provider/src/types/files.ts b/packages/data-provider/src/types/files.ts index 78ef7781e..2313af823 100644 --- a/packages/data-provider/src/types/files.ts +++ b/packages/data-provider/src/types/files.ts @@ -4,6 +4,7 @@ export enum FileSources { local = 'local', firebase = 'firebase', azure = 'azure', + azure_blob = 'azure_blob', openai = 'openai', s3 = 's3', vectordb = 'vectordb',