diff --git a/api/models/Agent.js b/api/models/Agent.js
index 11fd6dabb2..050fa9cb88 100644
--- a/api/models/Agent.js
+++ b/api/models/Agent.js
@@ -278,6 +278,7 @@ const updateAgent = async (searchParameter, updateData, updatingUserId = null) =
       }
     }
 
+
   const versionEntry = {
     ...versionData,
     ...directUpdates,
diff --git a/api/server/services/Files/AzureDocumentIntelligence/crud.js b/api/server/services/Files/AzureDocumentIntelligence/crud.js
new file mode 100644
index 0000000000..80931cc4f7
--- /dev/null
+++ b/api/server/services/Files/AzureDocumentIntelligence/crud.js
@@ -0,0 +1,62 @@
+const axios = require('axios');
+const fs = require('fs');
+const { logger } = require('~/config');
+
+/**
+ * Uploads a document to the Azure Document Intelligence API and polls for the analysis result.
+ *
+ * @param {Object} params - The parameters for the Azure Document Intelligence request.
+ * @param {string} params.filePath - The path to the file on disk.
+ * @param {string} params.apiKey - Azure API key.
+ * @param {string} params.endpoint - Azure Document Intelligence endpoint.
+ * @param {string} params.modelId - The model ID to use for analysis.
+ * @returns {Promise<string>} - The extracted document content.
+ */
+async function uploadAzureDocumentIntelligence({ filePath, apiKey, endpoint, modelId }) {
+  const fileBuffer = fs.readFileSync(filePath);
+  const base64Source = fileBuffer.toString('base64');
+
+  try {
+    const response = await axios.post(`${endpoint}/documentModels/${modelId}/analyze?outputContentFormat=markdown`, {
+      base64Source,
+    }, {
+      headers: {
+        'Ocp-Apim-Subscription-Key': apiKey,
+        'Content-Type': 'application/json',
+      },
+    });
+    const operationLocation = response.headers['operation-location']; // axios lower-cases response header names
+
+    // Poll until the analysis succeeds or fails
+    let result;
+    while (true) {
+      const pollResponse = await axios.get(operationLocation, {
+        headers: {
+          'Ocp-Apim-Subscription-Key': apiKey,
+        },
+      });
+      if (pollResponse.data.status === 'succeeded') {
+        const resultUrl = pollResponse.data.resultUrl; // URL to fetch the analysis result
+        const resultResponse = await axios.get(resultUrl, {
+          headers: {
+            'Ocp-Apim-Subscription-Key': apiKey,
+          },
+        });
+        result = resultResponse.data.analyzeResult.content; // Final analysis result
+        break;
+      } else if (pollResponse.data.status === 'failed') {
+        throw new Error('Azure Document Intelligence processing failed.');
+      }
+      await new Promise((resolve) => setTimeout(resolve, 2000)); // Wait 2 seconds before polling again
+    }
+
+    return result;
+  } catch (error) {
+    logger.error('Error performing Azure Document Intelligence:', error.message);
+    throw error;
+  }
+}
+
+module.exports = {
+  uploadAzureDocumentIntelligence,
+};
diff --git a/api/server/services/Files/AzureDocumentIntelligence/crud.spec.js b/api/server/services/Files/AzureDocumentIntelligence/crud.spec.js
new file mode 100644
index 0000000000..9c6cb2a458
--- /dev/null
+++ b/api/server/services/Files/AzureDocumentIntelligence/crud.spec.js
@@ -0,0 +1,84 @@
+const fs = require('fs');
+
+const mockAxios = {
+  interceptors: {
+    request: { use: jest.fn(), eject: jest.fn() },
+    response: { use: jest.fn(), eject: jest.fn() },
+  },
+  create: jest.fn().mockReturnValue({
+    defaults: {
+      proxy: null,
+    },
+    get: jest.fn().mockResolvedValue({ data: {} }),
+    post: jest.fn().mockResolvedValue({ data: {} }),
+    put: jest.fn().mockResolvedValue({ data: {} }),
+    delete: jest.fn().mockResolvedValue({ data: {} }),
+  }),
+  get: jest.fn().mockResolvedValue({ data: {} }),
+  post: jest.fn().mockResolvedValue({ data: {} }),
+  put: jest.fn().mockResolvedValue({ data: {} }),
+  delete: jest.fn().mockResolvedValue({ data: {} }),
+  reset: jest.fn().mockImplementation(function () {
+    this.get.mockClear();
+    this.post.mockClear();
+    this.put.mockClear();
+    this.delete.mockClear();
+    this.create.mockClear();
+  }),
+};
+
+jest.mock('axios', () => mockAxios);
+jest.mock('fs');
+jest.mock('~/config', () => ({
+  logger: {
+    error: jest.fn(),
+  },
+  createAxiosInstance: () => mockAxios,
+}));
+jest.mock('~/server/services/Tools/credentials', () => ({
+  loadAuthValues: jest.fn(),
+}));
+
+const { uploadAzureDocumentIntelligence } = require('./crud');
+
+describe('AzureDocumentIntelligence Service', () => {
+  it('should upload a document and process the result using Azure Document Intelligence API', async () => {
+    const mockFileBuffer = Buffer.from('test file content');
+    const mockBase64Source = mockFileBuffer.toString('base64');
+    const mockOperationLocation = 'https://azure-ocr-endpoint.com/operation';
+    const mockResultUrl = 'https://azure-ocr-endpoint.com/result';
+    const mockFinalResult = { analyzeResult: { content: 'Final analysis result' } };
+
+    fs.readFileSync.mockReturnValue(mockFileBuffer);
+
+    mockAxios.post.mockResolvedValueOnce({
+      headers: { 'operation-location': mockOperationLocation }, // axios lower-cases response header names
+    }); // Initial upload
+
+    mockAxios.get
+      .mockResolvedValueOnce({ data: { status: 'succeeded', resultUrl: mockResultUrl } }) // Polling
+      .mockResolvedValueOnce({ data: mockFinalResult }); // Final result fetch
+
+    const result = await uploadAzureDocumentIntelligence({
+      filePath: '/path/to/test.pdf',
+      apiKey: 'azure-api-key',
+      endpoint: 'https://azure-ocr-endpoint.com',
+      modelId: 'prebuilt-layout',
+    });
+
+    expect(fs.readFileSync).toHaveBeenCalledWith('/path/to/test.pdf');
+    expect(mockAxios.post).toHaveBeenCalledWith(
+      'https://azure-ocr-endpoint.com/documentModels/prebuilt-layout/analyze?outputContentFormat=markdown',
+      { base64Source: mockBase64Source },
+      expect.objectContaining({
+        headers: expect.objectContaining({
+          'Ocp-Apim-Subscription-Key': 'azure-api-key',
+          'Content-Type': 'application/json',
+        }),
+      }),
+    );
+    expect(mockAxios.get).toHaveBeenCalledWith(mockOperationLocation, expect.any(Object));
+    expect(mockAxios.get).toHaveBeenCalledWith(mockResultUrl, expect.any(Object));
+    expect(result).toEqual(mockFinalResult.analyzeResult.content);
+  });
+});
diff --git a/api/server/services/Files/AzureDocumentIntelligence/index.js b/api/server/services/Files/AzureDocumentIntelligence/index.js
new file mode 100644
index 0000000000..a6223d1ee5
--- /dev/null
+++ b/api/server/services/Files/AzureDocumentIntelligence/index.js
@@ -0,0 +1,5 @@
+const crud = require('./crud');
+
+module.exports = {
+  ...crud,
+};
diff --git a/api/server/services/Files/strategies.js b/api/server/services/Files/strategies.js
index c6cfe77069..647982cb67 100644
--- a/api/server/services/Files/strategies.js
+++ b/api/server/services/Files/strategies.js
@@ -47,6 +47,7 @@ const { uploadOpenAIFile, deleteOpenAIFile, getOpenAIFileStream } = require('./O
 const { getCodeOutputDownloadStream, uploadCodeEnvFile } = require('./Code');
 const { uploadVectors, deleteVectors } = require('./VectorDB');
 const { uploadMistralOCR } = require('./MistralOCR');
+const { uploadAzureDocumentIntelligence } = require('./AzureDocumentIntelligence');
 
 /**
  * Firebase Storage Strategy Functions
@@ -202,6 +203,26 @@ const mistralOCRStrategy = () => ({
   handleFileUpload: uploadMistralOCR,
 });
 
+const azureOCRStrategy = () => ({
+  /** @type {typeof saveFileFromURL | null} */
+  saveURL: null,
+  /** @type {typeof saveFileFromURL | null} */
+  getFileURL: null,
+  /** @type {typeof saveFileFromURL | null} */
+  saveBuffer: null,
+  /** @type {typeof saveFileFromURL | null} */
+  processAvatar: null,
+  /** @type {typeof saveFileFromURL | null} */
+  handleImageUpload: null,
+  /** @type {typeof saveFileFromURL | null} */
+  prepareImagePayload: null,
+  /** @type {typeof saveFileFromURL | null} */
+  deleteFile: null,
+  handleFileUpload: uploadAzureDocumentIntelligence,
+  /** @type {typeof saveFileFromURL | null} */
+  getDownloadStream: null,
+});
+
 // Strategy Selector
 const getStrategyFunctions = (fileSource) => {
   if (fileSource === FileSources.firebase) {
@@ -222,6 +243,8 @@ const getStrategyFunctions = (fileSource) => {
     return codeOutputStrategy();
   } else if (fileSource === FileSources.mistral_ocr) {
     return mistralOCRStrategy();
+  } else if (fileSource === FileSources.azure_ocr) {
+    return azureOCRStrategy();
   } else {
     throw new Error('Invalid file source');
   }
diff --git a/client/src/locales/en/translation.json b/client/src/locales/en/translation.json
index 12e66f5630..3fbd1a2399 100644
--- a/client/src/locales/en/translation.json
+++ b/client/src/locales/en/translation.json
@@ -926,4 +926,4 @@
   "com_ui_zoom": "Zoom",
   "com_user_message": "You",
   "com_warning_resubmit_unsupported": "Resubmitting the AI message is not supported for this endpoint."
-}
\ No newline at end of file
+}
diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts
index 5d796331ec..24dcbf78a3 100644
--- a/packages/data-provider/src/config.ts
+++ b/packages/data-provider/src/config.ts
@@ -581,6 +581,7 @@ export type TStartupConfig = {
 export enum OCRStrategy {
   MISTRAL_OCR = 'mistral_ocr',
   CUSTOM_OCR = 'custom_ocr',
+  AZURE_DOCUMENT_INTELLIGENCE = 'azure_document_intelligence',
 }
 
 export enum SearchCategories {
diff --git a/packages/data-provider/src/ocr.ts b/packages/data-provider/src/ocr.ts
index cfde43025b..786efd4ce7 100644
--- a/packages/data-provider/src/ocr.ts
+++ b/packages/data-provider/src/ocr.ts
@@ -4,11 +4,14 @@ import { OCRStrategy } from '../src/config';
 export function loadOCRConfig(config: TCustomConfig['ocr']): TCustomConfig['ocr'] {
   const baseURL = config?.baseURL ?? '';
   const apiKey = config?.apiKey ?? '';
-  const mistralModel = config?.mistralModel ?? '';
+  const mistralModel = config?.mistralModel ?? '';
+  const documentIntelligenceModel = config?.documentIntelligenceModel ?? '';
+
   return {
     apiKey,
     baseURL,
     mistralModel,
+    documentIntelligenceModel,
     strategy: config?.strategy ?? OCRStrategy.MISTRAL_OCR,
   };
 }
diff --git a/packages/data-provider/src/types/files.ts b/packages/data-provider/src/types/files.ts
index 927002630f..e8e0b45313 100644
--- a/packages/data-provider/src/types/files.ts
+++ b/packages/data-provider/src/types/files.ts
@@ -10,6 +10,7 @@ export enum FileSources {
   vectordb = 'vectordb',
   execute_code = 'execute_code',
   mistral_ocr = 'mistral_ocr',
+  azure_ocr = 'azure_ocr',
   text = 'text',
 }
 