mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-01-05 01:58:50 +01:00
feat: Update Azure Document Intelligence upload function to return Markdown result
This commit is contained in:
parent
5dfad6b77b
commit
8711ca8b3a
2 changed files with 82 additions and 55 deletions
|
|
@ -3,7 +3,7 @@ const fs = require('fs');
|
|||
const { logger } = require('~/config');
|
||||
|
||||
/**
|
||||
* Uploads a document to Azure Document Intelligence API and processes the result.
|
||||
* Uploads a document to Azure Document Intelligence API and returns the Markdown result.
|
||||
*
|
||||
* @param {Object} params - The parameters for the Azure Document Intelligence request.
|
||||
* @param {string} params.filePath - The path to the file on disk.
|
||||
|
|
@ -13,15 +13,18 @@ const { logger } = require('~/config');
|
|||
* @returns {Promise<string>} - The Markdown content of the Document Intelligence analysis result.
|
||||
*/
|
||||
async function uploadAzureDocumentIntelligence({ filePath, apiKey, endpoint, modelId }) {
|
||||
// Read and encode file
|
||||
const fileBuffer = fs.readFileSync(filePath);
|
||||
const base64Source = fileBuffer.toString('base64');
|
||||
|
||||
// Build URL (ensure no trailing slash on endpoint)
|
||||
const url = `${endpoint.replace(/\/+$/, '')}/documentModels/${modelId}:analyze?outputContentFormat=markdown`;
|
||||
|
||||
try {
|
||||
// Kick off the analysis
|
||||
const response = await axios.post(
|
||||
`${endpoint}/documentModels/${modelId}/analyze?outputContentFormat=markdown`,
|
||||
{
|
||||
base64Source,
|
||||
},
|
||||
url,
|
||||
{ base64Source },
|
||||
{
|
||||
headers: {
|
||||
'Ocp-Apim-Subscription-Key': apiKey,
|
||||
|
|
@ -29,32 +32,37 @@ async function uploadAzureDocumentIntelligence({ filePath, apiKey, endpoint, mod
|
|||
},
|
||||
},
|
||||
);
|
||||
const operationLocation = response.headers['Operation-Location'];
|
||||
|
||||
// Polling for the result
|
||||
let result;
|
||||
while (true) {
|
||||
const pollResponse = await axios.get(operationLocation, {
|
||||
headers: {
|
||||
'Ocp-Apim-Subscription-Key': apiKey,
|
||||
},
|
||||
});
|
||||
if (pollResponse.data.status === 'succeeded') {
|
||||
const resultUrl = pollResponse.data.resultUrl; // URL to fetch the analysis result
|
||||
const resultResponse = await axios.get(resultUrl, {
|
||||
headers: {
|
||||
'Ocp-Apim-Subscription-Key': apiKey,
|
||||
},
|
||||
});
|
||||
result = resultResponse.data.analyzeResult.content; // Final analysis result
|
||||
break;
|
||||
} else if (pollResponse.data.status === 'failed') {
|
||||
throw new Error('Azure Document Intelligence processing failed.');
|
||||
}
|
||||
await new Promise((resolve) => setTimeout(resolve, 2000)); // Wait 2 seconds before polling again
|
||||
// Axios lower-cases header keys, but allow either form
|
||||
const headers = response.headers || {};
|
||||
const operationLocation = headers['operation-location'] || headers['Operation-Location'];
|
||||
if (!operationLocation) {
|
||||
throw new Error('Missing Operation-Location header in Azure response.');
|
||||
}
|
||||
|
||||
return result;
|
||||
// Poll until done
|
||||
let resultContent;
|
||||
while (true) {
|
||||
const pollResponse = await axios.get(operationLocation, {
|
||||
headers: { 'Ocp-Apim-Subscription-Key': apiKey },
|
||||
});
|
||||
|
||||
const { status, resultUrl } = pollResponse.data;
|
||||
if (status === 'succeeded') {
|
||||
const final = await axios.get(resultUrl, {
|
||||
headers: { 'Ocp-Apim-Subscription-Key': apiKey },
|
||||
});
|
||||
resultContent = final.data.analyzeResult.content;
|
||||
break;
|
||||
}
|
||||
if (status === 'failed') {
|
||||
throw new Error('Azure Document Intelligence processing failed.');
|
||||
}
|
||||
// Wait 2s before retry
|
||||
await new Promise((r) => setTimeout(r, 2000));
|
||||
}
|
||||
|
||||
return resultContent;
|
||||
} catch (error) {
|
||||
logger.error('Error performing Azure Document Intelligence:', error.message);
|
||||
throw error;
|
||||
|
|
|
|||
|
|
@ -6,9 +6,7 @@ const mockAxios = {
|
|||
response: { use: jest.fn(), eject: jest.fn() },
|
||||
},
|
||||
create: jest.fn().mockReturnValue({
|
||||
defaults: {
|
||||
proxy: null,
|
||||
},
|
||||
defaults: { proxy: null },
|
||||
get: jest.fn().mockResolvedValue({ data: {} }),
|
||||
post: jest.fn().mockResolvedValue({ data: {} }),
|
||||
put: jest.fn().mockResolvedValue({ data: {} }),
|
||||
|
|
@ -30,46 +28,52 @@ const mockAxios = {
|
|||
jest.mock('axios', () => mockAxios);
|
||||
jest.mock('fs');
|
||||
jest.mock('~/config', () => ({
|
||||
logger: {
|
||||
error: jest.fn(),
|
||||
},
|
||||
createAxiosInstance: () => mockAxios,
|
||||
}));
|
||||
jest.mock('~/server/services/Tools/credentials', () => ({
|
||||
loadAuthValues: jest.fn(),
|
||||
logger: { error: jest.fn() },
|
||||
}));
|
||||
|
||||
const { uploadAzureDocumentIntelligence } = require('./crud');
|
||||
|
||||
describe('AzureDocumentIntelligence Service', () => {
|
||||
it('should upload a document and process the result using Azure Document Intelligence API', async () => {
|
||||
const mockFileBuffer = Buffer.from('test file content');
|
||||
const mockBase64Source = mockFileBuffer.toString('base64');
|
||||
const mockOperationLocation = 'https://azure-ocr-endpoint.com/operation';
|
||||
const mockResultUrl = 'https://azure-ocr-endpoint.com/result';
|
||||
const mockFinalResult = { analyzeResult: { content: 'Final analysis result' } };
|
||||
beforeEach(() => {
|
||||
mockAxios.reset();
|
||||
fs.readFileSync.mockReset();
|
||||
});
|
||||
|
||||
it('should upload and poll until it gets the Markdown result', async () => {
|
||||
const mockFileBuffer = Buffer.from('test file content');
|
||||
const mockBase64 = mockFileBuffer.toString('base64');
|
||||
const mockOpLocation = 'https://azure-ocr-endpoint.com/operations/123';
|
||||
const mockResultUrl = 'https://azure-ocr-endpoint.com/results/123';
|
||||
const mockFinal = { analyzeResult: { content: 'Final analysis result' } };
|
||||
|
||||
// fs.readFileSync returns our buffer
|
||||
fs.readFileSync.mockReturnValue(mockFileBuffer);
|
||||
|
||||
mockAxios.post
|
||||
.mockResolvedValueOnce({ headers: { 'Operation-Location': mockOperationLocation } }) // Initial upload
|
||||
.mockResolvedValueOnce({ data: { status: 'succeeded', resultUrl: mockResultUrl } }); // Polling success
|
||||
// First axios.post => returns Operation-Location header
|
||||
mockAxios.post.mockResolvedValueOnce({
|
||||
headers: { 'Operation-Location': mockOpLocation },
|
||||
});
|
||||
|
||||
// First axios.get => poll success, returns status + resultUrl
|
||||
// Second axios.get => fetch final result
|
||||
mockAxios.get
|
||||
.mockResolvedValueOnce({ data: { status: 'succeeded', resultUrl: mockResultUrl } }) // Polling
|
||||
.mockResolvedValueOnce({ data: mockFinalResult }); // Final result fetch
|
||||
.mockResolvedValueOnce({ data: { status: 'succeeded', resultUrl: mockResultUrl } })
|
||||
.mockResolvedValueOnce({ data: mockFinal });
|
||||
|
||||
const result = await uploadAzureDocumentIntelligence({
|
||||
filePath: '/path/to/test.pdf',
|
||||
apiKey: 'azure-api-key',
|
||||
endpoint: 'https://azure-ocr-endpoint.com',
|
||||
endpoint: 'https://azure-ocr-endpoint.com/',
|
||||
modelId: 'prebuilt-layout',
|
||||
});
|
||||
|
||||
// Validate read
|
||||
expect(fs.readFileSync).toHaveBeenCalledWith('/path/to/test.pdf');
|
||||
|
||||
// Validate initial POST
|
||||
expect(mockAxios.post).toHaveBeenCalledWith(
|
||||
'https://azure-ocr-endpoint.com/documentModels/prebuilt-invoice:analyze',
|
||||
{ base64Source: mockBase64Source },
|
||||
'https://azure-ocr-endpoint.com/documentModels/prebuilt-layout:analyze?outputContentFormat=markdown',
|
||||
{ base64Source: mockBase64 },
|
||||
expect.objectContaining({
|
||||
headers: expect.objectContaining({
|
||||
'Ocp-Apim-Subscription-Key': 'azure-api-key',
|
||||
|
|
@ -77,8 +81,23 @@ describe('AzureDocumentIntelligence Service', () => {
|
|||
}),
|
||||
}),
|
||||
);
|
||||
expect(mockAxios.get).toHaveBeenCalledWith(mockOperationLocation, expect.any(Object));
|
||||
expect(mockAxios.get).toHaveBeenCalledWith(mockResultUrl, expect.any(Object));
|
||||
expect(result).toEqual(mockFinalResult.analyzeResult.content);
|
||||
|
||||
// Validate polling GET
|
||||
expect(mockAxios.get).toHaveBeenCalledWith(
|
||||
mockOpLocation,
|
||||
expect.objectContaining({
|
||||
headers: expect.objectContaining({ 'Ocp-Apim-Subscription-Key': 'azure-api-key' }),
|
||||
}),
|
||||
);
|
||||
|
||||
// Validate final fetch GET
|
||||
expect(mockAxios.get).toHaveBeenCalledWith(
|
||||
mockResultUrl,
|
||||
expect.objectContaining({
|
||||
headers: expect.objectContaining({ 'Ocp-Apim-Subscription-Key': 'azure-api-key' }),
|
||||
}),
|
||||
);
|
||||
|
||||
expect(result).toEqual('Final analysis result');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue