🔍 feat: Mistral OCR API / Upload Files as Text (#6274)

* refactor: move `loadAuthValues` to `~/services/Tools/credentials`

* feat: add createAxiosInstance function to configure axios with proxy support

* WIP: First pass mistral ocr

* refactor: replace getConvoFiles with getToolFiles for improved file retrieval logic

* refactor: improve document formatting in encodeAndFormat function

* refactor: remove unused resendFiles parameter from buildOptions function (this option comes from the agent config)

* fix: update getFiles call to include files with `text` property as well

* refactor: move file handling to `initializeAgentOptions`

* refactor: enhance addImageURLs method to handle OCR text and improve message formatting

* refactor: update message formatting to handle OCR text in various content types

* refactor: remove unused resendFiles property from compactAgentsSchema

* fix: add error handling for Mistral OCR document upload and logging

* refactor: integrate OCR capability into file upload options and configuration

* refactor: skip processing for text source files in delete request, as they are directly tied to database

* feat: add metadata field to ExtendedFile type and update PanelColumns and PanelTable components for localization and metadata handling

* fix: source icon styling

* wip: first pass, frontend file context agent resources

* refactor: add hover card with contextual information for File Context (OCR) in FileContext component

* feat: enhance file processing by integrating file retrieval for OCR resources in agent initialization

* feat: implement OCR config; fix: agent resource deletion for ocr files

* feat: enhance agent initialization by adding OCR capability check in resource priming

* ci: fix `~/config` module mock

* ci: add OCR property expectation in AppService tests

* refactor: simplify OCR config loading by removing environment variable extraction, to be done when OCR is actually performed

* ci: add unit test to ensure environment variable references are not parsed in OCR config

* refactor: disable base64 image inclusion in OCR request

* refactor: enhance OCR configuration handling by validating environment variables and providing defaults

* refactor: use file stream from disk for mistral ocr api
This commit is contained in:
Danny Avila 2025-03-10 17:23:46 -04:00 committed by GitHub
parent 9db00edfc4
commit ded3cd8876
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
48 changed files with 1621 additions and 131 deletions

View file

@ -120,6 +120,7 @@ describe('AppService', () => {
},
},
paths: expect.anything(),
ocr: expect.anything(),
imageOutputType: expect.any(String),
fileConfig: undefined,
secureImageLinks: undefined,
@ -588,4 +589,33 @@ describe('AppService updating app.locals and issuing warnings', () => {
);
});
});
it('should not parse environment variable references in OCR config', async () => {
// Mock custom configuration with env variable references in OCR config
const mockConfig = {
ocr: {
apiKey: '${OCR_API_KEY_CUSTOM_VAR_NAME}',
baseURL: '${OCR_BASEURL_CUSTOM_VAR_NAME}',
strategy: 'mistral_ocr',
mistralModel: 'mistral-medium',
},
};
require('./Config/loadCustomConfig').mockImplementationOnce(() => Promise.resolve(mockConfig));
// Set actual environment variables with different values
process.env.OCR_API_KEY_CUSTOM_VAR_NAME = 'actual-api-key';
process.env.OCR_BASEURL_CUSTOM_VAR_NAME = 'https://actual-ocr-url.com';
// Initialize app
const app = { locals: {} };
await AppService(app);
// Verify that the raw string references were preserved and not interpolated
expect(app.locals.ocr).toBeDefined();
expect(app.locals.ocr.apiKey).toEqual('${OCR_API_KEY_CUSTOM_VAR_NAME}');
expect(app.locals.ocr.baseURL).toEqual('${OCR_BASEURL_CUSTOM_VAR_NAME}');
expect(app.locals.ocr.strategy).toEqual('mistral_ocr');
expect(app.locals.ocr.mistralModel).toEqual('mistral-medium');
});
});