🔍 feat: Mistral OCR API / Upload Files as Text (#6274)

* refactor: move `loadAuthValues` to `~/services/Tools/credentials`

* feat: add createAxiosInstance function to configure axios with proxy support

* WIP: First pass mistral ocr

* refactor: replace getConvoFiles with getToolFiles for improved file retrieval logic

* refactor: improve document formatting in encodeAndFormat function

* refactor: remove unused resendFiles parameter from buildOptions function (this option comes from the agent config)

* fix: update getFiles call to include files with `text` property as well

* refactor: move file handling to `initializeAgentOptions`

* refactor: enhance addImageURLs method to handle OCR text and improve message formatting

* refactor: update message formatting to handle OCR text in various content types

* refactor: remove unused resendFiles property from compactAgentsSchema

* fix: add error handling for Mistral OCR document upload and logging

* refactor: integrate OCR capability into file upload options and configuration

* refactor: skip processing for text source files in delete request, as they are directly tied to database

* feat: add metadata field to ExtendedFile type and update PanelColumns and PanelTable components for localization and metadata handling

* fix: source icon styling

* wip: first pass, frontend file context agent resources

* refactor: add hover card with contextual information for File Context (OCR) in FileContext component

* feat: enhance file processing by integrating file retrieval for OCR resources in agent initialization

* feat: implement OCR config; fix: agent resource deletion for ocr files

* feat: enhance agent initialization by adding OCR capability check in resource priming

* ci: fix `~/config` module mock

* ci: add OCR property expectation in AppService tests

* refactor: simplify OCR config loading by removing environment variable extraction, to be done when OCR is actually performed

* ci: add unit test to ensure environment variable references are not parsed in OCR config

* refactor: disable base64 image inclusion in OCR request

* refactor: enhance OCR configuration handling by validating environment variables and providing defaults

* refactor: use file stream from disk for mistral ocr api
This commit is contained in:
Danny Avila 2025-03-10 17:23:46 -04:00 committed by GitHub
parent 9db00edfc4
commit ded3cd8876
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
48 changed files with 1621 additions and 131 deletions

View file

@ -23,6 +23,7 @@ import { processAgentOption } from '~/utils';
import AdminSettings from './AdminSettings';
import DeleteButton from './DeleteButton';
import AgentAvatar from './AgentAvatar';
import FileContext from './FileContext';
import { Spinner } from '~/components';
import FileSearch from './FileSearch';
import ShareAgent from './ShareAgent';
@ -82,6 +83,10 @@ export default function AgentConfig({
() => agentsConfig?.capabilities.includes(AgentCapabilities.artifacts) ?? false,
[agentsConfig],
);
const ocrEnabled = useMemo(
() => agentsConfig?.capabilities.includes(AgentCapabilities.ocr) ?? false,
[agentsConfig],
);
const fileSearchEnabled = useMemo(
() => agentsConfig?.capabilities.includes(AgentCapabilities.file_search) ?? false,
[agentsConfig],
@ -91,6 +96,26 @@ export default function AgentConfig({
[agentsConfig],
);
const context_files = useMemo(() => {
if (typeof agent === 'string') {
return [];
}
if (agent?.id !== agent_id) {
return [];
}
if (agent.context_files) {
return agent.context_files;
}
const _agent = processAgentOption({
agent,
fileMap,
});
return _agent.context_files ?? [];
}, [agent, agent_id, fileMap]);
const knowledge_files = useMemo(() => {
if (typeof agent === 'string') {
return [];
@ -334,7 +359,7 @@ export default function AgentConfig({
</div>
</button>
</div>
{(codeEnabled || fileSearchEnabled || artifactsEnabled) && (
{(codeEnabled || fileSearchEnabled || artifactsEnabled || ocrEnabled) && (
<div className="mb-4 flex w-full flex-col items-start gap-3">
<label className="text-token-text-primary block font-medium">
{localize('com_assistants_capabilities')}
@ -345,6 +370,8 @@ export default function AgentConfig({
{fileSearchEnabled && <FileSearch agent_id={agent_id} files={knowledge_files} />}
{/* Artifacts */}
{artifactsEnabled && <Artifacts />}
{/* File Context (OCR) */}
{ocrEnabled && <FileContext agent_id={agent_id} files={context_files} />}
</div>
)}
{/* Agent Tools & Actions */}