🔄 refactor: Convert OCR Tool Resource to Context (#9699)

* WIP: conversion of `ocr` to `context`

* refactor: make `primeResources` backwards-compatible for `ocr` tool_resources

* refactor: Convert legacy `ocr` tool resource to `context` in agent updates

- Implemented conversion logic to replace `ocr` with `context` in both incoming updates and existing agent data.
- Merged file IDs and files from `ocr` into `context` while ensuring deduplication.
- Updated tools array to reflect the change from `ocr` to `context`.

* refactor: Enhance context file handling in agent processing

- Updated the logic for managing context files by consolidating file IDs from both `ocr` and `context` resources.
- Improved backwards compatibility by ensuring that context files are correctly populated and handled.
- Simplified the iteration over context files for better readability and maintainability.

* refactor: Enhance tool_resources handling in primeResources

- Added tests to verify the deletion behavior of tool_resources fields, ensuring original objects remain unchanged.
- Implemented logic to delete `ocr` and `context` fields after fetching and re-categorizing files.
- Preserved context field when the context capability is disabled, ensuring correct behavior in various scenarios.

* refactor: Replace `ocrEnabled` with `contextEnabled` in AgentConfig

* refactor: Adjust legacy tool handling order for improved clarity

* refactor: Implement OCR to context conversion functions and remove original conversion logic in update agent handling

* refactor: Move contextEnabled declaration to maintain consistent order in capabilities

* refactor: Update localization keys for file context to improve clarity and accuracy

* chore: Update localization key for file context information to improve clarity
This commit is contained in:
Danny Avila 2025-09-18 20:06:59 -04:00 committed by GitHub
parent 89d12a8ccd
commit 81139046e5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 1281 additions and 76 deletions

View file

@ -94,11 +94,11 @@ const AttachFileMenu = ({
},
];
if (capabilities.ocrEnabled) {
if (capabilities.contextEnabled) {
items.push({
label: localize('com_ui_upload_ocr_text'),
onClick: () => {
setToolResource(EToolResources.ocr);
setToolResource(EToolResources.context);
onAction();
},
icon: <FileType2Icon className="icon-md" />,

View file

@ -64,10 +64,10 @@ const DragDropModal = ({ onOptionSelect, setShowModal, files, isVisible }: DragD
icon: <TerminalSquareIcon className="icon-md" />,
});
}
if (capabilities.ocrEnabled) {
if (capabilities.contextEnabled) {
_options.push({
label: localize('com_ui_upload_ocr_text'),
value: EToolResources.ocr,
value: EToolResources.context,
icon: <FileType2Icon className="icon-md" />,
});
}

View file

@ -79,9 +79,9 @@ export default function AgentConfig({ createMutation }: Pick<AgentPanelProps, 'c
}, [fileMap, agentFiles]);
const {
ocrEnabled,
codeEnabled,
toolsEnabled,
contextEnabled,
actionsEnabled,
artifactsEnabled,
webSearchEnabled,
@ -291,7 +291,7 @@ export default function AgentConfig({ createMutation }: Pick<AgentPanelProps, 'c
{(codeEnabled ||
fileSearchEnabled ||
artifactsEnabled ||
ocrEnabled ||
contextEnabled ||
webSearchEnabled) && (
<div className="mb-4 flex w-full flex-col items-start gap-3">
<label className="text-token-text-primary block font-medium">
@ -301,8 +301,8 @@ export default function AgentConfig({ createMutation }: Pick<AgentPanelProps, 'c
{codeEnabled && <CodeForm agent_id={agent_id} files={code_files} />}
{/* Web Search */}
{webSearchEnabled && <SearchForm />}
{/* File Context (OCR) */}
{ocrEnabled && <FileContext agent_id={agent_id} files={context_files} />}
{/* File Context */}
{contextEnabled && <FileContext agent_id={agent_id} files={context_files} />}
{/* Artifacts */}
{artifactsEnabled && <Artifacts />}
{/* File Search */}

View file

@ -47,7 +47,7 @@ export default function FileContext({
const { handleFileChange } = useFileHandling({
overrideEndpoint: EModelEndpoint.agents,
additionalMetadata: { agent_id, tool_resource: EToolResources.ocr },
additionalMetadata: { agent_id, tool_resource: EToolResources.context },
fileSetter: setFiles,
});
const { handleSharePointFiles, isProcessing, downloadProgress } = useSharePointFileHandling({
@ -113,7 +113,7 @@ export default function FileContext({
<HoverCardTrigger asChild>
<span className="flex items-center gap-2">
<label className="text-token-text-primary block font-medium">
{localize('com_agents_file_context')}
{localize('com_agents_file_context_label')}
</label>
<CircleHelpIcon className="h-4 w-4 text-text-tertiary" />
</span>
@ -122,7 +122,7 @@ export default function FileContext({
<HoverCardContent side={ESide.Top} className="w-80">
<div className="space-y-2">
<p className="text-sm text-text-secondary">
{localize('com_agents_file_context_info')}
{localize('com_agents_file_context_description')}
</p>
</div>
</HoverCardContent>
@ -130,13 +130,13 @@ export default function FileContext({
</div>
</HoverCard>
<div className="flex flex-col gap-3">
{/* File Context (OCR) Files */}
{/* File Context Files */}
<FileRow
files={files}
setFiles={setFiles}
setFilesLoading={setFilesLoading}
agent_id={agent_id}
tool_resource={EToolResources.ocr}
tool_resource={EToolResources.context}
Wrapper={({ children }) => <div className="flex flex-wrap gap-2">{children}</div>}
/>
<div>

View file

@ -142,7 +142,6 @@ describe('useAgentToolPermissions', () => {
(useGetAgentByIdQuery as jest.Mock).mockReturnValue({ data: undefined });
const ephemeralAgent = {
[EToolResources.ocr]: true,
[EToolResources.file_search]: true,
};

View file

@ -6,6 +6,7 @@ interface AgentCapabilitiesResult {
actionsEnabled: boolean;
artifactsEnabled: boolean;
ocrEnabled: boolean;
contextEnabled: boolean;
fileSearchEnabled: boolean;
webSearchEnabled: boolean;
codeEnabled: boolean;
@ -34,6 +35,11 @@ export default function useAgentCapabilities(
[capabilities],
);
const contextEnabled = useMemo(
() => capabilities?.includes(AgentCapabilities.context) ?? false,
[capabilities],
);
const fileSearchEnabled = useMemo(
() => capabilities?.includes(AgentCapabilities.file_search) ?? false,
[capabilities],
@ -54,6 +60,7 @@ export default function useAgentCapabilities(
codeEnabled,
toolsEnabled,
actionsEnabled,
contextEnabled,
artifactsEnabled,
webSearchEnabled,
fileSearchEnabled,

View file

@ -71,7 +71,7 @@ export default function useDragHelpers() {
const capabilities = agentsConfig?.capabilities ?? defaultAgentCapabilities;
const fileSearchEnabled = capabilities.includes(AgentCapabilities.file_search) === true;
const codeEnabled = capabilities.includes(AgentCapabilities.execute_code) === true;
const ocrEnabled = capabilities.includes(AgentCapabilities.ocr) === true;
const contextEnabled = capabilities.includes(AgentCapabilities.context) === true;
/** Get agent permissions at drop time */
const agentId = conversationRef.current?.agent_id;
@ -99,7 +99,7 @@ export default function useDragHelpers() {
allImages ||
(fileSearchEnabled && fileSearchAllowedByAgent) ||
(codeEnabled && codeAllowedByAgent) ||
ocrEnabled;
contextEnabled;
if (!shouldShowModal) {
// Fallback: directly handle files without showing modal

View file

@ -59,9 +59,9 @@
"com_agents_error_timeout_suggestion": "Please check your internet connection and try again.",
"com_agents_error_timeout_title": "Connection Timeout",
"com_agents_error_title": "Something went wrong",
"com_agents_file_context": "File Context (OCR)",
"com_agents_file_context_label": "File Context",
"com_agents_file_context_disabled": "Agent must be created before uploading files for File Context.",
"com_agents_file_context_info": "Files uploaded as \"Context\" are processed using OCR to extract text, which is then added to the Agent's instructions. Ideal for documents, images with text, or PDFs where you need the full text content of a file",
"com_agents_file_context_description": "Files uploaded as \"Context\" are parsed as text to supplement the Agent's instructions. If OCR is available, or if configured for the uploaded filetype, the process is used to extract text. Ideal for documents, images with text, or PDFs where you need the full text content of a file",
"com_agents_file_search_disabled": "Agent must be created before uploading files for File Search.",
"com_agents_file_search_info": "When enabled, the agent will be informed of the exact filenames listed below, allowing it to retrieve relevant context from these files.",
"com_agents_grid_announcement": "Showing {{count}} agents in {{category}} category",

View file

@ -253,7 +253,7 @@ export const validateFiles = ({
}
let mimeTypesToCheck = supportedMimeTypes;
if (toolResource === EToolResources.ocr) {
if (toolResource === EToolResources.context) {
mimeTypesToCheck = [
...(fileConfig?.text?.supportedMimeTypes || []),
...(fileConfig?.ocr?.supportedMimeTypes || []),

View file

@ -62,14 +62,19 @@ export const processAgentOption = ({
fileMap?: Record<string, TFile | undefined>;
}): TAgentOption => {
const isGlobal = _agent?.isPublic ?? false;
const context_files = _agent?.tool_resources?.context?.file_ids ?? [];
if (_agent?.tool_resources?.ocr?.file_ids) {
/** Backwards-compatibility */
context_files.push(..._agent.tool_resources.ocr.file_ids);
}
const agent: TAgentOption = {
...(_agent ?? ({} as Agent)),
label: _agent?.name ?? '',
value: _agent?.id ?? '',
icon: isGlobal ? <EarthIcon className="icon-md text-green-400" /> : null,
context_files: _agent?.tool_resources?.ocr?.file_ids
? ([] as Array<[string, ExtendedFile]>)
: undefined,
context_files: context_files.length > 0 ? ([] as Array<[string, ExtendedFile]>) : undefined,
knowledge_files: _agent?.tool_resources?.file_search?.file_ids
? ([] as Array<[string, ExtendedFile]>)
: undefined,
@ -130,12 +135,12 @@ export const processAgentOption = ({
}
};
if (agent.context_files && _agent?.tool_resources?.ocr?.file_ids) {
_agent.tool_resources.ocr.file_ids.forEach((file_id) =>
if (agent.context_files && context_files.length > 0) {
context_files.forEach((file_id) =>
handleFile({
file_id,
list: agent.context_files,
tool_resource: EToolResources.ocr,
tool_resource: EToolResources.context,
}),
);
}