mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-18 17:30:16 +01:00
🔍 feat: Mistral OCR API / Upload Files as Text (#6274)
* refactor: move `loadAuthValues` to `~/services/Tools/credentials`
* feat: add createAxiosInstance function to configure axios with proxy support
* WIP: First pass mistral ocr
* refactor: replace getConvoFiles with getToolFiles for improved file retrieval logic
* refactor: improve document formatting in encodeAndFormat function
* refactor: remove unused resendFiles parameter from buildOptions function (this option comes from the agent config)
* fix: update getFiles call to include files with `text` property as well
* refactor: move file handling to `initializeAgentOptions`
* refactor: enhance addImageURLs method to handle OCR text and improve message formatting
* refactor: update message formatting to handle OCR text in various content types
* refactor: remove unused resendFiles property from compactAgentsSchema
* fix: add error handling for Mistral OCR document upload and logging
* refactor: integrate OCR capability into file upload options and configuration
* refactor: skip processing for text source files in delete request, as they are directly tied to database
* feat: add metadata field to ExtendedFile type and update PanelColumns and PanelTable components for localization and metadata handling
* fix: source icon styling
* wip: first pass, frontend file context agent resources
* refactor: add hover card with contextual information for File Context (OCR) in FileContext component
* feat: enhance file processing by integrating file retrieval for OCR resources in agent initialization
* feat: implement OCR config; fix: agent resource deletion for ocr files
* feat: enhance agent initialization by adding OCR capability check in resource priming
* ci: fix `~/config` module mock
* ci: add OCR property expectation in AppService tests
* refactor: simplify OCR config loading by removing environment variable extraction, to be done when OCR is actually performed
* ci: add unit test to ensure environment variable references are not parsed in OCR config
* refactor: disable base64 image inclusion in OCR request
* refactor: enhance OCR configuration handling by validating environment variables and providing defaults
* refactor: use file stream from disk for mistral ocr api
This commit is contained in:
parent
9db00edfc4
commit
ded3cd8876
48 changed files with 1621 additions and 131 deletions
|
|
@ -1,7 +1,7 @@
|
|||
import * as Ariakit from '@ariakit/react';
|
||||
import React, { useRef, useState, useMemo } from 'react';
|
||||
import { FileSearch, ImageUpIcon, TerminalSquareIcon } from 'lucide-react';
|
||||
import { EToolResources, EModelEndpoint } from 'librechat-data-provider';
|
||||
import { FileSearch, ImageUpIcon, TerminalSquareIcon, FileType2Icon } from 'lucide-react';
|
||||
import { FileUpload, TooltipAnchor, DropdownPopup } from '~/components/ui';
|
||||
import { useGetEndpointsQuery } from '~/data-provider';
|
||||
import { AttachmentIcon } from '~/components/svg';
|
||||
|
|
@ -49,6 +49,17 @@ const AttachFile = ({ isRTL, disabled, handleFileChange }: AttachFileProps) => {
|
|||
},
|
||||
];
|
||||
|
||||
if (capabilities.includes(EToolResources.ocr)) {
|
||||
items.push({
|
||||
label: localize('com_ui_upload_ocr_text'),
|
||||
onClick: () => {
|
||||
setToolResource(EToolResources.ocr);
|
||||
handleUploadClick();
|
||||
},
|
||||
icon: <FileType2Icon className="icon-md" />,
|
||||
});
|
||||
}
|
||||
|
||||
if (capabilities.includes(EToolResources.file_search)) {
|
||||
items.push({
|
||||
label: localize('com_ui_upload_file_search'),
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import React, { useMemo } from 'react';
|
||||
import { EModelEndpoint, EToolResources } from 'librechat-data-provider';
|
||||
import { FileSearch, ImageUpIcon, TerminalSquareIcon } from 'lucide-react';
|
||||
import { FileSearch, ImageUpIcon, FileType2Icon, TerminalSquareIcon } from 'lucide-react';
|
||||
import OGDialogTemplate from '~/components/ui/OGDialogTemplate';
|
||||
import { useGetEndpointsQuery } from '~/data-provider';
|
||||
import useLocalize from '~/hooks/useLocalize';
|
||||
|
|
@ -50,6 +50,12 @@ const DragDropModal = ({ onOptionSelect, setShowModal, files, isVisible }: DragD
|
|||
value: EToolResources.execute_code,
|
||||
icon: <TerminalSquareIcon className="icon-md" />,
|
||||
});
|
||||
} else if (capability === EToolResources.ocr) {
|
||||
_options.push({
|
||||
label: localize('com_ui_upload_ocr_text'),
|
||||
value: EToolResources.ocr,
|
||||
icon: <FileType2Icon className="icon-md" />,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ const FilePreview = ({
|
|||
};
|
||||
className?: string;
|
||||
}) => {
|
||||
const radius = 55; // Radius of the SVG circle
|
||||
const radius = 55;
|
||||
const circumference = 2 * Math.PI * radius;
|
||||
const progress = useProgress(
|
||||
file?.['progress'] ?? 1,
|
||||
|
|
@ -27,16 +27,15 @@ const FilePreview = ({
|
|||
(file as ExtendedFile | undefined)?.size ?? 1,
|
||||
);
|
||||
|
||||
// Calculate the offset based on the loading progress
|
||||
const offset = circumference - progress * circumference;
|
||||
const circleCSSProperties = {
|
||||
transition: 'stroke-dashoffset 0.5s linear',
|
||||
};
|
||||
|
||||
return (
|
||||
<div className={cn('size-10 shrink-0 overflow-hidden rounded-xl', className)}>
|
||||
<div className={cn('relative size-10 shrink-0 overflow-hidden rounded-xl', className)}>
|
||||
<FileIcon file={file} fileType={fileType} />
|
||||
<SourceIcon source={file?.source} />
|
||||
<SourceIcon source={file?.source} isCodeFile={!!file?.['metadata']?.fileIdentifier} />
|
||||
{progress < 1 && (
|
||||
<ProgressCircle
|
||||
circumference={circumference}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import { Terminal, Type, Database } from 'lucide-react';
|
||||
import { EModelEndpoint, FileSources } from 'librechat-data-provider';
|
||||
import { MinimalIcon } from '~/components/Endpoints';
|
||||
import { cn } from '~/utils';
|
||||
|
|
@ -6,9 +7,13 @@ const sourceToEndpoint = {
|
|||
[FileSources.openai]: EModelEndpoint.openAI,
|
||||
[FileSources.azure]: EModelEndpoint.azureOpenAI,
|
||||
};
|
||||
|
||||
const sourceToClassname = {
|
||||
[FileSources.openai]: 'bg-white/75 dark:bg-black/65',
|
||||
[FileSources.azure]: 'azure-bg-color opacity-85',
|
||||
[FileSources.execute_code]: 'bg-black text-white opacity-85',
|
||||
[FileSources.text]: 'bg-blue-100 dark:bg-blue-900 opacity-85 text-white',
|
||||
[FileSources.vectordb]: 'bg-yellow-100 dark:bg-yellow-900 opacity-85 text-white',
|
||||
};
|
||||
|
||||
const defaultClassName =
|
||||
|
|
@ -16,13 +21,41 @@ const defaultClassName =
|
|||
|
||||
export default function SourceIcon({
|
||||
source,
|
||||
isCodeFile,
|
||||
className = defaultClassName,
|
||||
}: {
|
||||
source?: FileSources;
|
||||
isCodeFile?: boolean;
|
||||
className?: string;
|
||||
}) {
|
||||
if (source === FileSources.local || source === FileSources.firebase) {
|
||||
return null;
|
||||
if (isCodeFile === true) {
|
||||
return (
|
||||
<div className={cn(className, sourceToClassname[FileSources.execute_code] ?? '')}>
|
||||
<span className="flex items-center justify-center">
|
||||
<Terminal className="h-3 w-3" />
|
||||
</span>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
if (source === FileSources.text) {
|
||||
return (
|
||||
<div className={cn(className, sourceToClassname[source] ?? '')}>
|
||||
<span className="flex items-center justify-center">
|
||||
<Type className="h-3 w-3" />
|
||||
</span>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
if (source === FileSources.vectordb) {
|
||||
return (
|
||||
<div className={cn(className, sourceToClassname[source] ?? '')}>
|
||||
<span className="flex items-center justify-center">
|
||||
<Database className="h-3 w-3" />
|
||||
</span>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
const endpoint = sourceToEndpoint[source ?? ''];
|
||||
|
|
@ -31,7 +64,7 @@ export default function SourceIcon({
|
|||
return null;
|
||||
}
|
||||
return (
|
||||
<button type="button" className={cn(className, sourceToClassname[source ?? ''] ?? '')}>
|
||||
<div className={cn(className, sourceToClassname[source ?? ''] ?? '')}>
|
||||
<span className="flex items-center justify-center">
|
||||
<MinimalIcon
|
||||
endpoint={endpoint}
|
||||
|
|
@ -40,6 +73,6 @@ export default function SourceIcon({
|
|||
iconClassName="h-3 w-3"
|
||||
/>
|
||||
</span>
|
||||
</button>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ import { processAgentOption } from '~/utils';
|
|||
import AdminSettings from './AdminSettings';
|
||||
import DeleteButton from './DeleteButton';
|
||||
import AgentAvatar from './AgentAvatar';
|
||||
import FileContext from './FileContext';
|
||||
import { Spinner } from '~/components';
|
||||
import FileSearch from './FileSearch';
|
||||
import ShareAgent from './ShareAgent';
|
||||
|
|
@ -82,6 +83,10 @@ export default function AgentConfig({
|
|||
() => agentsConfig?.capabilities.includes(AgentCapabilities.artifacts) ?? false,
|
||||
[agentsConfig],
|
||||
);
|
||||
const ocrEnabled = useMemo(
|
||||
() => agentsConfig?.capabilities.includes(AgentCapabilities.ocr) ?? false,
|
||||
[agentsConfig],
|
||||
);
|
||||
const fileSearchEnabled = useMemo(
|
||||
() => agentsConfig?.capabilities.includes(AgentCapabilities.file_search) ?? false,
|
||||
[agentsConfig],
|
||||
|
|
@ -91,6 +96,26 @@ export default function AgentConfig({
|
|||
[agentsConfig],
|
||||
);
|
||||
|
||||
const context_files = useMemo(() => {
|
||||
if (typeof agent === 'string') {
|
||||
return [];
|
||||
}
|
||||
|
||||
if (agent?.id !== agent_id) {
|
||||
return [];
|
||||
}
|
||||
|
||||
if (agent.context_files) {
|
||||
return agent.context_files;
|
||||
}
|
||||
|
||||
const _agent = processAgentOption({
|
||||
agent,
|
||||
fileMap,
|
||||
});
|
||||
return _agent.context_files ?? [];
|
||||
}, [agent, agent_id, fileMap]);
|
||||
|
||||
const knowledge_files = useMemo(() => {
|
||||
if (typeof agent === 'string') {
|
||||
return [];
|
||||
|
|
@ -334,7 +359,7 @@ export default function AgentConfig({
|
|||
</div>
|
||||
</button>
|
||||
</div>
|
||||
{(codeEnabled || fileSearchEnabled || artifactsEnabled) && (
|
||||
{(codeEnabled || fileSearchEnabled || artifactsEnabled || ocrEnabled) && (
|
||||
<div className="mb-4 flex w-full flex-col items-start gap-3">
|
||||
<label className="text-token-text-primary block font-medium">
|
||||
{localize('com_assistants_capabilities')}
|
||||
|
|
@ -345,6 +370,8 @@ export default function AgentConfig({
|
|||
{fileSearchEnabled && <FileSearch agent_id={agent_id} files={knowledge_files} />}
|
||||
{/* Artifacts */}
|
||||
{artifactsEnabled && <Artifacts />}
|
||||
{/* File Context (OCR) */}
|
||||
{ocrEnabled && <FileContext agent_id={agent_id} files={context_files} />}
|
||||
</div>
|
||||
)}
|
||||
{/* Agent Tools & Actions */}
|
||||
|
|
|
|||
128
client/src/components/SidePanel/Agents/FileContext.tsx
Normal file
128
client/src/components/SidePanel/Agents/FileContext.tsx
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
import { useState, useRef } from 'react';
|
||||
import {
|
||||
EModelEndpoint,
|
||||
EToolResources,
|
||||
mergeFileConfig,
|
||||
fileConfig as defaultFileConfig,
|
||||
} from 'librechat-data-provider';
|
||||
import type { ExtendedFile } from '~/common';
|
||||
import { useFileHandling, useLocalize, useLazyEffect } from '~/hooks';
|
||||
import FileRow from '~/components/Chat/Input/Files/FileRow';
|
||||
import { useGetFileConfig } from '~/data-provider';
|
||||
import { HoverCard, HoverCardContent, HoverCardPortal, HoverCardTrigger } from '~/components/ui';
|
||||
import { AttachmentIcon, CircleHelpIcon } from '~/components/svg';
|
||||
import { useChatContext } from '~/Providers';
|
||||
import { ESide } from '~/common';
|
||||
|
||||
/**
 * Flex container handed to `FileRow` as its `Wrapper`.
 *
 * Declared at module scope so it keeps a single identity across renders of
 * `FileContext`. An inline arrow in JSX is a brand-new component type on every
 * render, and React remounts whatever is rendered inside a component whose
 * type changed (presumably `FileRow` renders its file entries inside
 * `Wrapper` — verify against `FileRow`).
 *
 * The type is derived from `FileRow`'s own props so the two cannot drift apart.
 */
const FileContextWrapper: NonNullable<Parameters<typeof FileRow>[0]['Wrapper']> = ({
  children,
}) => <div className="flex flex-wrap gap-2">{children}</div>;

/**
 * "File Context (OCR)" section of the agent configuration panel.
 *
 * Lists the files attached to the agent's OCR tool resource and offers an
 * upload button that attaches more. Uploads go through `useFileHandling` with
 * the agents endpoint and are tagged with `agent_id` and
 * `tool_resource: EToolResources.ocr`.
 *
 * @param agent_id - Id of the agent being edited. When falsy the upload
 *   controls are disabled and an explanatory message is shown.
 * @param files - Optional `[key, file]` entries used to seed the local file map.
 * @returns The section, or `null` when uploads are disabled for the agents
 *   endpoint in the merged file config.
 */
export default function FileContext({
  agent_id,
  files: _files,
}: {
  agent_id: string;
  files?: [string, ExtendedFile][];
}) {
  const localize = useLocalize();
  const { setFilesLoading } = useChatContext();
  const fileInputRef = useRef<HTMLInputElement>(null);
  const [files, setFiles] = useState<Map<string, ExtendedFile>>(new Map());

  const { data: fileConfig = defaultFileConfig } = useGetFileConfig({
    select: (data) => mergeFileConfig(data),
  });

  const { handleFileChange } = useFileHandling({
    overrideEndpoint: EModelEndpoint.agents,
    additionalMetadata: { agent_id, tool_resource: EToolResources.ocr },
    fileSetter: setFiles,
  });

  // Mirror the incoming entries into local state whenever the prop changes.
  // NOTE(review): the third argument (750) is presumably a delay in ms —
  // confirm against `useLazyEffect`.
  useLazyEffect(
    () => {
      if (_files) {
        setFiles(new Map(_files));
      }
    },
    [_files],
    750,
  );

  // Optional chaining: a config without an `agents` entry means "not disabled"
  // instead of throwing during render.
  const isUploadDisabled = fileConfig.endpoints[EModelEndpoint.agents]?.disabled ?? false;

  // Every hook above has already run, so returning early here is safe.
  if (isUploadDisabled) {
    return null;
  }

  const handleButtonClick = () => {
    const input = fileInputRef.current;
    if (!input) {
      return;
    }
    // Clear the previous selection so picking the same file again still fires `onChange`.
    input.value = '';
    input.click();
  };

  return (
    <div className="w-full">
      <HoverCard openDelay={50}>
        <div className="mb-2 flex items-center gap-2">
          <HoverCardTrigger asChild>
            <span className="flex items-center gap-2">
              <label className="text-token-text-primary block font-medium">
                {localize('com_agents_file_context')}
              </label>
              <CircleHelpIcon className="h-4 w-4 text-text-tertiary" />
            </span>
          </HoverCardTrigger>
          <HoverCardPortal>
            <HoverCardContent side={ESide.Top} className="w-80">
              <div className="space-y-2">
                <p className="text-sm text-text-secondary">
                  {localize('com_agents_file_context_info')}
                </p>
              </div>
            </HoverCardContent>
          </HoverCardPortal>
        </div>
      </HoverCard>
      <div className="flex flex-col gap-3">
        {/* File Context (OCR) Files */}
        <FileRow
          files={files}
          setFiles={setFiles}
          setFilesLoading={setFilesLoading}
          agent_id={agent_id}
          tool_resource={EToolResources.ocr}
          Wrapper={FileContextWrapper}
        />
        <div>
          {/*
            The hidden file input is a sibling of the button, not a descendant:
            interactive content inside <button> is invalid HTML, and a nested
            input's programmatic click would bubble back into the button's onClick.
          */}
          <input
            multiple={true}
            type="file"
            style={{ display: 'none' }}
            tabIndex={-1}
            ref={fileInputRef}
            disabled={!agent_id}
            onChange={handleFileChange}
          />
          <button
            type="button"
            disabled={!agent_id}
            className="btn btn-neutral border-token-border-light relative h-9 w-full rounded-lg font-medium"
            onClick={handleButtonClick}
          >
            <div className="flex w-full items-center justify-center gap-1">
              <AttachmentIcon className="text-token-text-primary h-4 w-4" />
              {localize('com_ui_upload_file_context')}
            </div>
          </button>
        </div>
        {/* Disabled Message */}
        {agent_id ? null : (
          <div className="text-xs text-text-secondary">
            {localize('com_agents_file_context_disabled')}
          </div>
        )}
      </div>
    </div>
  );
}
|
||||
|
|
@ -1,21 +1,23 @@
|
|||
import { ArrowUpDown } from 'lucide-react';
|
||||
import type { ColumnDef } from '@tanstack/react-table';
|
||||
import type { TFile } from 'librechat-data-provider';
|
||||
import useLocalize from '~/hooks/useLocalize';
|
||||
import PanelFileCell from './PanelFileCell';
|
||||
import { Button } from '~/components/ui';
|
||||
import { formatDate } from '~/utils';
|
||||
|
||||
export const columns: ColumnDef<TFile>[] = [
|
||||
export const columns: ColumnDef<TFile | undefined>[] = [
|
||||
{
|
||||
accessorKey: 'filename',
|
||||
header: ({ column }) => {
|
||||
const localize = useLocalize();
|
||||
return (
|
||||
<Button
|
||||
variant="ghost"
|
||||
className="hover:bg-surface-hover"
|
||||
onClick={() => column.toggleSorting(column.getIsSorted() === 'asc')}
|
||||
>
|
||||
Name
|
||||
{localize('com_ui_name')}
|
||||
<ArrowUpDown className="ml-2 h-4 w-4" />
|
||||
</Button>
|
||||
);
|
||||
|
|
@ -31,20 +33,21 @@ export const columns: ColumnDef<TFile>[] = [
|
|||
size: '10%',
|
||||
},
|
||||
header: ({ column }) => {
|
||||
const localize = useLocalize();
|
||||
return (
|
||||
<Button
|
||||
variant="ghost"
|
||||
className="hover:bg-surface-hover"
|
||||
onClick={() => column.toggleSorting(column.getIsSorted() === 'asc')}
|
||||
>
|
||||
Date
|
||||
{localize('com_ui_date')}
|
||||
<ArrowUpDown className="ml-2 h-4 w-4" />
|
||||
</Button>
|
||||
);
|
||||
},
|
||||
cell: ({ row }) => (
|
||||
<span className="flex justify-end text-xs">
|
||||
{formatDate(row.original.updatedAt?.toString() ?? '')}
|
||||
{formatDate(row.original?.updatedAt?.toString() ?? '')}
|
||||
</span>
|
||||
),
|
||||
},
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ import { getFileType } from '~/utils';
|
|||
|
||||
export default function PanelFileCell({ row }: { row: Row<TFile | undefined> }) {
|
||||
const file = row.original;
|
||||
|
||||
return (
|
||||
<div className="flex w-full items-center gap-2">
|
||||
{file?.type.startsWith('image') === true ? (
|
||||
|
|
|
|||
|
|
@ -159,6 +159,7 @@ export default function DataTable<TData, TValue>({ columns, data }: DataTablePro
|
|||
filename: fileData.filename,
|
||||
source: fileData.source,
|
||||
size: fileData.bytes,
|
||||
metadata: fileData.metadata,
|
||||
});
|
||||
},
|
||||
[addFile, fileMap, conversation, localize, showToast, fileConfig.endpoints],
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue