mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-28 22:28:51 +01:00
🔍 feat: Mistral OCR API / Upload Files as Text (#6274)
* refactor: move `loadAuthValues` to `~/services/Tools/credentials` * feat: add createAxiosInstance function to configure axios with proxy support * WIP: First pass mistral ocr * refactor: replace getConvoFiles with getToolFiles for improved file retrieval logic * refactor: improve document formatting in encodeAndFormat function * refactor: remove unused resendFiles parameter from buildOptions function (this option comes from the agent config) * fix: update getFiles call to include files with `text` property as well * refactor: move file handling to `initializeAgentOptions` * refactor: enhance addImageURLs method to handle OCR text and improve message formatting * refactor: update message formatting to handle OCR text in various content types * refactor: remove unused resendFiles property from compactAgentsSchema * fix: add error handling for Mistral OCR document upload and logging * refactor: integrate OCR capability into file upload options and configuration * refactor: skip processing for text source files in delete request, as they are directly tied to database * feat: add metadata field to ExtendedFile type and update PanelColumns and PanelTable components for localization and metadata handling * fix: source icon styling * wip: first pass, frontend file context agent resources * refactor: add hover card with contextual information for File Context (OCR) in FileContext component * feat: enhance file processing by integrating file retrieval for OCR resources in agent initialization * feat: implement OCR config; fix: agent resource deletion for ocr files * feat: enhance agent initialization by adding OCR capability check in resource priming * ci: fix `~/config` module mock * ci: add OCR property expectation in AppService tests * refactor: simplify OCR config loading by removing environment variable extraction, to be done when OCR is actually performed * ci: add unit test to ensure environment variable references are not parsed in OCR config * refactor: disable 
base64 image inclusion in OCR request * refactor: enhance OCR configuration handling by validating environment variables and providing defaults * refactor: use file stream from disk for mistral ocr api
This commit is contained in:
parent
9db00edfc4
commit
ded3cd8876
48 changed files with 1621 additions and 131 deletions
|
|
@ -168,6 +168,7 @@ export enum AgentCapabilities {
|
|||
artifacts = 'artifacts',
|
||||
actions = 'actions',
|
||||
tools = 'tools',
|
||||
ocr = 'ocr',
|
||||
}
|
||||
|
||||
export const defaultAssistantsVersion = {
|
||||
|
|
@ -242,6 +243,7 @@ export const agentsEndpointSChema = baseEndpointSchema.merge(
|
|||
AgentCapabilities.artifacts,
|
||||
AgentCapabilities.actions,
|
||||
AgentCapabilities.tools,
|
||||
AgentCapabilities.ocr,
|
||||
]),
|
||||
}),
|
||||
);
|
||||
|
|
@ -534,9 +536,22 @@ export type TStartupConfig = {
|
|||
bundlerURL?: string;
|
||||
};
|
||||
|
||||
/**
 * Identifiers for the OCR backends that can be selected through the
 * `strategy` field of the OCR configuration.
 */
export enum OCRStrategy {
  /** Selects the Mistral OCR backend. */
  MISTRAL_OCR = 'mistral_ocr',
  /** Selects a custom OCR backend. */
  CUSTOM_OCR = 'custom_ocr',
}
|
||||
|
||||
/**
 * Zod schema for the optional `ocr` section of the custom config.
 *
 * Every field may be omitted; `apiKey`, `baseURL` and `strategy` fall back to
 * the defaults declared below when they are absent.
 */
export const ocrSchema = z.object({
  /** Optional model name; no default is applied by the schema. */
  mistralModel: z.string().optional(),
  /**
   * Defaults to the literal string 'OCR_API_KEY'.
   * NOTE(review): presumably the name of an environment variable that is
   * resolved when OCR is actually performed — confirm against the consumer.
   */
  apiKey: z.string().optional().default('OCR_API_KEY'),
  /**
   * Defaults to the literal string 'OCR_BASEURL'.
   * NOTE(review): presumably an environment variable name as well — confirm.
   */
  baseURL: z.string().optional().default('OCR_BASEURL'),
  /** OCR backend to use; Mistral OCR when not specified. */
  strategy: z.nativeEnum(OCRStrategy).default(OCRStrategy.MISTRAL_OCR),
});
|
||||
|
||||
export const configSchema = z.object({
|
||||
version: z.string(),
|
||||
cache: z.boolean().default(true),
|
||||
ocr: ocrSchema.optional(),
|
||||
secureImageLinks: z.boolean().optional(),
|
||||
imageOutputType: z.nativeEnum(EImageOutputType).default(EImageOutputType.PNG),
|
||||
includedTools: z.array(z.string()).optional(),
|
||||
|
|
@ -1175,7 +1190,7 @@ export enum Constants {
|
|||
/** Key for the app's version. */
|
||||
VERSION = 'v0.7.7',
|
||||
/** Key for the Custom Config's version (librechat.yaml). */
|
||||
CONFIG_VERSION = '1.2.1',
|
||||
CONFIG_VERSION = '1.2.2',
|
||||
/** Standard value for the first message's `parentMessageId` value, to indicate no parent exists. */
|
||||
NO_PARENT = '00000000-0000-0000-0000-000000000000',
|
||||
/** Standard value for the initial conversationId before a request is sent */
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ export * from './file-config';
|
|||
export * from './artifacts';
|
||||
/* schema helpers */
|
||||
export * from './parsers';
|
||||
export * from './ocr';
|
||||
export * from './zod';
|
||||
/* custom/dynamic configurations */
|
||||
export * from './generate';
|
||||
|
|
|
|||
14
packages/data-provider/src/ocr.ts
Normal file
14
packages/data-provider/src/ocr.ts
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
import type { TCustomConfig } from '../src/config';
|
||||
import { OCRStrategy } from '../src/config';
|
||||
|
||||
/**
 * Builds a fully populated OCR config object from the (possibly missing)
 * `ocr` section of the custom config.
 *
 * No values are interpreted here: each string field is copied through as-is,
 * and a missing (`null`/`undefined`) field becomes an empty string. A missing
 * `strategy` becomes `OCRStrategy.MISTRAL_OCR`.
 *
 * @param config - The `ocr` section of the custom config, or `undefined`.
 * @returns An object with `apiKey`, `baseURL`, `mistralModel` and `strategy`
 * always present, even when `config` itself is `undefined`.
 */
export function loadOCRConfig(config: TCustomConfig['ocr']): TCustomConfig['ocr'] {
  // Nullish values collapse to ''; an explicit empty string is kept unchanged.
  const textOrEmpty = (value: string | undefined): string => value ?? '';

  return {
    apiKey: textOrEmpty(config?.apiKey),
    baseURL: textOrEmpty(config?.baseURL),
    mistralModel: textOrEmpty(config?.mistralModel),
    strategy: config?.strategy ?? OCRStrategy.MISTRAL_OCR,
  };
}
|
||||
|
|
@ -1152,7 +1152,6 @@ export const compactAgentsSchema = tConversationSchema
|
|||
iconURL: true,
|
||||
greeting: true,
|
||||
agent_id: true,
|
||||
resendFiles: true,
|
||||
instructions: true,
|
||||
additional_instructions: true,
|
||||
})
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ export enum EToolResources {
|
|||
code_interpreter = 'code_interpreter',
|
||||
execute_code = 'execute_code',
|
||||
file_search = 'file_search',
|
||||
ocr = 'ocr',
|
||||
}
|
||||
|
||||
export type Tool = {
|
||||
|
|
@ -163,7 +164,8 @@ export type AgentModelParameters = {
|
|||
|
||||
/**
 * Per-tool resources that can be attached to an agent. Every entry is
 * optional.
 */
export interface AgentToolResources {
  /** Resources for the `execute_code` tool. */
  execute_code?: ExecuteCodeResource;
  /**
   * Resources for the `file_search` tool.
   *
   * Declared exactly once: a second `file_search` member typed with the
   * superseded `AgentFileSearchResource` name is a duplicate declaration with
   * a conflicting type, which TypeScript rejects.
   */
  file_search?: AgentFileResource;
  /** Resources for OCR: the same shape as `file_search` minus `vector_store_ids`. */
  ocr?: Omit<AgentFileResource, 'vector_store_ids'>;
}
|
||||
export interface ExecuteCodeResource {
|
||||
/**
|
||||
|
|
@ -177,7 +179,7 @@ export interface ExecuteCodeResource {
|
|||
files?: Array<TFile>;
|
||||
}
|
||||
|
||||
export interface AgentFileSearchResource {
|
||||
export interface AgentFileResource {
|
||||
/**
|
||||
* The ID of the vector store attached to this agent. There
|
||||
* can be a maximum of 1 vector store attached to the agent.
|
||||
|
|
|
|||
|
|
@ -8,6 +8,8 @@ export enum FileSources {
|
|||
s3 = 's3',
|
||||
vectordb = 'vectordb',
|
||||
execute_code = 'execute_code',
|
||||
mistral_ocr = 'mistral_ocr',
|
||||
text = 'text',
|
||||
}
|
||||
|
||||
export const checkOpenAIStorage = (source: string) =>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue