👓 feat: Vision Support for Assistants (#2195)

* refactor(assistants/chat): use promises to speed up initialization, initialize shared variables, include `attachedFileIds` in streamRunManager

* chore: additional typedefs

* fix(OpenAIClient): handle edge case where attachments promise is resolved

* feat: createVisionPrompt

* feat: Vision Support for Assistants
This commit is contained in:
Danny Avila 2024-03-24 23:43:00 -04:00 committed by GitHub
parent 1f0fb497f8
commit 798e8763d0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 376 additions and 100 deletions

View file

@ -1,4 +1,9 @@
import { ToolCallTypes, ContentTypes, imageGenTools } from 'librechat-data-provider';
import {
ToolCallTypes,
ContentTypes,
imageGenTools,
isImageVisionTool,
} from 'librechat-data-provider';
import type { TMessageContentParts, TMessage } from 'librechat-data-provider';
import type { TDisplayProps } from '~/common';
import { ErrorMessage } from './MessageContent';
@ -96,6 +101,25 @@ export default function Part({
part[ContentTypes.TOOL_CALL].type === ToolCallTypes.FUNCTION
) {
const toolCall = part[ContentTypes.TOOL_CALL];
if (isImageVisionTool(toolCall)) {
if (isSubmitting && showCursor) {
return (
<Container>
<div className="markdown prose dark:prose-invert light dark:text-gray-70 my-1 w-full break-words">
<DisplayMessage
text={''}
isCreatedByUser={message.isCreatedByUser}
message={message}
showCursor={showCursor}
/>
</div>
</Container>
);
}
return null;
}
return (
<ToolCall
initialProgress={toolCall.progress ?? 0.1}

View file

@ -8,6 +8,7 @@ import {
Capabilities,
EModelEndpoint,
actionDelimiter,
ImageVisionTool,
defaultAssistantFormValues,
} from 'librechat-data-provider';
import type { AssistantForm, AssistantPanelProps } from '~/common';
@ -82,6 +83,10 @@ export default function AssistantPanel({
() => assistants?.capabilities?.includes(Capabilities.code_interpreter),
[assistants],
);
const imageVisionEnabled = useMemo(
() => assistants?.capabilities?.includes(Capabilities.image_vision),
[assistants],
);
useEffect(() => {
if (model && !retrievalModels.has(model)) {
@ -157,6 +162,9 @@ export default function AssistantPanel({
if (data.retrieval) {
tools.push({ type: Tools.retrieval });
}
if (data.image_vision) {
tools.push(ImageVisionTool);
}
const {
name,
@ -374,6 +382,37 @@ export default function AssistantPanel({
</label>
</div>
)}
{imageVisionEnabled && (
<div className="flex items-center">
<Controller
name={Capabilities.image_vision}
control={control}
render={({ field }) => (
<Checkbox
{...field}
checked={field.value}
onCheckedChange={field.onChange}
className="relative float-left mr-2 inline-flex h-4 w-4 cursor-pointer"
value={field?.value?.toString()}
/>
)}
/>
<label
className="form-check-label text-token-text-primary w-full cursor-pointer"
htmlFor={Capabilities.image_vision}
onClick={() =>
setValue(Capabilities.image_vision, !getValues(Capabilities.image_vision), {
shouldDirty: true,
})
}
>
<div className="flex items-center">
{localize('com_assistants_image_vision')}
<QuestionMark />
</div>
</label>
</div>
)}
{retrievalEnabled && (
<div className="flex items-center">
<Controller
@ -417,9 +456,9 @@ export default function AssistantPanel({
${actionsEnabled ? localize('com_assistants_actions') : ''}`}
</label>
<div className="space-y-1">
{functions.map((func) => (
{functions.map((func, i) => (
<AssistantTool
key={func}
key={`${func}-${i}-${assistant_id}`}
tool={func}
allTools={allTools}
assistant_id={assistant_id}

View file

@ -3,6 +3,8 @@ import { useCallback, useEffect, useRef } from 'react';
import {
defaultAssistantFormValues,
defaultOrderQuery,
isImageVisionTool,
Capabilities,
FileSources,
} from 'librechat-data-provider';
import type { UseFormReset } from 'react-hook-form';
@ -13,7 +15,7 @@ import SelectDropDown from '~/components/ui/SelectDropDown';
import { useListAssistantsQuery } from '~/data-provider';
import { useFileMapContext } from '~/Providers';
import { useLocalize } from '~/hooks';
import { cn } from '~/utils/';
import { cn } from '~/utils';
const keys = new Set(['name', 'id', 'description', 'instructions', 'model']);
@ -87,20 +89,21 @@ export default function AssistantSelect({
};
const actions: Actions = {
code_interpreter: false,
retrieval: false,
[Capabilities.code_interpreter]: false,
[Capabilities.image_vision]: false,
[Capabilities.retrieval]: false,
};
assistant?.tools
?.filter((tool) => tool.type !== 'function')
?.map((tool) => tool.type)
?.filter((tool) => tool.type !== 'function' || isImageVisionTool(tool))
?.map((tool) => tool?.function?.name || tool.type)
.forEach((tool) => {
actions[tool] = true;
});
const functions =
assistant?.tools
?.filter((tool) => tool.type === 'function')
?.filter((tool) => tool.type === 'function' && !isImageVisionTool(tool))
?.map((tool) => tool.function?.name ?? '') ?? [];
const formValues: Partial<AssistantForm & Actions> = {