Mirror of https://github.com/danny-avila/LibreChat.git (synced 2026-01-05 18:18:51 +01:00)
🔊 fix(tts): NotAllowedError (mobile/safari), Unsupported MediaSource type (firefox), Hide Audio Element (#2854)
* fix: hide audio element on mobile
* chore: add tts docs link
* fix: select voice option on first render
* fix: NotAllowedError, prevent async playback for mobile triggers, consolidate MessageAudio code, use user-triggered unmutes
* fix: Firefox/unsupported type for MediaSource hack
* refactor(STT): make icon red when recording; consolidate logic into the AudioRecorder component
* fix: revert Redis changes to use separate client for sessions
This commit is contained in:
parent dcd2e3e62d
commit 35ba4ba1a4

14 changed files with 421 additions and 130 deletions
AudioRecorder.tsx
@@ -1,16 +1,46 @@
-import React from 'react';
-import { ListeningIcon, Spinner, SpeechIcon } from '~/components/svg';
+import { useEffect } from 'react';
+import type { UseFormReturn } from 'react-hook-form';
 import { TooltipProvider, Tooltip, TooltipTrigger, TooltipContent } from '~/components/ui/';
-import { useLocalize } from '~/hooks';
+import { ListeningIcon, Spinner } from '~/components/svg';
+import { useLocalize, useSpeechToText } from '~/hooks';
+import { globalAudioId } from '~/common';
 
 export default function AudioRecorder({
-  isListening,
-  isLoading,
-  startRecording,
-  stopRecording,
+  textAreaRef,
+  methods,
+  ask,
   disabled,
 }: {
+  textAreaRef: React.RefObject<HTMLTextAreaElement>;
+  methods: UseFormReturn<{ text: string }>;
+  ask: (data: { text: string }) => void;
   disabled: boolean;
 }) {
   const localize = useLocalize();
 
+  const handleTranscriptionComplete = (text: string) => {
+    if (text) {
+      const globalAudio = document.getElementById(globalAudioId) as HTMLAudioElement;
+      if (globalAudio) {
+        console.log('Unmuting global audio');
+        globalAudio.muted = false;
+      }
+      ask({ text });
+      methods.reset({ text: '' });
+      clearText();
+    }
+  };
+
+  const { isListening, isLoading, startRecording, stopRecording, speechText, clearText } =
+    useSpeechToText(handleTranscriptionComplete);
+
+  useEffect(() => {
+    if (textAreaRef.current) {
+      textAreaRef.current.value = speechText;
+      methods.setValue('text', speechText, { shouldValidate: true });
+    }
+  }, [speechText, methods, textAreaRef]);
+
   const handleStartRecording = async () => {
     await startRecording();
   };
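The unmute added above is the NotAllowedError fix from the commit title. Mobile Safari blocks audible playback that is not traceable to a user gesture, so the shared audio element autoplays muted (always allowed) and is only unmuted inside a code path anchored to a user action. A minimal standalone sketch of the pattern; the element id and trigger button here are assumptions, not LibreChat identifiers:

```ts
// Sketch of the user-gesture unmute pattern (ids are illustrative).
// The <audio> element autoplays muted; flipping `muted` and calling play()
// happen synchronously in the click handler, while user activation is live.
const AUDIO_ID = 'global-audio-example';

function unmuteAndPlay(): void {
  const audio = document.getElementById(AUDIO_ID) as HTMLAudioElement | null;
  if (!audio) {
    return;
  }
  audio.muted = false;
  // Deferring play() to an async callback (e.g. after a fetch resolves) loses
  // the user activation and rejects with NotAllowedError on iOS Safari.
  audio.play().catch((err: DOMException) => {
    if (err.name === 'NotAllowedError') {
      console.warn('Playback requires a user gesture', err);
    }
  });
}

document.querySelector('#send-example')?.addEventListener('click', unmuteAndPlay);
```

The diff applies the same idea inside handleTranscriptionComplete: the speech-to-text flow starts with the user tapping the microphone, which is what the commit message means by user-triggered unmutes.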
@@ -19,6 +49,16 @@ export default function AudioRecorder({
     await stopRecording();
   };
 
+  const renderIcon = () => {
+    if (isListening) {
+      return <ListeningIcon className="stroke-red-500" />;
+    }
+    if (isLoading) {
+      return <Spinner className="stroke-gray-700 dark:stroke-gray-300" />;
+    }
+    return <ListeningIcon className="stroke-gray-700 dark:stroke-gray-300" />;
+  };
+
   return (
     <TooltipProvider delayDuration={250}>
       <Tooltip>
@@ -29,13 +69,7 @@ export default function AudioRecorder({
            className="absolute bottom-1.5 right-12 flex h-[30px] w-[30px] items-center justify-center rounded-lg p-0.5 transition-colors hover:bg-gray-200 dark:hover:bg-gray-700 md:bottom-3 md:right-12"
            type="button"
          >
-            {isListening ? (
-              <SpeechIcon className="stroke-gray-700 dark:stroke-gray-300" />
-            ) : isLoading ? (
-              <Spinner className="stroke-gray-700 dark:stroke-gray-300" />
-            ) : (
-              <ListeningIcon className="stroke-gray-700 dark:stroke-gray-300" />
-            )}
+            {renderIcon()}
          </button>
        </TooltipTrigger>
        <TooltipContent side="top" sideOffset={10}>
ChatForm.tsx
@@ -1,6 +1,6 @@
 import { useForm } from 'react-hook-form';
 import { useRecoilState, useRecoilValue } from 'recoil';
-import { memo, useCallback, useRef, useMemo, useEffect } from 'react';
+import { memo, useCallback, useRef, useMemo } from 'react';
 import {
   supportsFiles,
   mergeFileConfig,
@@ -8,7 +8,7 @@ import {
   fileConfig as defaultFileConfig,
 } from 'librechat-data-provider';
 import { useChatContext, useAssistantsMapContext } from '~/Providers';
-import { useRequiresKey, useTextarea, useSpeechToText } from '~/hooks';
+import { useRequiresKey, useTextarea } from '~/hooks';
 import { TextareaAutosize } from '~/components/ui';
 import { useGetFileConfig } from '~/data-provider';
 import { cn, removeFocusOutlines } from '~/utils';
@@ -72,24 +72,6 @@ const ChatForm = ({ index = 0 }) => {
   const { endpoint: _endpoint, endpointType } = conversation ?? { endpoint: null };
   const endpoint = endpointType ?? _endpoint;
 
-  const handleTranscriptionComplete = (text: string) => {
-    if (text) {
-      ask({ text });
-      methods.reset({ text: '' });
-      clearText();
-    }
-  };
-
-  const { isListening, isLoading, startRecording, stopRecording, speechText, clearText } =
-    useSpeechToText(handleTranscriptionComplete);
-
-  useEffect(() => {
-    if (textAreaRef.current) {
-      textAreaRef.current.value = speechText;
-      methods.setValue('text', speechText, { shouldValidate: true });
-    }
-  }, [speechText, methods]);
-
   const { data: fileConfig = defaultFileConfig } = useGetFileConfig({
     select: (data) => mergeFileConfig(data),
   });
@@ -183,11 +165,10 @@ const ChatForm = ({ index = 0 }) => {
         )}
         {SpeechToText && (
           <AudioRecorder
-            isListening={isListening}
-            isLoading={isLoading}
-            startRecording={startRecording}
-            stopRecording={stopRecording}
             disabled={!!disableInputs}
+            textAreaRef={textAreaRef}
+            ask={submitMessage}
+            methods={methods}
           />
         )}
         {TextToSpeech && automaticPlayback && <StreamAudio index={index} />}
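Net effect of the ChatForm hunks: the speech-to-text lifecycle now lives entirely inside AudioRecorder, so transcript updates re-render only the recorder rather than the whole form, and the unmute happens in the recorder's own user-triggered path. A condensed sketch of the new call-site contract; the surrounding form wiring is illustrative, not the actual ChatForm code:

```tsx
// Condensed sketch of the new AudioRecorder contract (wiring is illustrative).
import { useRef } from 'react';
import { useForm } from 'react-hook-form';
import AudioRecorder from './AudioRecorder'; // assumed relative path

function ExampleForm({ submitMessage }: { submitMessage: (data: { text: string }) => void }) {
  const methods = useForm<{ text: string }>({ defaultValues: { text: '' } });
  const textAreaRef = useRef<HTMLTextAreaElement>(null);

  return (
    <form onSubmit={methods.handleSubmit(submitMessage)}>
      <textarea ref={textAreaRef} />
      {/* isListening/isLoading/startRecording/stopRecording are no longer
          passed down; AudioRecorder derives them from useSpeechToText. */}
      <AudioRecorder
        disabled={false}
        textAreaRef={textAreaRef}
        ask={submitMessage}
        methods={methods}
      />
    </form>
  );
}
```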
StreamAudio.tsx
@@ -88,7 +88,7 @@ export default function StreamAudio({ index = 0 }) {
       return;
     }
 
-    console.log('Fetching audio...');
+    console.log('Fetching audio...', navigator.userAgent);
     const response = await fetch('/api/files/tts', {
       method: 'POST',
       headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${token}` },
@@ -103,8 +103,14 @@ export default function StreamAudio({ index = 0 }) {
     }
 
     const reader = response.body.getReader();
-    const mediaSource = new MediaSourceAppender('audio/mpeg');
-    setGlobalAudioURL(mediaSource.mediaSourceUrl);
+
+    const type = 'audio/mpeg';
+    const browserSupportsType = MediaSource.isTypeSupported(type);
+    let mediaSource: MediaSourceAppender | undefined;
+    if (browserSupportsType) {
+      mediaSource = new MediaSourceAppender(type);
+      setGlobalAudioURL(mediaSource.mediaSourceUrl);
+    }
     setAudioRunId(activeRunId);
 
     let done = false;
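This is the Firefox fix from the commit title: Firefox reports 'audio/mpeg' as unsupported for Media Source Extensions, so `mediaSource` stays undefined and playback falls back to the blob path in the later hunks. MediaSourceAppender is LibreChat's own wrapper; a standalone sketch of the same feature detection using a bare MediaSource:

```ts
// Sketch: pick a streaming (MSE) or buffered (Blob URL) strategy per browser.
const MIME_TYPE = 'audio/mpeg';

function canStreamWithMSE(mimeType: string): boolean {
  // Guard the constructor too: some mobile browsers lack MediaSource entirely,
  // and Firefox returns false for audio/mpeg even though it supports MSE.
  return typeof MediaSource !== 'undefined' && MediaSource.isTypeSupported(mimeType);
}

const audio = new Audio();
if (canStreamWithMSE(MIME_TYPE)) {
  // Stream: append chunks to a SourceBuffer as they arrive from the reader.
  const mediaSource = new MediaSource();
  audio.src = URL.createObjectURL(mediaSource);
  mediaSource.addEventListener('sourceopen', () => {
    const sourceBuffer = mediaSource.addSourceBuffer(MIME_TYPE);
    // 'sequence' appends chunks in arrival order, ignoring embedded timestamps.
    sourceBuffer.mode = 'sequence';
    // sourceBuffer.appendBuffer(chunk) for each streamed Uint8Array...
  });
} else {
  // Buffer: collect every chunk, then hand the element one Blob URL at the end
  // (see the cache fallback below).
}
```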
@@ -120,7 +126,7 @@ export default function StreamAudio({ index = 0 }) {
       if (cacheTTS && value) {
         chunks.push(value);
       }
-      if (value) {
+      if (value && mediaSource) {
         mediaSource.addData(value);
       }
       done = readerDone;
@@ -136,8 +142,19 @@ export default function StreamAudio({ index = 0 }) {
       if (!cacheKey) {
         throw new Error('Cache key not found');
       }
-      const audioBlob = new Blob(chunks, { type: 'audio/mpeg' });
-      cache.put(cacheKey, new Response(audioBlob));
+      const audioBlob = new Blob(chunks, { type });
+      const cachedResponse = new Response(audioBlob);
+      await cache.put(cacheKey, cachedResponse);
+      if (!browserSupportsType) {
+        const unconsumedResponse = await cache.match(cacheKey);
+        if (!unconsumedResponse) {
+          throw new Error('Failed to fetch audio from cache');
+        }
+        const audioBlob = await unconsumedResponse.blob();
+        const blobUrl = URL.createObjectURL(audioBlob);
+        setGlobalAudioURL(blobUrl);
+      }
      setIsFetching(false);
    }
 
    console.log('Audio stream reading ended');
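One subtlety in the fallback above: `cache.put()` consumes the body of `cachedResponse`, so the code re-reads a fresh copy via `cache.match()` (hence the name `unconsumedResponse`) before turning it into a Blob URL for browsers that failed the MediaSource check. A reduced sketch of that round-trip; the cache name is an assumption:

```ts
// Sketch: cache streamed chunks, then read back an unconsumed copy as a Blob URL.
async function cacheAndGetBlobUrl(cacheKey: string, chunks: Uint8Array[]): Promise<string> {
  const cache = await caches.open('tts-example'); // assumed cache name
  const blob = new Blob(chunks, { type: 'audio/mpeg' });

  // put() reads the Response body to completion; that Response object is now spent.
  await cache.put(cacheKey, new Response(blob));

  // match() returns a brand-new Response whose body has not been consumed yet.
  const fresh = await cache.match(cacheKey);
  if (!fresh) {
    throw new Error('Failed to fetch audio from cache');
  }
  // Blob URLs work everywhere, unlike MediaSource object URLs for audio/mpeg.
  return URL.createObjectURL(await fresh.blob());
}
```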
@@ -194,9 +211,16 @@ export default function StreamAudio({ index = 0 }) {
       ref={audioRef}
       controls
       controlsList="nodownload nofullscreen noremoteplayback"
-      className="absolute h-0 w-0 overflow-hidden"
+      style={{
+        position: 'absolute',
+        overflow: 'hidden',
+        display: 'none',
+        height: '0px',
+        width: '0px',
+      }}
       src={globalAudioURL || undefined}
       id={globalAudioId}
       muted
       autoPlay
     />
   );
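This is the "Hide Audio Element" part of the commit: the zero-size Tailwind classes (`h-0 w-0`) only shrank the element, while inline `display: 'none'` removes it from layout entirely; audio keeps playing with display:none, and per the commit message this is what actually hides the element on mobile. A reduced sketch of the resulting element, with props copied from the diff and an illustrative component wrapper:

```tsx
// Reduced sketch of the hidden global <audio> element (wrapper is illustrative).
import React from 'react';

export function HiddenGlobalAudio({ id, src }: { id: string; src?: string }) {
  return (
    <audio
      id={id}
      src={src}
      controls
      controlsList="nodownload nofullscreen noremoteplayback"
      // display: 'none' keeps the element out of layout and out of native UI,
      // while muted + autoPlay satisfy autoplay policies until a user gesture
      // unmutes it (see the AudioRecorder sketch earlier).
      style={{ position: 'absolute', overflow: 'hidden', display: 'none', height: 0, width: 0 }}
      muted
      autoPlay
    />
  );
}
```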