mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-02-20 17:34:10 +01:00
🔀 refactor: Modularize TTS Logic for Improved Browser support (#3657)
* WIP: message audio refactor * WIP: use MessageAudio by provider * fix: Update MessageAudio component to use TTSEndpoints enum * feat: Update useTextToSpeechBrowser hook to handle errors and improve error logging * feat: Add voice dropdown components for different TTS engines * docs: update incorrect `voices` example changed `voice: ''` to `voices: ['alloy']` * feat: Add browser support check for Edge TTS engine component with error toast if not supported --------- Co-authored-by: Marco Beretta <81851188+berry-13@users.noreply.github.com>
This commit is contained in:
parent
bcde0beb47
commit
dba704079c
18 changed files with 784 additions and 187 deletions
256
client/src/components/Audio/TTS.tsx
Normal file
256
client/src/components/Audio/TTS.tsx
Normal file
|
|
@ -0,0 +1,256 @@
|
|||
import { useEffect, useMemo } from 'react';
|
||||
import { useRecoilValue } from 'recoil';
|
||||
import type { TMessageAudio } from '~/common';
|
||||
import { useLocalize, useTTSBrowser, useTTSEdge, useTTSExternal } from '~/hooks';
|
||||
import { VolumeIcon, VolumeMuteIcon, Spinner } from '~/components/svg';
|
||||
import { useToastContext } from '~/Providers/ToastContext';
|
||||
import { logger } from '~/utils';
|
||||
import store from '~/store';
|
||||
|
||||
/**
 * Structural shape of the audio ref returned by the TTS hooks and attached
 * to the hidden `<audio>` element.
 */
type AudioElementRef = { current: HTMLAudioElement | null };

/** Props of the shared read-aloud control rendered by every TTS engine component. */
type TTSControlProps = Pick<TMessageAudio, 'messageId' | 'className'> & {
  /** Ref attached to the hidden `<audio>` element. */
  audioRef: AudioElementRef;
  /** Starts or stops speech; invoked on button click. */
  toggleSpeech: () => unknown;
  /** When `true`, the "stop" icon and title are shown. */
  isSpeaking?: boolean | null;
  /** When `true`, a spinner replaces the volume icon. */
  isLoading?: boolean | null;
  /**
   * When `false`, a click only calls `onUnsupported` and no `<audio>` element
   * is rendered. Defaults to `true`.
   */
  isSupported?: boolean;
  /** Called instead of `toggleSpeech` when `isSupported` is `false`. */
  onUnsupported?: () => void;
};

/** Inline style that keeps the `<audio>` element out of the visible layout. */
const HIDDEN_AUDIO_STYLE = {
  position: 'absolute',
  overflow: 'hidden',
  display: 'none',
  height: '0px',
  width: '0px',
} as const;

/**
 * Shared presentation for all TTS engines: a toggle button plus a hidden
 * `<audio>` element with id `audio-${messageId}`.
 *
 * The engine-specific components only differ in which hook supplies
 * `toggleSpeech`, `isSpeaking`, `isLoading` and `audioRef`, so everything else
 * lives here instead of being repeated per engine.
 */
function TTSControl({
  messageId,
  className,
  audioRef,
  toggleSpeech,
  isSpeaking,
  isLoading,
  isSupported = true,
  onUnsupported,
}: TTSControlProps) {
  const localize = useLocalize();
  const playbackRate = useRecoilValue(store.playbackRate);

  // Keep the element's playback rate in sync with the stored setting.
  // The element is looked up by its DOM id rather than through `audioRef`.
  useEffect(() => {
    const messageAudio = document.getElementById(`audio-${messageId}`) as HTMLAudioElement | null;
    if (!messageAudio) {
      return;
    }
    if (playbackRate != null && playbackRate > 0 && messageAudio.playbackRate !== playbackRate) {
      messageAudio.playbackRate = playbackRate;
    }
  }, [audioRef, isSpeaking, playbackRate, messageId]);

  // Debug trace, emitted on every render.
  logger.log(
    'MessageAudio: audioRef.current?.src, audioRef.current',
    audioRef.current?.src,
    audioRef.current,
  );

  /** Picks the icon for the current state: loading > speaking > idle. */
  const renderIcon = (size: string) => {
    if (isLoading === true) {
      return <Spinner size={size} />;
    }

    if (isSpeaking === true) {
      return <VolumeMuteIcon size={size} />;
    }

    return <VolumeIcon size={size} />;
  };

  const handleClick = () => {
    if (!isSupported) {
      onUnsupported?.();
      return;
    }
    // The element is rendered `muted`; un-mute it on the user's click before
    // toggling (presumably to satisfy browser autoplay policies — confirm).
    if (audioRef.current) {
      audioRef.current.muted = false;
    }
    toggleSpeech();
  };

  return (
    <>
      <button
        className={className}
        onClickCapture={handleClick}
        type="button"
        title={isSpeaking === true ? localize('com_ui_stop') : localize('com_ui_read_aloud')}
      >
        {renderIcon('19')}
      </button>
      {isSupported ? (
        <audio
          ref={audioRef}
          controls
          preload="none"
          controlsList="nodownload nofullscreen noremoteplayback"
          style={HIDDEN_AUDIO_STYLE}
          src={audioRef.current?.src}
          onError={(error) => {
            console.error('Error fetching audio:', error);
          }}
          id={`audio-${messageId}`}
          muted
          autoPlay
        />
      ) : null}
    </>
  );
}

/** Read-aloud control backed by `useTTSBrowser`. */
export function BrowserTTS({ isLast, index, messageId, content, className }: TMessageAudio) {
  const { toggleSpeech, isSpeaking, isLoading, audioRef } = useTTSBrowser({
    isLast,
    index,
    messageId,
    content,
  });

  return (
    <TTSControl
      messageId={messageId}
      className={className}
      audioRef={audioRef}
      toggleSpeech={toggleSpeech}
      isSpeaking={isSpeaking}
      isLoading={isLoading}
    />
  );
}

/**
 * Read-aloud control backed by `useTTSEdge`.
 *
 * Requires `MediaSource` with `audio/mpeg` support. When that is missing, a
 * click shows an error toast instead of toggling speech and the `<audio>`
 * element is not rendered.
 */
export function EdgeTTS({ isLast, index, messageId, content, className }: TMessageAudio) {
  const localize = useLocalize();
  const { showToast } = useToastContext();

  // Feature check is computed once per mounted component.
  const isBrowserSupported = useMemo(
    () => typeof MediaSource !== 'undefined' && MediaSource.isTypeSupported('audio/mpeg'),
    [],
  );

  const { toggleSpeech, isSpeaking, isLoading, audioRef } = useTTSEdge({
    isLast,
    index,
    messageId,
    content,
  });

  return (
    <TTSControl
      messageId={messageId}
      className={className}
      audioRef={audioRef}
      toggleSpeech={toggleSpeech}
      isSpeaking={isSpeaking}
      isLoading={isLoading}
      isSupported={isBrowserSupported}
      onUnsupported={() => {
        showToast({
          message: localize('com_nav_tts_unsupported_error'),
          status: 'error',
        });
      }}
    />
  );
}

/** Read-aloud control backed by `useTTSExternal`. */
export function ExternalTTS({ isLast, index, messageId, content, className }: TMessageAudio) {
  const { toggleSpeech, isSpeaking, isLoading, audioRef } = useTTSExternal({
    isLast,
    index,
    messageId,
    content,
  });

  return (
    <TTSControl
      messageId={messageId}
      className={className}
      audioRef={audioRef}
      toggleSpeech={toggleSpeech}
      isSpeaking={isSpeaking}
      isLoading={isLoading}
    />
  );
}
|
||||
94
client/src/components/Audio/Voices.tsx
Normal file
94
client/src/components/Audio/Voices.tsx
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
import React from 'react';
|
||||
import { useRecoilState } from 'recoil';
|
||||
import type { Option } from '~/common';
|
||||
import DropdownNoState from '~/components/ui/DropdownNoState';
|
||||
import { useLocalize, useTTSBrowser, useTTSEdge, useTTSExternal } from '~/hooks';
|
||||
import { logger } from '~/utils';
|
||||
import store from '~/store';
|
||||
|
||||
/** Option list type accepted by `DropdownNoState`, derived from its props. */
type VoiceOptions = NonNullable<React.ComponentProps<typeof DropdownNoState>['options']>;

/** Props of the shared voice selector used by every engine-specific dropdown. */
type VoiceSelectProps = {
  /** Engine label; used in the log message and (lower-cased) in the React key. */
  engine: 'Edge' | 'Browser' | 'External';
  /** Voices offered by the engine's hook. */
  voices: VoiceOptions;
  /** Test id forwarded to `DropdownNoState`. */
  testId: string;
};

/**
 * Shared voice selector: a label plus a `DropdownNoState` bound to
 * `store.voice`.
 *
 * The engine-specific dropdowns only differ in which hook supplies `voices`
 * and in their log label / key / test id, so the rest lives here.
 */
function VoiceSelect({ engine, voices, testId }: VoiceSelectProps) {
  const localize = useLocalize();
  const [voice, setVoice] = useRecoilState(store.voice);

  /** Stores the selected voice; accepts either a plain string or an `Option`. */
  const handleVoiceChange = (newValue?: string | Option) => {
    logger.log(`${engine} Voice changed:`, newValue);
    const newVoice = typeof newValue === 'string' ? newValue : newValue?.value;
    if (newVoice != null) {
      setVoice(newVoice.toString());
    }
  };

  return (
    <div className="flex items-center justify-between">
      <div>{localize('com_nav_voice_select')}</div>
      <DropdownNoState
        // The key changes with the number of voices, so React remounts the
        // dropdown when the list size changes.
        key={`${engine.toLowerCase()}-voice-dropdown-${voices.length}`}
        value={voice}
        options={voices}
        onChange={handleVoiceChange}
        sizeClasses="min-w-[200px] !max-w-[400px] [--anchor-max-width:400px]"
        anchor="bottom start"
        testId={testId}
      />
    </div>
  );
}

/** Voice selector listing the voices returned by `useTTSEdge`. */
export function EdgeVoiceDropdown() {
  const { voices = [] } = useTTSEdge();
  return <VoiceSelect engine="Edge" voices={voices} testId="EdgeVoiceDropdown" />;
}

/** Voice selector listing the voices returned by `useTTSBrowser`. */
export function BrowserVoiceDropdown() {
  const { voices = [] } = useTTSBrowser();
  return <VoiceSelect engine="Browser" voices={voices} testId="BrowserVoiceDropdown" />;
}

/** Voice selector listing the voices returned by `useTTSExternal`. */
export function ExternalVoiceDropdown() {
  const { voices = [] } = useTTSExternal();
  return <VoiceSelect engine="External" voices={voices} testId="ExternalVoiceDropdown" />;
}
|
||||
|
|
@ -79,6 +79,7 @@ export default function HoverButtons({
|
|||
messageId={message.messageId}
|
||||
content={message.content ?? message.text}
|
||||
isLast={isLast}
|
||||
className="hover-button rounded-md p-1 pl-0 text-gray-500 hover:bg-gray-100 hover:text-gray-500 dark:text-gray-400/70 dark:hover:bg-gray-700 dark:hover:text-gray-200 disabled:dark:hover:text-gray-400 md:group-hover:visible md:group-[.final-completion]:visible"
|
||||
/>
|
||||
)}
|
||||
{isEditableEndpoint && (
|
||||
|
|
|
|||
|
|
@ -1,104 +1,22 @@
|
|||
import { useEffect } from 'react';
|
||||
// client/src/components/Chat/Messages/MessageAudio.tsx
|
||||
import { memo } from 'react';
|
||||
import { useRecoilValue } from 'recoil';
|
||||
import type { TMessageContentParts } from 'librechat-data-provider';
|
||||
import { VolumeIcon, VolumeMuteIcon, Spinner } from '~/components/svg';
|
||||
import { useLocalize, useTextToSpeech } from '~/hooks';
|
||||
import { logger } from '~/utils';
|
||||
import type { TMessageAudio } from '~/common';
|
||||
import { BrowserTTS, EdgeTTS, ExternalTTS } from '~/components/Audio/TTS';
|
||||
import { TTSEndpoints } from '~/common';
|
||||
import store from '~/store';
|
||||
|
||||
type THoverButtons = {
|
||||
messageId?: string;
|
||||
content?: TMessageContentParts[] | string;
|
||||
isLast: boolean;
|
||||
index: number;
|
||||
};
|
||||
function MessageAudio(props: TMessageAudio) {
|
||||
const engineTTS = useRecoilValue<string>(store.engineTTS);
|
||||
|
||||
export default function MessageAudio({ isLast, index, messageId, content }: THoverButtons) {
|
||||
const localize = useLocalize();
|
||||
const playbackRate = useRecoilValue(store.playbackRate);
|
||||
|
||||
const { toggleSpeech, isSpeaking, isLoading, audioRef } = useTextToSpeech({
|
||||
isLast,
|
||||
index,
|
||||
messageId,
|
||||
content,
|
||||
});
|
||||
|
||||
const renderIcon = (size: string) => {
|
||||
if (isLoading === true) {
|
||||
return <Spinner size={size} />;
|
||||
}
|
||||
|
||||
if (isSpeaking === true) {
|
||||
return <VolumeMuteIcon size={size} />;
|
||||
}
|
||||
|
||||
return <VolumeIcon size={size} />;
|
||||
const TTSComponents = {
|
||||
[TTSEndpoints.edge]: EdgeTTS,
|
||||
[TTSEndpoints.browser]: BrowserTTS,
|
||||
[TTSEndpoints.external]: ExternalTTS,
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
const messageAudio = document.getElementById(`audio-${messageId}`) as HTMLAudioElement | null;
|
||||
if (!messageAudio) {
|
||||
return;
|
||||
}
|
||||
if (playbackRate != null && playbackRate > 0 && messageAudio.playbackRate !== playbackRate) {
|
||||
messageAudio.playbackRate = playbackRate;
|
||||
}
|
||||
}, [audioRef, isSpeaking, playbackRate, messageId]);
|
||||
|
||||
logger.log(
|
||||
'MessageAudio: audioRef.current?.src, audioRef.current',
|
||||
audioRef.current?.src,
|
||||
audioRef.current,
|
||||
);
|
||||
|
||||
return (
|
||||
<>
|
||||
<button
|
||||
className="hover-button rounded-md p-1 pl-0 text-gray-500 hover:bg-gray-100 hover:text-gray-500 dark:text-gray-400/70 dark:hover:bg-gray-700 dark:hover:text-gray-200 disabled:dark:hover:text-gray-400 md:group-hover:visible md:group-[.final-completion]:visible"
|
||||
// onMouseDownCapture={() => {
|
||||
// if (audioRef.current) {
|
||||
// audioRef.current.muted = false;
|
||||
// }
|
||||
// handleMouseDown();
|
||||
// }}
|
||||
// onMouseUpCapture={() => {
|
||||
// if (audioRef.current) {
|
||||
// audioRef.current.muted = false;
|
||||
// }
|
||||
// handleMouseUp();
|
||||
// }}
|
||||
onClickCapture={() => {
|
||||
if (audioRef.current) {
|
||||
audioRef.current.muted = false;
|
||||
}
|
||||
toggleSpeech();
|
||||
}}
|
||||
type="button"
|
||||
title={isSpeaking === true ? localize('com_ui_stop') : localize('com_ui_read_aloud')}
|
||||
>
|
||||
{renderIcon('19')}
|
||||
</button>
|
||||
<audio
|
||||
ref={audioRef}
|
||||
controls
|
||||
preload="none"
|
||||
controlsList="nodownload nofullscreen noremoteplayback"
|
||||
style={{
|
||||
position: 'absolute',
|
||||
overflow: 'hidden',
|
||||
display: 'none',
|
||||
height: '0px',
|
||||
width: '0px',
|
||||
}}
|
||||
src={audioRef.current?.src}
|
||||
onError={(error) => {
|
||||
console.error('Error fetching audio:', error);
|
||||
}}
|
||||
id={`audio-${messageId}`}
|
||||
muted
|
||||
autoPlay
|
||||
/>
|
||||
</>
|
||||
);
|
||||
const SelectedTTS = TTSComponents[engineTTS];
|
||||
return <SelectedTTS {...props} />;
|
||||
}
|
||||
|
||||
export default memo(MessageAudio);
|
||||
|
|
|
|||
|
|
@ -1,37 +1,21 @@
|
|||
import React from 'react';
|
||||
import { useRecoilState, useRecoilValue } from 'recoil';
|
||||
import type { Option } from '~/common';
|
||||
import DropdownNoState from '~/components/ui/DropdownNoState';
|
||||
import { useLocalize, useTextToSpeech } from '~/hooks';
|
||||
import { logger } from '~/utils';
|
||||
import { useRecoilValue } from 'recoil';
|
||||
import {
|
||||
EdgeVoiceDropdown,
|
||||
BrowserVoiceDropdown,
|
||||
ExternalVoiceDropdown,
|
||||
} from '~/components/Audio/Voices';
|
||||
import store from '~/store';
|
||||
import { TTSEndpoints } from '~/common';
|
||||
|
||||
const voiceDropdownComponentsMap = {
|
||||
[TTSEndpoints.edge]: EdgeVoiceDropdown,
|
||||
[TTSEndpoints.browser]: BrowserVoiceDropdown,
|
||||
[TTSEndpoints.external]: ExternalVoiceDropdown,
|
||||
};
|
||||
|
||||
export default function VoiceDropdown() {
|
||||
const localize = useLocalize();
|
||||
const { voices = [] } = useTextToSpeech();
|
||||
const [voice, setVoice] = useRecoilState(store.voice);
|
||||
const engineTTS = useRecoilValue<string>(store.engineTTS);
|
||||
const VoiceDropdownComponent = voiceDropdownComponentsMap[engineTTS];
|
||||
|
||||
const handleVoiceChange = (newValue?: string | Option) => {
|
||||
logger.log('Voice changed:', newValue);
|
||||
const newVoice = typeof newValue === 'string' ? newValue : newValue?.value;
|
||||
if (newVoice != null) {
|
||||
return setVoice(newVoice.toString());
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="flex items-center justify-between">
|
||||
<div>{localize('com_nav_voice_select')}</div>
|
||||
<DropdownNoState
|
||||
key={`voice-dropdown-${engineTTS}-${voices.length}`}
|
||||
value={voice}
|
||||
options={voices}
|
||||
onChange={handleVoiceChange}
|
||||
sizeClasses="min-w-[200px] !max-w-[400px] [--anchor-max-width:400px]"
|
||||
anchor="bottom start"
|
||||
testId="VoiceDropdown"
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
return <VoiceDropdownComponent />;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue