🔀 refactor: Modularize TTS Logic for Improved Browser support (#3657)

* WIP: message audio refactor

* WIP: use MessageAudio by provider

* fix: Update MessageAudio component to use TTSEndpoints enum

* feat: Update useTextToSpeechBrowser hook to handle errors and improve error logging

* feat: Add voice dropdown components for different TTS engines

* docs: update incorrect `voices` example

changed `voice: ''` to `voices: ['alloy']`

* feat: Add browser support check for Edge TTS engine component with error toast if not supported

---------

Co-authored-by: Marco Beretta <81851188+berry-13@users.noreply.github.com>
This commit is contained in:
Danny Avila 2024-08-15 11:34:25 -04:00 committed by GitHub
parent bcde0beb47
commit dba704079c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 784 additions and 187 deletions

View file

@ -0,0 +1,256 @@
import { useEffect, useMemo } from 'react';
import { useRecoilValue } from 'recoil';
import type { TMessageAudio } from '~/common';
import { useLocalize, useTTSBrowser, useTTSEdge, useTTSExternal } from '~/hooks';
import { VolumeIcon, VolumeMuteIcon, Spinner } from '~/components/svg';
import { useToastContext } from '~/Providers/ToastContext';
import { logger } from '~/utils';
import store from '~/store';
/**
 * Read-aloud button driven by the `useTTSBrowser` hook, rendered together with a
 * hidden `<audio>` element whose id is `audio-${messageId}`.
 *
 * Clicking the button un-mutes the element held in `audioRef` (when one is attached)
 * and then calls `toggleSpeech`. The icon is a spinner while `isLoading`, a mute
 * icon while `isSpeaking`, and a volume icon otherwise.
 */
export function BrowserTTS({ isLast, index, messageId, content, className }: TMessageAudio) {
  const localize = useLocalize();
  const playbackRate = useRecoilValue(store.playbackRate);
  const { toggleSpeech, isSpeaking, isLoading, audioRef } = useTTSBrowser({
    isLast,
    index,
    messageId,
    content,
  });

  // Loading takes precedence over speaking when choosing the icon.
  const iconSize = '19';
  const icon =
    isLoading === true ? (
      <Spinner size={iconSize} />
    ) : isSpeaking === true ? (
      <VolumeMuteIcon size={iconSize} />
    ) : (
      <VolumeIcon size={iconSize} />
    );

  // Push the stored playback rate onto the rendered audio element when it differs.
  useEffect(() => {
    const audioElement = document.getElementById(`audio-${messageId}`) as HTMLAudioElement | null;
    if (!audioElement || playbackRate == null || !(playbackRate > 0)) {
      return;
    }
    if (audioElement.playbackRate !== playbackRate) {
      audioElement.playbackRate = playbackRate;
    }
  }, [audioRef, isSpeaking, playbackRate, messageId]);

  const currentAudio = audioRef.current;
  logger.log(
    'MessageAudio: audioRef.current?.src, audioRef.current',
    currentAudio?.src,
    currentAudio,
  );

  // The element is rendered muted, so un-mute it before toggling speech.
  const handleClick = () => {
    const audio = audioRef.current;
    if (audio) {
      audio.muted = false;
    }
    toggleSpeech();
  };

  const buttonTitle = isSpeaking === true ? localize('com_ui_stop') : localize('com_ui_read_aloud');

  return (
    <>
      <button className={className} onClickCapture={handleClick} type="button" title={buttonTitle}>
        {icon}
      </button>
      <audio
        id={`audio-${messageId}`}
        ref={audioRef}
        src={currentAudio?.src}
        controls
        muted
        autoPlay
        preload="none"
        controlsList="nodownload nofullscreen noremoteplayback"
        style={{
          position: 'absolute',
          overflow: 'hidden',
          display: 'none',
          height: '0px',
          width: '0px',
        }}
        onError={(event) => {
          console.error('Error fetching audio:', event);
        }}
      />
    </>
  );
}
/**
 * Read-aloud button driven by the `useTTSEdge` hook.
 *
 * Support is determined once per mount by checking that `MediaSource` exists and
 * reports `'audio/mpeg'` as a supported type. When unsupported, clicking the button
 * shows an error toast instead of toggling speech, and no `<audio>` element is
 * rendered. When supported, the click un-mutes the element held in `audioRef`
 * (if attached) and calls `toggleSpeech`.
 */
export function EdgeTTS({ isLast, index, messageId, content, className }: TMessageAudio) {
  const localize = useLocalize();
  const playbackRate = useRecoilValue(store.playbackRate);
  const isBrowserSupported = useMemo(() => {
    if (typeof MediaSource === 'undefined') {
      return false;
    }
    return MediaSource.isTypeSupported('audio/mpeg');
  }, []);
  const { showToast } = useToastContext();
  const { toggleSpeech, isSpeaking, isLoading, audioRef } = useTTSEdge({
    isLast,
    index,
    messageId,
    content,
  });

  // Loading takes precedence over speaking when choosing the icon.
  const iconSize = '19';
  const icon =
    isLoading === true ? (
      <Spinner size={iconSize} />
    ) : isSpeaking === true ? (
      <VolumeMuteIcon size={iconSize} />
    ) : (
      <VolumeIcon size={iconSize} />
    );

  // Push the stored playback rate onto the rendered audio element when it differs.
  useEffect(() => {
    const audioElement = document.getElementById(`audio-${messageId}`) as HTMLAudioElement | null;
    if (!audioElement || playbackRate == null || !(playbackRate > 0)) {
      return;
    }
    if (audioElement.playbackRate !== playbackRate) {
      audioElement.playbackRate = playbackRate;
    }
  }, [audioRef, isSpeaking, playbackRate, messageId]);

  const currentAudio = audioRef.current;
  logger.log(
    'MessageAudio: audioRef.current?.src, audioRef.current',
    currentAudio?.src,
    currentAudio,
  );

  const handleClick = () => {
    if (!isBrowserSupported) {
      // Surface the limitation to the user rather than failing silently.
      showToast({
        message: localize('com_nav_tts_unsupported_error'),
        status: 'error',
      });
      return;
    }
    const audio = audioRef.current;
    if (audio) {
      audio.muted = false;
    }
    toggleSpeech();
  };

  const buttonTitle = isSpeaking === true ? localize('com_ui_stop') : localize('com_ui_read_aloud');

  // Only mount the audio element when the support check passed.
  const audioNode = isBrowserSupported ? (
    <audio
      id={`audio-${messageId}`}
      ref={audioRef}
      src={currentAudio?.src}
      controls
      muted
      autoPlay
      preload="none"
      controlsList="nodownload nofullscreen noremoteplayback"
      style={{
        position: 'absolute',
        overflow: 'hidden',
        display: 'none',
        height: '0px',
        width: '0px',
      }}
      onError={(event) => {
        console.error('Error fetching audio:', event);
      }}
    />
  ) : null;

  return (
    <>
      <button className={className} onClickCapture={handleClick} type="button" title={buttonTitle}>
        {icon}
      </button>
      {audioNode}
    </>
  );
}
/**
 * Read-aloud button driven by the `useTTSExternal` hook, rendered together with a
 * hidden `<audio>` element whose id is `audio-${messageId}`.
 *
 * Clicking the button un-mutes the element held in `audioRef` (when one is attached)
 * and then calls `toggleSpeech`. The icon is a spinner while `isLoading`, a mute
 * icon while `isSpeaking`, and a volume icon otherwise.
 */
export function ExternalTTS({ isLast, index, messageId, content, className }: TMessageAudio) {
  const localize = useLocalize();
  const playbackRate = useRecoilValue(store.playbackRate);
  const { toggleSpeech, isSpeaking, isLoading, audioRef } = useTTSExternal({
    isLast,
    index,
    messageId,
    content,
  });

  // Loading takes precedence over speaking when choosing the icon.
  const iconSize = '19';
  const icon =
    isLoading === true ? (
      <Spinner size={iconSize} />
    ) : isSpeaking === true ? (
      <VolumeMuteIcon size={iconSize} />
    ) : (
      <VolumeIcon size={iconSize} />
    );

  // Push the stored playback rate onto the rendered audio element when it differs.
  useEffect(() => {
    const audioElement = document.getElementById(`audio-${messageId}`) as HTMLAudioElement | null;
    if (!audioElement || playbackRate == null || !(playbackRate > 0)) {
      return;
    }
    if (audioElement.playbackRate !== playbackRate) {
      audioElement.playbackRate = playbackRate;
    }
  }, [audioRef, isSpeaking, playbackRate, messageId]);

  const currentAudio = audioRef.current;
  logger.log(
    'MessageAudio: audioRef.current?.src, audioRef.current',
    currentAudio?.src,
    currentAudio,
  );

  // The element is rendered muted, so un-mute it before toggling speech.
  const handleClick = () => {
    const audio = audioRef.current;
    if (audio) {
      audio.muted = false;
    }
    toggleSpeech();
  };

  const buttonTitle = isSpeaking === true ? localize('com_ui_stop') : localize('com_ui_read_aloud');

  return (
    <>
      <button className={className} onClickCapture={handleClick} type="button" title={buttonTitle}>
        {icon}
      </button>
      <audio
        id={`audio-${messageId}`}
        ref={audioRef}
        src={currentAudio?.src}
        controls
        muted
        autoPlay
        preload="none"
        controlsList="nodownload nofullscreen noremoteplayback"
        style={{
          position: 'absolute',
          overflow: 'hidden',
          display: 'none',
          height: '0px',
          width: '0px',
        }}
        onError={(event) => {
          console.error('Error fetching audio:', event);
        }}
      />
    </>
  );
}

View file

@ -0,0 +1,94 @@
import React from 'react';
import { useRecoilState } from 'recoil';
import type { Option } from '~/common';
import DropdownNoState from '~/components/ui/DropdownNoState';
import { useLocalize, useTTSBrowser, useTTSEdge, useTTSExternal } from '~/hooks';
import { logger } from '~/utils';
import store from '~/store';
/**
 * Voice selector whose options are the `voices` returned by `useTTSEdge`
 * (an empty list when the hook returns none). The current selection is read from
 * and written to the `store.voice` Recoil state.
 */
export function EdgeVoiceDropdown() {
  const localize = useLocalize();
  const { voices = [] } = useTTSEdge();
  const [voice, setVoice] = useRecoilState(store.voice);

  /** Accepts a plain string or an option object and stores the value's string form. */
  const handleVoiceChange = (newValue?: string | Option) => {
    logger.log('Edge Voice changed:', newValue);
    const selected = typeof newValue === 'string' ? newValue : newValue?.value;
    if (selected == null) {
      // Nothing usable was selected; leave the stored voice untouched.
      return;
    }
    setVoice(selected.toString());
  };

  // The key changes with the option count, so the dropdown remounts when the list size changes.
  const dropdownKey = `edge-voice-dropdown-${voices.length}`;

  return (
    <div className="flex items-center justify-between">
      <div>{localize('com_nav_voice_select')}</div>
      <DropdownNoState
        key={dropdownKey}
        testId="EdgeVoiceDropdown"
        value={voice}
        options={voices}
        onChange={handleVoiceChange}
        anchor="bottom start"
        sizeClasses="min-w-[200px] !max-w-[400px] [--anchor-max-width:400px]"
      />
    </div>
  );
}
/**
 * Voice selector whose options are the `voices` returned by `useTTSBrowser`
 * (an empty list when the hook returns none). The current selection is read from
 * and written to the `store.voice` Recoil state.
 */
export function BrowserVoiceDropdown() {
  const localize = useLocalize();
  const { voices = [] } = useTTSBrowser();
  const [voice, setVoice] = useRecoilState(store.voice);

  /** Accepts a plain string or an option object and stores the value's string form. */
  const handleVoiceChange = (newValue?: string | Option) => {
    logger.log('Browser Voice changed:', newValue);
    const selected = typeof newValue === 'string' ? newValue : newValue?.value;
    if (selected == null) {
      // Nothing usable was selected; leave the stored voice untouched.
      return;
    }
    setVoice(selected.toString());
  };

  // The key changes with the option count, so the dropdown remounts when the list size changes.
  const dropdownKey = `browser-voice-dropdown-${voices.length}`;

  return (
    <div className="flex items-center justify-between">
      <div>{localize('com_nav_voice_select')}</div>
      <DropdownNoState
        key={dropdownKey}
        testId="BrowserVoiceDropdown"
        value={voice}
        options={voices}
        onChange={handleVoiceChange}
        anchor="bottom start"
        sizeClasses="min-w-[200px] !max-w-[400px] [--anchor-max-width:400px]"
      />
    </div>
  );
}
/**
 * Voice selector whose options are the `voices` returned by `useTTSExternal`
 * (an empty list when the hook returns none). The current selection is read from
 * and written to the `store.voice` Recoil state.
 */
export function ExternalVoiceDropdown() {
  const localize = useLocalize();
  const { voices = [] } = useTTSExternal();
  const [voice, setVoice] = useRecoilState(store.voice);

  /** Accepts a plain string or an option object and stores the value's string form. */
  const handleVoiceChange = (newValue?: string | Option) => {
    logger.log('External Voice changed:', newValue);
    const selected = typeof newValue === 'string' ? newValue : newValue?.value;
    if (selected == null) {
      // Nothing usable was selected; leave the stored voice untouched.
      return;
    }
    setVoice(selected.toString());
  };

  // The key changes with the option count, so the dropdown remounts when the list size changes.
  const dropdownKey = `external-voice-dropdown-${voices.length}`;

  return (
    <div className="flex items-center justify-between">
      <div>{localize('com_nav_voice_select')}</div>
      <DropdownNoState
        key={dropdownKey}
        testId="ExternalVoiceDropdown"
        value={voice}
        options={voices}
        onChange={handleVoiceChange}
        anchor="bottom start"
        sizeClasses="min-w-[200px] !max-w-[400px] [--anchor-max-width:400px]"
      />
    </div>
  );
}