mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-09-22 06:00:56 +02:00
🔀 refactor: Modularize TTS Logic for Improved Browser support (#3657)
* WIP: message audio refactor
* WIP: use MessageAudio by provider
* fix: update MessageAudio component to use the TTSEndpoints enum
* feat: update useTextToSpeechBrowser hook to handle errors and improve error logging
* feat: add voice dropdown components for each TTS engine
* docs: correct the `voices` example (changed `voice: ''` to `voices: ['alloy']`)
* feat: add browser support check to the Edge TTS engine component, with an error toast when unsupported

---------

Co-authored-by: Marco Beretta <81851188+berry-13@users.noreply.github.com>
parent bcde0beb47
commit dba704079c
18 changed files with 784 additions and 187 deletions
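
The shape of the refactor, condensed from the diffs below: the shared types gain `STTEndpoints`/`TTSEndpoints` enums and a `TMessageAudio` prop type, each TTS engine (browser, Edge, external) gets its own component and hook, and `MessageAudio` shrinks to a thin dispatcher keyed on the user's `engineTTS` setting. A sketch of that dispatcher, mirroring the new `MessageAudio.tsx` shown further down (`TTSEndpoints` is the new enum with values 'browser' | 'edge' | 'external'):

// Sketch of the new dispatcher (mirrors client/src/components/Chat/Messages/MessageAudio.tsx below).
import { memo } from 'react';
import { useRecoilValue } from 'recoil';
import type { TMessageAudio } from '~/common';
import { BrowserTTS, EdgeTTS, ExternalTTS } from '~/components/Audio/TTS';
import { TTSEndpoints } from '~/common';
import store from '~/store';

function MessageAudio(props: TMessageAudio) {
  // The user's selected TTS engine: 'browser', 'edge', or 'external'.
  const engineTTS = useRecoilValue<string>(store.engineTTS);

  // One component per engine; each wraps its own hook (useTTSBrowser / useTTSEdge / useTTSExternal).
  const TTSComponents = {
    [TTSEndpoints.edge]: EdgeTTS,
    [TTSEndpoints.browser]: BrowserTTS,
    [TTSEndpoints.external]: ExternalTTS,
  };

  const SelectedTTS = TTSComponents[engineTTS];
  return <SelectedTTS {...props} />;
}

export default memo(MessageAudio);
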
@@ -19,6 +19,7 @@ import type {
   TStartupConfig,
   EModelEndpoint,
   AssistantsEndpoint,
+  TMessageContentParts,
   AuthorizationTypeEnum,
   TSetOption as SetOption,
   TokenExchangeMethodEnum,
@@ -31,6 +32,17 @@ export enum PromptsEditorMode {
   ADVANCED = 'advanced',
 }
+
+export enum STTEndpoints {
+  browser = 'browser',
+  external = 'external',
+}
+
+export enum TTSEndpoints {
+  browser = 'browser',
+  edge = 'edge',
+  external = 'external',
+}
 
 export type AudioChunk = {
   audio: string;
   isFinal: boolean;
@@ -374,6 +386,19 @@ export type Option = Record<string, unknown> & {
   value: string | number | null;
 };
+
+export type VoiceOption = {
+  value: string;
+  label: string;
+};
+
+export type TMessageAudio = {
+  messageId?: string;
+  content?: TMessageContentParts[] | string;
+  className?: string;
+  isLast: boolean;
+  index: number;
+};
 
 export type OptionWithIcon = Option & { icon?: React.ReactNode };
 export type MentionOption = OptionWithIcon & {
   type: string;
client/src/components/Audio/TTS.tsx (new file, 256 lines)

import { useEffect, useMemo } from 'react';
import { useRecoilValue } from 'recoil';
import type { TMessageAudio } from '~/common';
import { useLocalize, useTTSBrowser, useTTSEdge, useTTSExternal } from '~/hooks';
import { VolumeIcon, VolumeMuteIcon, Spinner } from '~/components/svg';
import { useToastContext } from '~/Providers/ToastContext';
import { logger } from '~/utils';
import store from '~/store';

export function BrowserTTS({ isLast, index, messageId, content, className }: TMessageAudio) {
  const localize = useLocalize();
  const playbackRate = useRecoilValue(store.playbackRate);

  const { toggleSpeech, isSpeaking, isLoading, audioRef } = useTTSBrowser({
    isLast,
    index,
    messageId,
    content,
  });

  const renderIcon = (size: string) => {
    if (isLoading === true) {
      return <Spinner size={size} />;
    }

    if (isSpeaking === true) {
      return <VolumeMuteIcon size={size} />;
    }

    return <VolumeIcon size={size} />;
  };

  useEffect(() => {
    const messageAudio = document.getElementById(`audio-${messageId}`) as HTMLAudioElement | null;
    if (!messageAudio) {
      return;
    }
    if (playbackRate != null && playbackRate > 0 && messageAudio.playbackRate !== playbackRate) {
      messageAudio.playbackRate = playbackRate;
    }
  }, [audioRef, isSpeaking, playbackRate, messageId]);

  logger.log(
    'MessageAudio: audioRef.current?.src, audioRef.current',
    audioRef.current?.src,
    audioRef.current,
  );

  return (
    <>
      <button
        className={className}
        onClickCapture={() => {
          if (audioRef.current) {
            audioRef.current.muted = false;
          }
          toggleSpeech();
        }}
        type="button"
        title={isSpeaking === true ? localize('com_ui_stop') : localize('com_ui_read_aloud')}
      >
        {renderIcon('19')}
      </button>
      <audio
        ref={audioRef}
        controls
        preload="none"
        controlsList="nodownload nofullscreen noremoteplayback"
        style={{
          position: 'absolute',
          overflow: 'hidden',
          display: 'none',
          height: '0px',
          width: '0px',
        }}
        src={audioRef.current?.src}
        onError={(error) => {
          console.error('Error fetching audio:', error);
        }}
        id={`audio-${messageId}`}
        muted
        autoPlay
      />
    </>
  );
}

export function EdgeTTS({ isLast, index, messageId, content, className }: TMessageAudio) {
  const localize = useLocalize();
  const playbackRate = useRecoilValue(store.playbackRate);
  const isBrowserSupported = useMemo(
    () => typeof MediaSource !== 'undefined' && MediaSource.isTypeSupported('audio/mpeg'),
    [],
  );

  const { showToast } = useToastContext();
  const { toggleSpeech, isSpeaking, isLoading, audioRef } = useTTSEdge({
    isLast,
    index,
    messageId,
    content,
  });

  const renderIcon = (size: string) => {
    if (isLoading === true) {
      return <Spinner size={size} />;
    }

    if (isSpeaking === true) {
      return <VolumeMuteIcon size={size} />;
    }

    return <VolumeIcon size={size} />;
  };

  useEffect(() => {
    const messageAudio = document.getElementById(`audio-${messageId}`) as HTMLAudioElement | null;
    if (!messageAudio) {
      return;
    }
    if (playbackRate != null && playbackRate > 0 && messageAudio.playbackRate !== playbackRate) {
      messageAudio.playbackRate = playbackRate;
    }
  }, [audioRef, isSpeaking, playbackRate, messageId]);

  logger.log(
    'MessageAudio: audioRef.current?.src, audioRef.current',
    audioRef.current?.src,
    audioRef.current,
  );

  return (
    <>
      <button
        className={className}
        onClickCapture={() => {
          if (!isBrowserSupported) {
            showToast({
              message: localize('com_nav_tts_unsupported_error'),
              status: 'error',
            });
            return;
          }
          if (audioRef.current) {
            audioRef.current.muted = false;
          }
          toggleSpeech();
        }}
        type="button"
        title={isSpeaking === true ? localize('com_ui_stop') : localize('com_ui_read_aloud')}
      >
        {renderIcon('19')}
      </button>
      {isBrowserSupported ? (
        <audio
          ref={audioRef}
          controls
          preload="none"
          controlsList="nodownload nofullscreen noremoteplayback"
          style={{
            position: 'absolute',
            overflow: 'hidden',
            display: 'none',
            height: '0px',
            width: '0px',
          }}
          src={audioRef.current?.src}
          onError={(error) => {
            console.error('Error fetching audio:', error);
          }}
          id={`audio-${messageId}`}
          muted
          autoPlay
        />
      ) : null}
    </>
  );
}

export function ExternalTTS({ isLast, index, messageId, content, className }: TMessageAudio) {
  const localize = useLocalize();
  const playbackRate = useRecoilValue(store.playbackRate);

  const { toggleSpeech, isSpeaking, isLoading, audioRef } = useTTSExternal({
    isLast,
    index,
    messageId,
    content,
  });

  const renderIcon = (size: string) => {
    if (isLoading === true) {
      return <Spinner size={size} />;
    }

    if (isSpeaking === true) {
      return <VolumeMuteIcon size={size} />;
    }

    return <VolumeIcon size={size} />;
  };

  useEffect(() => {
    const messageAudio = document.getElementById(`audio-${messageId}`) as HTMLAudioElement | null;
    if (!messageAudio) {
      return;
    }
    if (playbackRate != null && playbackRate > 0 && messageAudio.playbackRate !== playbackRate) {
      messageAudio.playbackRate = playbackRate;
    }
  }, [audioRef, isSpeaking, playbackRate, messageId]);

  logger.log(
    'MessageAudio: audioRef.current?.src, audioRef.current',
    audioRef.current?.src,
    audioRef.current,
  );

  return (
    <>
      <button
        className={className}
        onClickCapture={() => {
          if (audioRef.current) {
            audioRef.current.muted = false;
          }
          toggleSpeech();
        }}
        type="button"
        title={isSpeaking === true ? localize('com_ui_stop') : localize('com_ui_read_aloud')}
      >
        {renderIcon('19')}
      </button>
      <audio
        ref={audioRef}
        controls
        preload="none"
        controlsList="nodownload nofullscreen noremoteplayback"
        style={{
          position: 'absolute',
          overflow: 'hidden',
          display: 'none',
          height: '0px',
          width: '0px',
        }}
        src={audioRef.current?.src}
        onError={(error) => {
          console.error('Error fetching audio:', error);
        }}
        id={`audio-${messageId}`}
        muted
        autoPlay
      />
    </>
  );
}
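
These three components share the `TMessageAudio` props added to the common types above. A hypothetical call site for the dispatcher that wraps them (the real one is the `HoverButtons` change further down; the prop values here are placeholders, not taken from the diff):

// Hypothetical usage sketch; values are placeholders.
import MessageAudio from '~/components/Chat/Messages/MessageAudio';

type ExampleMessage = { messageId: string; text: string };

export function ExampleMessageActions({ message, isLast }: { message: ExampleMessage; isLast: boolean }) {
  return (
    <MessageAudio
      messageId={message.messageId}
      content={message.text}
      isLast={isLast}
      index={0}
      className="hover-button rounded-md p-1 pl-0"
    />
  );
}
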
client/src/components/Audio/Voices.tsx (new file, 94 lines)

import React from 'react';
import { useRecoilState } from 'recoil';
import type { Option } from '~/common';
import DropdownNoState from '~/components/ui/DropdownNoState';
import { useLocalize, useTTSBrowser, useTTSEdge, useTTSExternal } from '~/hooks';
import { logger } from '~/utils';
import store from '~/store';

export function EdgeVoiceDropdown() {
  const localize = useLocalize();
  const { voices = [] } = useTTSEdge();
  const [voice, setVoice] = useRecoilState(store.voice);

  const handleVoiceChange = (newValue?: string | Option) => {
    logger.log('Edge Voice changed:', newValue);
    const newVoice = typeof newValue === 'string' ? newValue : newValue?.value;
    if (newVoice != null) {
      return setVoice(newVoice.toString());
    }
  };

  return (
    <div className="flex items-center justify-between">
      <div>{localize('com_nav_voice_select')}</div>
      <DropdownNoState
        key={`edge-voice-dropdown-${voices.length}`}
        value={voice}
        options={voices}
        onChange={handleVoiceChange}
        sizeClasses="min-w-[200px] !max-w-[400px] [--anchor-max-width:400px]"
        anchor="bottom start"
        testId="EdgeVoiceDropdown"
      />
    </div>
  );
}

export function BrowserVoiceDropdown() {
  const localize = useLocalize();
  const { voices = [] } = useTTSBrowser();
  const [voice, setVoice] = useRecoilState(store.voice);

  const handleVoiceChange = (newValue?: string | Option) => {
    logger.log('Browser Voice changed:', newValue);
    const newVoice = typeof newValue === 'string' ? newValue : newValue?.value;
    if (newVoice != null) {
      return setVoice(newVoice.toString());
    }
  };

  return (
    <div className="flex items-center justify-between">
      <div>{localize('com_nav_voice_select')}</div>
      <DropdownNoState
        key={`browser-voice-dropdown-${voices.length}`}
        value={voice}
        options={voices}
        onChange={handleVoiceChange}
        sizeClasses="min-w-[200px] !max-w-[400px] [--anchor-max-width:400px]"
        anchor="bottom start"
        testId="BrowserVoiceDropdown"
      />
    </div>
  );
}

export function ExternalVoiceDropdown() {
  const localize = useLocalize();
  const { voices = [] } = useTTSExternal();
  const [voice, setVoice] = useRecoilState(store.voice);

  const handleVoiceChange = (newValue?: string | Option) => {
    logger.log('External Voice changed:', newValue);
    const newVoice = typeof newValue === 'string' ? newValue : newValue?.value;
    if (newVoice != null) {
      return setVoice(newVoice.toString());
    }
  };

  return (
    <div className="flex items-center justify-between">
      <div>{localize('com_nav_voice_select')}</div>
      <DropdownNoState
        key={`external-voice-dropdown-${voices.length}`}
        value={voice}
        options={voices}
        onChange={handleVoiceChange}
        sizeClasses="min-w-[200px] !max-w-[400px] [--anchor-max-width:400px]"
        anchor="bottom start"
        testId="ExternalVoiceDropdown"
      />
    </div>
  );
}
@@ -79,6 +79,7 @@ export default function HoverButtons({
           messageId={message.messageId}
           content={message.content ?? message.text}
           isLast={isLast}
+          className="hover-button rounded-md p-1 pl-0 text-gray-500 hover:bg-gray-100 hover:text-gray-500 dark:text-gray-400/70 dark:hover:bg-gray-700 dark:hover:text-gray-200 disabled:dark:hover:text-gray-400 md:group-hover:visible md:group-[.final-completion]:visible"
         />
       )}
       {isEditableEndpoint && (
@@ -1,104 +1,22 @@
-import { useEffect } from 'react';
+// client/src/components/Chat/Messages/MessageAudio.tsx
+import { memo } from 'react';
 import { useRecoilValue } from 'recoil';
-import type { TMessageContentParts } from 'librechat-data-provider';
-import { VolumeIcon, VolumeMuteIcon, Spinner } from '~/components/svg';
-import { useLocalize, useTextToSpeech } from '~/hooks';
-import { logger } from '~/utils';
+import type { TMessageAudio } from '~/common';
+import { BrowserTTS, EdgeTTS, ExternalTTS } from '~/components/Audio/TTS';
+import { TTSEndpoints } from '~/common';
 import store from '~/store';
 
-type THoverButtons = {
-  messageId?: string;
-  content?: TMessageContentParts[] | string;
-  isLast: boolean;
-  index: number;
-};
-
-export default function MessageAudio({ isLast, index, messageId, content }: THoverButtons) {
-  const localize = useLocalize();
-  const playbackRate = useRecoilValue(store.playbackRate);
-
-  const { toggleSpeech, isSpeaking, isLoading, audioRef } = useTextToSpeech({
-    isLast,
-    index,
-    messageId,
-    content,
-  });
-
-  const renderIcon = (size: string) => {
-    if (isLoading === true) {
-      return <Spinner size={size} />;
-    }
-
-    if (isSpeaking === true) {
-      return <VolumeMuteIcon size={size} />;
-    }
-
-    return <VolumeIcon size={size} />;
-  };
-
-  useEffect(() => {
-    const messageAudio = document.getElementById(`audio-${messageId}`) as HTMLAudioElement | null;
-    if (!messageAudio) {
-      return;
-    }
-    if (playbackRate != null && playbackRate > 0 && messageAudio.playbackRate !== playbackRate) {
-      messageAudio.playbackRate = playbackRate;
-    }
-  }, [audioRef, isSpeaking, playbackRate, messageId]);
-
-  logger.log(
-    'MessageAudio: audioRef.current?.src, audioRef.current',
-    audioRef.current?.src,
-    audioRef.current,
-  );
-
-  return (
-    <>
-      <button
-        className="hover-button rounded-md p-1 pl-0 text-gray-500 hover:bg-gray-100 hover:text-gray-500 dark:text-gray-400/70 dark:hover:bg-gray-700 dark:hover:text-gray-200 disabled:dark:hover:text-gray-400 md:group-hover:visible md:group-[.final-completion]:visible"
-        // onMouseDownCapture={() => {
-        //   if (audioRef.current) {
-        //     audioRef.current.muted = false;
-        //   }
-        //   handleMouseDown();
-        // }}
-        // onMouseUpCapture={() => {
-        //   if (audioRef.current) {
-        //     audioRef.current.muted = false;
-        //   }
-        //   handleMouseUp();
-        // }}
-        onClickCapture={() => {
-          if (audioRef.current) {
-            audioRef.current.muted = false;
-          }
-          toggleSpeech();
-        }}
-        type="button"
-        title={isSpeaking === true ? localize('com_ui_stop') : localize('com_ui_read_aloud')}
-      >
-        {renderIcon('19')}
-      </button>
-      <audio
-        ref={audioRef}
-        controls
-        preload="none"
-        controlsList="nodownload nofullscreen noremoteplayback"
-        style={{
-          position: 'absolute',
-          overflow: 'hidden',
-          display: 'none',
-          height: '0px',
-          width: '0px',
-        }}
-        src={audioRef.current?.src}
-        onError={(error) => {
-          console.error('Error fetching audio:', error);
-        }}
-        id={`audio-${messageId}`}
-        muted
-        autoPlay
-      />
-    </>
-  );
-}
+
+function MessageAudio(props: TMessageAudio) {
+  const engineTTS = useRecoilValue<string>(store.engineTTS);
+
+  const TTSComponents = {
+    [TTSEndpoints.edge]: EdgeTTS,
+    [TTSEndpoints.browser]: BrowserTTS,
+    [TTSEndpoints.external]: ExternalTTS,
+  };
+
+  const SelectedTTS = TTSComponents[engineTTS];
+  return <SelectedTTS {...props} />;
+}
+
+export default memo(MessageAudio);
@@ -1,37 +1,21 @@
-import React from 'react';
-import { useRecoilState, useRecoilValue } from 'recoil';
-import type { Option } from '~/common';
-import DropdownNoState from '~/components/ui/DropdownNoState';
-import { useLocalize, useTextToSpeech } from '~/hooks';
-import { logger } from '~/utils';
+import { useRecoilValue } from 'recoil';
+import {
+  EdgeVoiceDropdown,
+  BrowserVoiceDropdown,
+  ExternalVoiceDropdown,
+} from '~/components/Audio/Voices';
 import store from '~/store';
+import { TTSEndpoints } from '~/common';
+
+const voiceDropdownComponentsMap = {
+  [TTSEndpoints.edge]: EdgeVoiceDropdown,
+  [TTSEndpoints.browser]: BrowserVoiceDropdown,
+  [TTSEndpoints.external]: ExternalVoiceDropdown,
+};
 
 export default function VoiceDropdown() {
-  const localize = useLocalize();
-  const { voices = [] } = useTextToSpeech();
-  const [voice, setVoice] = useRecoilState(store.voice);
   const engineTTS = useRecoilValue<string>(store.engineTTS);
+  const VoiceDropdownComponent = voiceDropdownComponentsMap[engineTTS];
 
-  const handleVoiceChange = (newValue?: string | Option) => {
-    logger.log('Voice changed:', newValue);
-    const newVoice = typeof newValue === 'string' ? newValue : newValue?.value;
-    if (newVoice != null) {
-      return setVoice(newVoice.toString());
-    }
-  };
-
-  return (
-    <div className="flex items-center justify-between">
-      <div>{localize('com_nav_voice_select')}</div>
-      <DropdownNoState
-        key={`voice-dropdown-${engineTTS}-${voices.length}`}
-        value={voice}
-        options={voices}
-        onChange={handleVoiceChange}
-        sizeClasses="min-w-[200px] !max-w-[400px] [--anchor-max-width:400px]"
-        anchor="bottom start"
-        testId="VoiceDropdown"
-      />
-    </div>
-  );
+  return <VoiceDropdownComponent />;
 }
@@ -1,3 +1,6 @@
 export * from './MediaSourceAppender';
 export { default as useCustomAudioRef } from './useCustomAudioRef';
 export { default as usePauseGlobalAudio } from './usePauseGlobalAudio';
+export { default as useTTSExternal } from './useTTSExternal';
+export { default as useTTSBrowser } from './useTTSBrowser';
+export { default as useTTSEdge } from './useTTSEdge';
client/src/hooks/Audio/useTTSBrowser.ts (new file, 100 lines)

// client/src/hooks/Audio/useTTSBrowser.ts
import { useRef, useEffect, useState } from 'react';
import { useRecoilState, useRecoilValue } from 'recoil';
import { parseTextParts } from 'librechat-data-provider';
import type { TMessageContentParts } from 'librechat-data-provider';
import useTextToSpeechBrowser from '~/hooks/Input/useTextToSpeechBrowser';
import usePauseGlobalAudio from '~/hooks/Audio/usePauseGlobalAudio';
import useAudioRef from '~/hooks/Audio/useAudioRef';
import { logger } from '~/utils';
import store from '~/store';

type TUseTextToSpeech = {
  messageId?: string;
  content?: TMessageContentParts[] | string;
  isLast?: boolean;
  index?: number;
};

const useTTSBrowser = (props?: TUseTextToSpeech) => {
  const { content, isLast = false, index = 0 } = props ?? {};

  const isMouseDownRef = useRef(false);
  const timerRef = useRef<number | undefined>(undefined);
  const [isSpeakingState, setIsSpeaking] = useState(false);
  const { audioRef } = useAudioRef({ setIsPlaying: setIsSpeaking });

  const { pauseGlobalAudio } = usePauseGlobalAudio(index);
  const [voice, setVoice] = useRecoilState(store.voice);
  const globalIsPlaying = useRecoilValue(store.globalAudioPlayingFamily(index));

  const isSpeaking = isSpeakingState || (isLast && globalIsPlaying);

  const {
    generateSpeechLocal: generateSpeech,
    cancelSpeechLocal: cancelSpeech,
    voices,
  } = useTextToSpeechBrowser({ setIsSpeaking });

  useEffect(() => {
    const firstVoice = voices[0];
    if (voices.length && typeof firstVoice === 'object') {
      const lastSelectedVoice = voices.find((v) =>
        typeof v === 'object' ? v.value === voice : v === voice,
      );
      if (lastSelectedVoice != null) {
        const currentVoice =
          typeof lastSelectedVoice === 'object' ? lastSelectedVoice.value : lastSelectedVoice;
        logger.log('useTextToSpeech.ts - Effect:', { voices, voice: currentVoice });
        setVoice(currentVoice);
        return;
      }

      logger.log('useTextToSpeech.ts - Effect:', { voices, voice: firstVoice.value });
      setVoice(firstVoice.value);
    }
  }, [setVoice, voice, voices]);

  const handleMouseDown = () => {
    isMouseDownRef.current = true;
    timerRef.current = window.setTimeout(() => {
      if (isMouseDownRef.current) {
        const messageContent = content ?? '';
        const parsedMessage =
          typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
        generateSpeech(parsedMessage);
      }
    }, 1000);
  };

  const handleMouseUp = () => {
    isMouseDownRef.current = false;
    if (timerRef.current != null) {
      window.clearTimeout(timerRef.current);
    }
  };

  const toggleSpeech = () => {
    if (isSpeaking === true) {
      cancelSpeech();
      pauseGlobalAudio();
    } else {
      const messageContent = content ?? '';
      const parsedMessage =
        typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
      generateSpeech(parsedMessage);
    }
  };

  return {
    handleMouseDown,
    handleMouseUp,
    toggleSpeech,
    isSpeaking,
    isLoading: false,
    audioRef,
    voices,
  };
};

export default useTTSBrowser;
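
A minimal consumer sketch for this hook, assuming only the return shape shown above (the real consumer is `BrowserTTS` in `client/src/components/Audio/TTS.tsx`; the component and prop names here are illustrative):

// Hypothetical consumer; BrowserTTS above is the real one.
import useTTSBrowser from '~/hooks/Audio/useTTSBrowser';

export function ReadAloudButton({ messageId, text }: { messageId: string; text: string }) {
  const { toggleSpeech, isSpeaking, isLoading, audioRef } = useTTSBrowser({
    messageId,
    content: text,
    isLast: true,
    index: 0,
  });

  return (
    <>
      <button type="button" onClick={toggleSpeech} disabled={isLoading}>
        {isSpeaking ? 'Stop' : 'Read aloud'}
      </button>
      {/* Hidden element kept for parity with the other engines; browser TTS itself speaks via window.speechSynthesis. */}
      <audio ref={audioRef} id={`audio-${messageId}`} muted autoPlay style={{ display: 'none' }} />
    </>
  );
}
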
client/src/hooks/Audio/useTTSEdge.ts (new file, 100 lines)

// client/src/hooks/Audio/useTTSEdge.ts
import { useRef, useEffect, useState } from 'react';
import { useRecoilState, useRecoilValue } from 'recoil';
import { parseTextParts } from 'librechat-data-provider';
import type { TMessageContentParts } from 'librechat-data-provider';
import usePauseGlobalAudio from '~/hooks/Audio/usePauseGlobalAudio';
import useTextToSpeechEdge from '~/hooks/Input/useTextToSpeechEdge';
import useAudioRef from '~/hooks/Audio/useAudioRef';
import { logger } from '~/utils';
import store from '~/store';

type TUseTextToSpeech = {
  messageId?: string;
  content?: TMessageContentParts[] | string;
  isLast?: boolean;
  index?: number;
};

const useTTSEdge = (props?: TUseTextToSpeech) => {
  const { content, isLast = false, index = 0 } = props ?? {};

  const isMouseDownRef = useRef(false);
  const timerRef = useRef<number | undefined>(undefined);
  const [isSpeakingState, setIsSpeaking] = useState(false);
  const { audioRef } = useAudioRef({ setIsPlaying: setIsSpeaking });

  const { pauseGlobalAudio } = usePauseGlobalAudio(index);
  const [voice, setVoice] = useRecoilState(store.voice);
  const globalIsPlaying = useRecoilValue(store.globalAudioPlayingFamily(index));

  const isSpeaking = isSpeakingState || (isLast && globalIsPlaying);

  const {
    generateSpeechEdge: generateSpeech,
    cancelSpeechEdge: cancelSpeech,
    voices,
  } = useTextToSpeechEdge({ setIsSpeaking });

  useEffect(() => {
    const firstVoice = voices[0];
    if (voices.length && typeof firstVoice === 'object') {
      const lastSelectedVoice = voices.find((v) =>
        typeof v === 'object' ? v.value === voice : v === voice,
      );
      if (lastSelectedVoice != null) {
        const currentVoice =
          typeof lastSelectedVoice === 'object' ? lastSelectedVoice.value : lastSelectedVoice;
        logger.log('useTextToSpeech.ts - Effect:', { voices, voice: currentVoice });
        setVoice(currentVoice);
        return;
      }

      logger.log('useTextToSpeech.ts - Effect:', { voices, voice: firstVoice.value });
      setVoice(firstVoice.value);
    }
  }, [setVoice, voice, voices]);

  const handleMouseDown = () => {
    isMouseDownRef.current = true;
    timerRef.current = window.setTimeout(() => {
      if (isMouseDownRef.current) {
        const messageContent = content ?? '';
        const parsedMessage =
          typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
        generateSpeech(parsedMessage);
      }
    }, 1000);
  };

  const handleMouseUp = () => {
    isMouseDownRef.current = false;
    if (timerRef.current != null) {
      window.clearTimeout(timerRef.current);
    }
  };

  const toggleSpeech = () => {
    if (isSpeaking === true) {
      cancelSpeech();
      pauseGlobalAudio();
    } else {
      const messageContent = content ?? '';
      const parsedMessage =
        typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
      generateSpeech(parsedMessage);
    }
  };

  return {
    handleMouseDown,
    handleMouseUp,
    toggleSpeech,
    isSpeaking,
    isLoading: false,
    audioRef,
    voices,
  };
};

export default useTTSEdge;
client/src/hooks/Audio/useTTSExternal.ts (new file, 101 lines)

// client/src/hooks/Audio/useTTSExternal.ts
import { useRef, useEffect, useState } from 'react';
import { useRecoilState, useRecoilValue } from 'recoil';
import { parseTextParts } from 'librechat-data-provider';
import type { TMessageContentParts } from 'librechat-data-provider';
import useTextToSpeechExternal from '~/hooks/Input/useTextToSpeechExternal';
import usePauseGlobalAudio from '~/hooks/Audio/usePauseGlobalAudio';
import useAudioRef from '~/hooks/Audio/useAudioRef';
import { logger } from '~/utils';
import store from '~/store';

type TUseTextToSpeech = {
  messageId?: string;
  content?: TMessageContentParts[] | string;
  isLast?: boolean;
  index?: number;
};

const useTTSExternal = (props?: TUseTextToSpeech) => {
  const { messageId, content, isLast = false, index = 0 } = props ?? {};

  const isMouseDownRef = useRef(false);
  const timerRef = useRef<number | undefined>(undefined);
  const [isSpeakingState, setIsSpeaking] = useState(false);
  const { audioRef } = useAudioRef({ setIsPlaying: setIsSpeaking });

  const { pauseGlobalAudio } = usePauseGlobalAudio(index);
  const [voice, setVoice] = useRecoilState(store.voice);
  const globalIsPlaying = useRecoilValue(store.globalAudioPlayingFamily(index));

  const isSpeaking = isSpeakingState || (isLast && globalIsPlaying);
  const {
    cancelSpeech,
    generateSpeechExternal: generateSpeech,
    isLoading,
    voices,
  } = useTextToSpeechExternal({
    setIsSpeaking,
    audioRef,
    messageId,
    isLast,
    index,
  });

  useEffect(() => {
    const firstVoice = voices[0];
    if (voices.length) {
      const lastSelectedVoice = voices.find((v) => v === voice);
      if (lastSelectedVoice != null) {
        logger.log('useTextToSpeech.ts - Effect:', { voices, voice: lastSelectedVoice });
        setVoice(lastSelectedVoice.toString());
        return;
      }
      logger.log('useTextToSpeech.ts - Effect:', { voices, voice: firstVoice });
      setVoice(firstVoice.toString());
    }
  }, [setVoice, voice, voices]);

  const handleMouseDown = () => {
    isMouseDownRef.current = true;
    timerRef.current = window.setTimeout(() => {
      if (isMouseDownRef.current) {
        const messageContent = content ?? '';
        const parsedMessage =
          typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
        generateSpeech(parsedMessage, false);
      }
    }, 1000);
  };

  const handleMouseUp = () => {
    isMouseDownRef.current = false;
    if (timerRef.current != null) {
      window.clearTimeout(timerRef.current);
    }
  };

  const toggleSpeech = () => {
    if (isSpeaking === true) {
      cancelSpeech();
      pauseGlobalAudio();
    } else {
      const messageContent = content ?? '';
      const parsedMessage =
        typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
      generateSpeech(parsedMessage, false);
    }
  };

  return {
    handleMouseDown,
    handleMouseUp,
    toggleSpeech,
    isSpeaking,
    isLoading,
    audioRef,
    voices,
  };
};

export default useTTSExternal;
@@ -2,17 +2,6 @@ import { useMemo } from 'react';
 import { useRecoilValue } from 'recoil';
 import store from '~/store';
 
-export enum STTEndpoints {
-  browser = 'browser',
-  external = 'external',
-}
-
-export enum TTSEndpoints {
-  browser = 'browser',
-  edge = 'edge',
-  external = 'external',
-}
-
 const useGetAudioSettings = () => {
   const engineSTT = useRecoilValue<string>(store.engineSTT);
   const engineTTS = useRecoilValue<string>(store.engineTTS);
@@ -3,10 +3,10 @@ import { useRef, useMemo, useEffect, useState } from 'react';
 import { parseTextParts } from 'librechat-data-provider';
 import type { TMessageContentParts } from 'librechat-data-provider';
 import type { Option } from '~/common';
-import useTextToSpeechExternal from './useTextToSpeechExternal';
-import useTextToSpeechBrowser from './useTextToSpeechBrowser';
-import useGetAudioSettings from './useGetAudioSettings';
-import useTextToSpeechEdge from './useTextToSpeechEdge';
+import useTextToSpeechExternal from '~/hooks/Input/useTextToSpeechExternal';
+import useTextToSpeechBrowser from '~/hooks/Input/useTextToSpeechBrowser';
+import useGetAudioSettings from '~/hooks/Input/useGetAudioSettings';
+import useTextToSpeechEdge from '~/hooks/Input/useTextToSpeechEdge';
 import useAudioRef from '~/hooks/Audio/useAudioRef';
 import { usePauseGlobalAudio } from '../Audio';
 import { logger } from '~/utils';
@@ -1,43 +1,54 @@
 import { useRecoilState } from 'recoil';
 import { useState, useEffect, useCallback } from 'react';
+import type { VoiceOption } from '~/common';
 import store from '~/store';
 
-interface VoiceOption {
-  value: string;
-  label: string;
-}
-
 function useTextToSpeechBrowser({
   setIsSpeaking,
 }: {
-  setIsSpeaking: (isSpeaking: boolean) => void;
+  setIsSpeaking: React.Dispatch<React.SetStateAction<boolean>>;
 }) {
   const [cloudBrowserVoices] = useRecoilState(store.cloudBrowserVoices);
   const [voiceName] = useRecoilState(store.voice);
   const [voices, setVoices] = useState<VoiceOption[]>([]);
 
   const updateVoices = useCallback(() => {
-    const availableVoices = window.speechSynthesis
-      .getVoices()
-      .filter((v) => cloudBrowserVoices || v.localService === true);
-
-    const voiceOptions: VoiceOption[] = availableVoices.map((v) => ({
-      value: v.name,
-      label: v.name,
-    }));
-
-    setVoices(voiceOptions);
+    try {
+      const availableVoices = window.speechSynthesis.getVoices();
+      if (!Array.isArray(availableVoices)) {
+        console.error('getVoices() did not return an array');
+        return;
+      }
+
+      const filteredVoices = availableVoices.filter(
+        (v) => cloudBrowserVoices || v.localService === true,
+      );
+      const voiceOptions: VoiceOption[] = filteredVoices.map((v) => ({
+        value: v.name,
+        label: v.name,
+      }));
+
+      setVoices(voiceOptions);
+    } catch (error) {
+      console.error('Error updating voices:', error);
+    }
   }, [cloudBrowserVoices]);
 
   useEffect(() => {
-    if (window.speechSynthesis.getVoices().length) {
-      updateVoices();
-    } else {
-      window.speechSynthesis.onvoiceschanged = updateVoices;
-    }
+    const synth = window.speechSynthesis;
+
+    try {
+      if (synth.getVoices().length) {
+        updateVoices();
+      } else {
+        synth.onvoiceschanged = updateVoices;
+      }
+    } catch (error) {
+      console.error('Error in useEffect:', error);
+    }
 
     return () => {
-      window.speechSynthesis.onvoiceschanged = null;
+      synth.onvoiceschanged = null;
     };
   }, [updateVoices]);
@@ -46,22 +57,37 @@ function useTextToSpeechBrowser({
     const voice = voices.find((v) => v.value === voiceName);
 
     if (!voice) {
+      console.warn('Selected voice not found');
       return;
     }
 
-    synth.cancel();
-    const utterance = new SpeechSynthesisUtterance(text);
-    utterance.voice = synth.getVoices().find((v) => v.name === voice.value) || null;
-    utterance.onend = () => {
-      setIsSpeaking(false);
-    };
-    setIsSpeaking(true);
-    synth.speak(utterance);
+    try {
+      synth.cancel();
+      const utterance = new SpeechSynthesisUtterance(text);
+      utterance.voice = synth.getVoices().find((v) => v.name === voice.value) || null;
+      utterance.onend = () => {
+        setIsSpeaking(false);
+      };
+      utterance.onerror = (event) => {
+        console.error('Speech synthesis error:', event);
+        setIsSpeaking(false);
+      };
+      setIsSpeaking(true);
+      synth.speak(utterance);
+    } catch (error) {
+      console.error('Error generating speech:', error);
+      setIsSpeaking(false);
+    }
   };
 
   const cancelSpeechLocal = () => {
-    window.speechSynthesis.cancel();
-    setIsSpeaking(false);
+    try {
+      window.speechSynthesis.cancel();
+    } catch (error) {
+      console.error('Error cancelling speech:', error);
+    } finally {
+      setIsSpeaking(false);
+    }
   };
 
   return { generateSpeechLocal, cancelSpeechLocal, voices };
@@ -1,28 +1,24 @@
 import { useRecoilValue } from 'recoil';
-import { useState, useCallback, useRef, useEffect, useMemo } from 'react';
 import { MsEdgeTTS, OUTPUT_FORMAT } from 'msedge-tts';
-import { useToastContext } from '~/Providers';
+import { useState, useCallback, useRef, useEffect, useMemo } from 'react';
+import type { VoiceOption } from '~/common';
+import { useToastContext } from '~/Providers/ToastContext';
 import useLocalize from '~/hooks/useLocalize';
 import store from '~/store';
 
-interface Voice {
-  value: string;
-  label: string;
-}
-
 interface UseTextToSpeechEdgeReturn {
   generateSpeechEdge: (text: string) => void;
   cancelSpeechEdge: () => void;
-  voices: Voice[];
+  voices: VoiceOption[];
 }
 
 function useTextToSpeechEdge({
   setIsSpeaking,
 }: {
-  setIsSpeaking: (isSpeaking: boolean) => void;
+  setIsSpeaking: React.Dispatch<React.SetStateAction<boolean>>;
 }): UseTextToSpeechEdgeReturn {
   const localize = useLocalize();
-  const [voices, setVoices] = useState<Voice[]>([]);
+  const [voices, setVoices] = useState<VoiceOption[]>([]);
   const voiceName = useRecoilValue(store.voice);
   const ttsRef = useRef<MsEdgeTTS | null>(null);
   const audioElementRef = useRef<HTMLAudioElement | null>(null);
@@ -63,7 +59,7 @@ function useTextToSpeechEdge({
     if (!ttsRef.current) {
      ttsRef.current = new MsEdgeTTS();
     }
-    const availableVoice: Voice | undefined = voices.find((v) => v.value === voiceName);
+    const availableVoice: VoiceOption | undefined = voices.find((v) => v.value === voiceName);
 
     if (availableVoice) {
       ttsRef.current
@@ -181,7 +177,7 @@ function useTextToSpeechEdge({
 
       generate();
     },
-    [appendNextBuffer, showToast, localize],
+    [setIsSpeaking, appendNextBuffer, showToast, localize],
   );
 
   const cancelSpeechEdge = useCallback(() => {
@@ -202,7 +198,7 @@ function useTextToSpeechEdge({
         status: 'error',
       });
     }
-  }, [showToast, localize]);
+  }, [setIsSpeaking, showToast, localize]);
 
   useEffect(() => {
     if (!isBrowserSupported) {
@@ -1,8 +1,8 @@
 import { useRecoilValue } from 'recoil';
 import { useState, useMemo, useRef, useCallback, useEffect } from 'react';
 import { useTextToSpeechMutation, useVoicesQuery } from '~/data-provider';
+import { useToastContext } from '~/Providers/ToastContext';
 import useLocalize from '~/hooks/useLocalize';
-import { useToastContext } from '~/Providers';
 import store from '~/store';
 
 const createFormData = (text: string, voice: string) => {
@@ -13,7 +13,7 @@ const createFormData = (text: string, voice: string) => {
 };
 
 type TUseTTSExternal = {
-  setIsSpeaking: (isSpeaking: boolean) => void;
+  setIsSpeaking: React.Dispatch<React.SetStateAction<boolean>>;
   audioRef: React.MutableRefObject<HTMLAudioElement | null>;
   messageId?: string;
   isLast: boolean;
@@ -1,3 +1,4 @@
+export * from './Audio';
 export * from './Assistants';
 export * from './Chat';
 export * from './Config';
@@ -664,6 +664,8 @@
   com_nav_audio_process_error: 'Error processing audio: {0}',
   com_nav_long_audio_warning: 'Longer texts will take longer to process.',
   com_nav_tts_init_error: 'Failed to initialize text-to-speech: {0}',
+  com_nav_tts_unsupported_error:
+    'Text-to-speech for the selected engine is not supported in this browser.',
   com_nav_source_buffer_error: 'Error setting up audio playback. Please refresh the page.',
   com_nav_media_source_init_error:
     'Unable to prepare audio player. Please check your browser settings.',
@@ -31,7 +31,8 @@ registration:
 #       url: ''
 #       apiKey: '${TTS_API_KEY}'
 #       model: ''
-#       voice: ''
+#       voices: ['']
+
 #
 #  stt:
 #    openai: