🎙️ fix: Optimize and Fix Browser TTS Incompatibility (Firefox) (#3627)

* fix: disable MsEdgeTTS on unsupported browsers (Firefox)

* refactor: only pass necessary props to HoverButton MessageAudio

* refactor: Fix conditional comparison operators in MessageAudio component

* refactor: Remove console.log statement in MessageAudio component
This commit is contained in:
Danny Avila 2024-08-13 04:14:37 -04:00 committed by GitHub
parent 6655304753
commit e3ebcfd2b1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 62 additions and 28 deletions

View file

@ -73,7 +73,14 @@ export default function HoverButtons({
return ( return (
<div className="visible mt-0 flex justify-center gap-1 self-end text-gray-500 lg:justify-start"> <div className="visible mt-0 flex justify-center gap-1 self-end text-gray-500 lg:justify-start">
{TextToSpeech && <MessageAudio index={index} message={message} isLast={isLast} />} {TextToSpeech && (
<MessageAudio
index={index}
messageId={message.messageId}
content={message.content ?? message.text}
isLast={isLast}
/>
)}
{isEditableEndpoint && ( {isEditableEndpoint && (
<button <button
className={cn( className={cn(
@ -128,7 +135,7 @@ export default function HoverButtons({
forkingSupported={forkingSupported} forkingSupported={forkingSupported}
latestMessage={latestMessage} latestMessage={latestMessage}
/> />
{continueSupported ? ( {continueSupported === true ? (
<button <button
className={cn( className={cn(
'hover-button active rounded-md p-1 hover:bg-gray-100 hover:text-gray-500 focus:opacity-100 dark:text-gray-400/70 dark:hover:bg-gray-700 dark:hover:text-gray-200 disabled:dark:hover:text-gray-400 md:invisible md:group-hover:visible', 'hover-button active rounded-md p-1 hover:bg-gray-100 hover:text-gray-500 focus:opacity-100 dark:text-gray-400/70 dark:hover:bg-gray-700 dark:hover:text-gray-200 disabled:dark:hover:text-gray-400 md:invisible md:group-hover:visible',

View file

@ -1,28 +1,34 @@
import { useEffect } from 'react'; import { useEffect } from 'react';
import { useRecoilValue } from 'recoil'; import { useRecoilValue } from 'recoil';
import type { TMessage } from 'librechat-data-provider'; import type { TMessageContentParts } from 'librechat-data-provider';
import { VolumeIcon, VolumeMuteIcon, Spinner } from '~/components/svg'; import { VolumeIcon, VolumeMuteIcon, Spinner } from '~/components/svg';
import { useLocalize, useTextToSpeech } from '~/hooks'; import { useLocalize, useTextToSpeech } from '~/hooks';
import store from '~/store'; import store from '~/store';
type THoverButtons = { type THoverButtons = {
message: TMessage; messageId?: string;
content?: TMessageContentParts[] | string;
isLast: boolean; isLast: boolean;
index: number; index: number;
}; };
export default function MessageAudio({ index, message, isLast }: THoverButtons) { export default function MessageAudio({ isLast, index, messageId, content }: THoverButtons) {
const localize = useLocalize(); const localize = useLocalize();
const playbackRate = useRecoilValue(store.playbackRate); const playbackRate = useRecoilValue(store.playbackRate);
const { toggleSpeech, isSpeaking, isLoading, audioRef } = useTextToSpeech(message, isLast, index); const { toggleSpeech, isSpeaking, isLoading, audioRef } = useTextToSpeech({
isLast,
index,
messageId,
content,
});
const renderIcon = (size: string) => { const renderIcon = (size: string) => {
if (isLoading) { if (isLoading === true) {
return <Spinner size={size} />; return <Spinner size={size} />;
} }
if (isSpeaking) { if (isSpeaking === true) {
return <VolumeMuteIcon size={size} />; return <VolumeMuteIcon size={size} />;
} }
@ -30,21 +36,14 @@ export default function MessageAudio({ index, message, isLast }: THoverButtons)
}; };
useEffect(() => { useEffect(() => {
const messageAudio = document.getElementById( const messageAudio = document.getElementById(`audio-${messageId}`) as HTMLAudioElement | null;
`audio-${message.messageId}`,
) as HTMLAudioElement | null;
if (!messageAudio) { if (!messageAudio) {
return; return;
} }
if ( if (playbackRate != null && playbackRate > 0 && messageAudio.playbackRate !== playbackRate) {
playbackRate &&
playbackRate > 0 &&
messageAudio &&
messageAudio.playbackRate !== playbackRate
) {
messageAudio.playbackRate = playbackRate; messageAudio.playbackRate = playbackRate;
} }
}, [audioRef, isSpeaking, playbackRate, message.messageId]); }, [audioRef, isSpeaking, playbackRate, messageId]);
return ( return (
<> <>
@ -69,7 +68,7 @@ export default function MessageAudio({ index, message, isLast }: THoverButtons)
toggleSpeech(); toggleSpeech();
}} }}
type="button" type="button"
title={isSpeaking ? localize('com_ui_stop') : localize('com_ui_read_aloud')} title={isSpeaking === true ? localize('com_ui_stop') : localize('com_ui_read_aloud')}
> >
{renderIcon('19')} {renderIcon('19')}
</button> </button>
@ -84,8 +83,8 @@ export default function MessageAudio({ index, message, isLast }: THoverButtons)
height: '0px', height: '0px',
width: '0px', width: '0px',
}} }}
src={audioRef.current?.src || undefined} src={audioRef.current?.src ?? undefined}
id={`audio-${message.messageId}`} id={`audio-${messageId}`}
muted muted
autoPlay autoPlay
/> />

View file

@ -1,7 +1,7 @@
import { useRecoilState } from 'recoil'; import { useRecoilState } from 'recoil';
import { useRef, useMemo, useEffect } from 'react'; import { useRef, useMemo, useEffect } from 'react';
import { parseTextParts } from 'librechat-data-provider'; import { parseTextParts } from 'librechat-data-provider';
import type { TMessage } from 'librechat-data-provider'; import type { TMessageContentParts } from 'librechat-data-provider';
import type { Option } from '~/common'; import type { Option } from '~/common';
import useTextToSpeechExternal from './useTextToSpeechExternal'; import useTextToSpeechExternal from './useTextToSpeechExternal';
import useTextToSpeechBrowser from './useTextToSpeechBrowser'; import useTextToSpeechBrowser from './useTextToSpeechBrowser';
@ -11,7 +11,15 @@ import { usePauseGlobalAudio } from '../Audio';
import { logger } from '~/utils'; import { logger } from '~/utils';
import store from '~/store'; import store from '~/store';
const useTextToSpeech = (message?: TMessage, isLast = false, index = 0) => { type TUseTextToSpeech = {
messageId?: string;
content?: TMessageContentParts[] | string;
isLast?: boolean;
index?: number;
};
const useTextToSpeech = (props?: TUseTextToSpeech) => {
const { messageId, content, isLast = false, index = 0 } = props ?? {};
const [voice, setVoice] = useRecoilState(store.voice); const [voice, setVoice] = useRecoilState(store.voice);
const { textToSpeechEndpoint } = useGetAudioSettings(); const { textToSpeechEndpoint } = useGetAudioSettings();
const { pauseGlobalAudio } = usePauseGlobalAudio(index); const { pauseGlobalAudio } = usePauseGlobalAudio(index);
@ -38,7 +46,7 @@ const useTextToSpeech = (message?: TMessage, isLast = false, index = 0) => {
isLoading: isLoadingExternal, isLoading: isLoadingExternal,
audioRef: audioRefExternal, audioRef: audioRefExternal,
voices: voicesExternal, voices: voicesExternal,
} = useTextToSpeechExternal(message?.messageId ?? '', isLast, index); } = useTextToSpeechExternal(messageId ?? '', isLast, index);
let generateSpeech, cancelSpeech, isSpeaking, isLoading; let generateSpeech, cancelSpeech, isSpeaking, isLoading;
@ -112,7 +120,7 @@ const useTextToSpeech = (message?: TMessage, isLast = false, index = 0) => {
isMouseDownRef.current = true; isMouseDownRef.current = true;
timerRef.current = window.setTimeout(() => { timerRef.current = window.setTimeout(() => {
if (isMouseDownRef.current) { if (isMouseDownRef.current) {
const messageContent = message?.content ?? message?.text ?? ''; const messageContent = content ?? '';
const parsedMessage = const parsedMessage =
typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent); typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
generateSpeech(parsedMessage, false); generateSpeech(parsedMessage, false);
@ -128,11 +136,11 @@ const useTextToSpeech = (message?: TMessage, isLast = false, index = 0) => {
}; };
const toggleSpeech = () => { const toggleSpeech = () => {
if (isSpeaking) { if (isSpeaking === true) {
cancelSpeech(); cancelSpeech();
pauseGlobalAudio(); pauseGlobalAudio();
} else { } else {
const messageContent = message?.content ?? message?.text ?? ''; const messageContent = content ?? '';
const parsedMessage = const parsedMessage =
typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent); typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
generateSpeech(parsedMessage, false); generateSpeech(parsedMessage, false);

View file

@ -1,5 +1,5 @@
import { useRecoilValue } from 'recoil'; import { useRecoilValue } from 'recoil';
import { useState, useCallback, useRef, useEffect } from 'react'; import { useState, useCallback, useRef, useEffect, useMemo } from 'react';
import { MsEdgeTTS, OUTPUT_FORMAT } from 'msedge-tts'; import { MsEdgeTTS, OUTPUT_FORMAT } from 'msedge-tts';
import { useToastContext } from '~/Providers'; import { useToastContext } from '~/Providers';
import useLocalize from '~/hooks/useLocalize'; import useLocalize from '~/hooks/useLocalize';
@ -29,6 +29,8 @@ function useTextToSpeechEdge(): UseTextToSpeechEdgeReturn {
const pendingBuffers = useRef<Uint8Array[]>([]); const pendingBuffers = useRef<Uint8Array[]>([]);
const { showToast } = useToastContext(); const { showToast } = useToastContext();
const isBrowserSupported = useMemo(() => MediaSource.isTypeSupported('audio/mpeg'), []);
const fetchVoices = useCallback(() => { const fetchVoices = useCallback(() => {
if (!ttsRef.current) { if (!ttsRef.current) {
ttsRef.current = new MsEdgeTTS(); ttsRef.current = new MsEdgeTTS();
@ -198,14 +200,23 @@ function useTextToSpeechEdge(): UseTextToSpeechEdgeReturn {
}, [showToast, localize]); }, [showToast, localize]);
useEffect(() => { useEffect(() => {
if (!MediaSource.isTypeSupported('audio/mpeg')) {
return;
}
fetchVoices(); fetchVoices();
}, [fetchVoices]); }, [fetchVoices]);
useEffect(() => { useEffect(() => {
if (!MediaSource.isTypeSupported('audio/mpeg')) {
return;
}
initializeTTS(); initializeTTS();
}, [voiceName, initializeTTS]); }, [voiceName, initializeTTS]);
useEffect(() => { useEffect(() => {
if (!MediaSource.isTypeSupported('audio/mpeg')) {
return;
}
initializeMediaSource(); initializeMediaSource();
return () => { return () => {
if (mediaSourceRef.current) { if (mediaSourceRef.current) {
@ -214,6 +225,15 @@ function useTextToSpeechEdge(): UseTextToSpeechEdgeReturn {
}; };
}, [initializeMediaSource]); }, [initializeMediaSource]);
if (!isBrowserSupported) {
return {
generateSpeechEdge: () => ({}),
cancelSpeechEdge: () => ({}),
isSpeaking: false,
voices: [],
};
}
return { generateSpeechEdge, cancelSpeechEdge, isSpeaking, voices }; return { generateSpeechEdge, cancelSpeechEdge, isSpeaking, voices };
} }