Mirror of https://github.com/danny-avila/LibreChat.git, synced 2025-12-19 09:50:15 +01:00
🎙️ fix: Optimize and Fix Browser TTS Incompatibility (firefox) (#3627)
* fix: 'disable' MsEdgeTTS on unsupported browsers (Firefox)
* refactor: only pass necessary props to the MessageAudio child of HoverButtons
* refactor: fix conditional comparison operators in the MessageAudio component
* refactor: remove console.log statement from the MessageAudio component
Parent: 6655304753
Commit: e3ebcfd2b1
4 changed files with 62 additions and 28 deletions
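
The compatibility fix boils down to feature detection: the MsEdgeTTS hook streams MP3 audio through the MediaSource API, and Firefox reports no support for the 'audio/mpeg' source type, so the Edge TTS path is now skipped entirely there. The sketch below shows the same check in isolation; the fallback to the Web Speech API is illustrative only, since the PR itself just makes the Edge hook a no-op on unsupported browsers:

// Minimal sketch, not code from the PR: gate MP3-streaming TTS on MediaSource support.
const canStreamMp3 = (): boolean =>
  typeof MediaSource !== 'undefined' && MediaSource.isTypeSupported('audio/mpeg');

// Hypothetical caller: use the Edge/MediaSource path only where it can work,
// otherwise fall back to the browser's built-in speech synthesis.
function speak(text: string, speakViaEdge: (t: string) => void): void {
  if (canStreamMp3()) {
    speakViaEdge(text); // Chromium-based browsers: stream MP3 chunks into a MediaSource
    return;
  }
  const utterance = new SpeechSynthesisUtterance(text); // Firefox and others
  window.speechSynthesis.speak(utterance);
}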

HoverButtons:

@@ -73,7 +73,14 @@ export default function HoverButtons({
   return (
     <div className="visible mt-0 flex justify-center gap-1 self-end text-gray-500 lg:justify-start">
-      {TextToSpeech && <MessageAudio index={index} message={message} isLast={isLast} />}
+      {TextToSpeech && (
+        <MessageAudio
+          index={index}
+          messageId={message.messageId}
+          content={message.content ?? message.text}
+          isLast={isLast}
+        />
+      )}
       {isEditableEndpoint && (
         <button
           className={cn(
@@ -128,7 +135,7 @@ export default function HoverButtons({
         forkingSupported={forkingSupported}
         latestMessage={latestMessage}
       />
-      {continueSupported ? (
+      {continueSupported === true ? (
         <button
           className={cn(
             'hover-button active rounded-md p-1 hover:bg-gray-100 hover:text-gray-500 focus:opacity-100 dark:text-gray-400/70 dark:hover:bg-gray-700 dark:hover:text-gray-200 disabled:dark:hover:text-gray-400 md:invisible md:group-hover:visible',
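
Two of the commit-message items are visible in this hunk: MessageAudio now receives only the fields it needs (messageId and the content to read) instead of the whole message object, and flags are compared with === true rather than relying on truthiness. The strict comparison only changes behavior when a flag's type is wider than boolean; a small illustrative sketch (the widened typing below is assumed, not taken from the codebase):

// With a loosely typed flag, `=== true` passes only for the literal boolean true,
// while a bare truthiness check would also pass for truthy non-boolean values.
const showContinue = (continueSupported?: boolean | string): boolean =>
  continueSupported === true;

showContinue(true);      // true
showContinue(undefined); // false
showContinue('yes');     // false — a plain `if (continueSupported)` would have passed here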

MessageAudio:

@@ -1,28 +1,34 @@
 import { useEffect } from 'react';
 import { useRecoilValue } from 'recoil';
-import type { TMessage } from 'librechat-data-provider';
+import type { TMessageContentParts } from 'librechat-data-provider';
 import { VolumeIcon, VolumeMuteIcon, Spinner } from '~/components/svg';
 import { useLocalize, useTextToSpeech } from '~/hooks';
 import store from '~/store';
 
 type THoverButtons = {
-  message: TMessage;
+  messageId?: string;
+  content?: TMessageContentParts[] | string;
   isLast: boolean;
   index: number;
 };
 
-export default function MessageAudio({ index, message, isLast }: THoverButtons) {
+export default function MessageAudio({ isLast, index, messageId, content }: THoverButtons) {
   const localize = useLocalize();
   const playbackRate = useRecoilValue(store.playbackRate);
 
-  const { toggleSpeech, isSpeaking, isLoading, audioRef } = useTextToSpeech(message, isLast, index);
+  const { toggleSpeech, isSpeaking, isLoading, audioRef } = useTextToSpeech({
+    isLast,
+    index,
+    messageId,
+    content,
+  });
 
   const renderIcon = (size: string) => {
-    if (isLoading) {
+    if (isLoading === true) {
       return <Spinner size={size} />;
     }
 
-    if (isSpeaking) {
+    if (isSpeaking === true) {
       return <VolumeMuteIcon size={size} />;
     }
 
@@ -30,21 +36,14 @@ export default function MessageAudio({ index, message, isLast }: THoverButtons)
   };
 
   useEffect(() => {
-    const messageAudio = document.getElementById(
-      `audio-${message.messageId}`,
-    ) as HTMLAudioElement | null;
+    const messageAudio = document.getElementById(`audio-${messageId}`) as HTMLAudioElement | null;
     if (!messageAudio) {
       return;
     }
-    if (
-      playbackRate &&
-      playbackRate > 0 &&
-      messageAudio &&
-      messageAudio.playbackRate !== playbackRate
-    ) {
+    if (playbackRate != null && playbackRate > 0 && messageAudio.playbackRate !== playbackRate) {
       messageAudio.playbackRate = playbackRate;
     }
-  }, [audioRef, isSpeaking, playbackRate, message.messageId]);
+  }, [audioRef, isSpeaking, playbackRate, messageId]);
 
   return (
     <>
@@ -69,7 +68,7 @@ export default function MessageAudio({ index, message, isLast }: THoverButtons)
           toggleSpeech();
         }}
         type="button"
-        title={isSpeaking ? localize('com_ui_stop') : localize('com_ui_read_aloud')}
+        title={isSpeaking === true ? localize('com_ui_stop') : localize('com_ui_read_aloud')}
       >
         {renderIcon('19')}
       </button>
@@ -84,8 +83,8 @@ export default function MessageAudio({ index, message, isLast }: THoverButtons)
         height: '0px',
         width: '0px',
       }}
-      src={audioRef.current?.src || undefined}
-      id={`audio-${message.messageId}`}
+      src={audioRef.current?.src ?? undefined}
+      id={`audio-${messageId}`}
       muted
       autoPlay
     />
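
The playback-rate effect simplifies to a single guarded assignment: look up the message's hidden <audio> element by id and apply the user's rate only when it actually differs. The dropped messageAudio && clause was redundant because of the early return above it. The same logic as a standalone helper (the function name is illustrative, not from the codebase):

// Apply a user-selected playback rate to one message's <audio> element, if present.
function applyPlaybackRate(messageId: string, playbackRate: number | null): void {
  const el = document.getElementById(`audio-${messageId}`) as HTMLAudioElement | null;
  if (!el) {
    return;
  }
  if (playbackRate != null && playbackRate > 0 && el.playbackRate !== playbackRate) {
    el.playbackRate = playbackRate;
  }
}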

useTextToSpeech:

@@ -1,7 +1,7 @@
 import { useRecoilState } from 'recoil';
 import { useRef, useMemo, useEffect } from 'react';
 import { parseTextParts } from 'librechat-data-provider';
-import type { TMessage } from 'librechat-data-provider';
+import type { TMessageContentParts } from 'librechat-data-provider';
 import type { Option } from '~/common';
 import useTextToSpeechExternal from './useTextToSpeechExternal';
 import useTextToSpeechBrowser from './useTextToSpeechBrowser';
@@ -11,7 +11,15 @@ import { usePauseGlobalAudio } from '../Audio';
 import { logger } from '~/utils';
 import store from '~/store';
 
-const useTextToSpeech = (message?: TMessage, isLast = false, index = 0) => {
+type TUseTextToSpeech = {
+  messageId?: string;
+  content?: TMessageContentParts[] | string;
+  isLast?: boolean;
+  index?: number;
+};
+
+const useTextToSpeech = (props?: TUseTextToSpeech) => {
+  const { messageId, content, isLast = false, index = 0 } = props ?? {};
   const [voice, setVoice] = useRecoilState(store.voice);
   const { textToSpeechEndpoint } = useGetAudioSettings();
   const { pauseGlobalAudio } = usePauseGlobalAudio(index);
@@ -38,7 +46,7 @@ const useTextToSpeech = (message?: TMessage, isLast = false, index = 0) => {
     isLoading: isLoadingExternal,
     audioRef: audioRefExternal,
     voices: voicesExternal,
-  } = useTextToSpeechExternal(message?.messageId ?? '', isLast, index);
+  } = useTextToSpeechExternal(messageId ?? '', isLast, index);
 
   let generateSpeech, cancelSpeech, isSpeaking, isLoading;
 
@@ -112,7 +120,7 @@ const useTextToSpeech = (message?: TMessage, isLast = false, index = 0) => {
       isMouseDownRef.current = true;
       timerRef.current = window.setTimeout(() => {
         if (isMouseDownRef.current) {
-          const messageContent = message?.content ?? message?.text ?? '';
+          const messageContent = content ?? '';
           const parsedMessage =
             typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
           generateSpeech(parsedMessage, false);
@@ -128,11 +136,11 @@ const useTextToSpeech = (message?: TMessage, isLast = false, index = 0) => {
   };
 
   const toggleSpeech = () => {
-    if (isSpeaking) {
+    if (isSpeaking === true) {
      cancelSpeech();
      pauseGlobalAudio();
    } else {
-      const messageContent = message?.content ?? message?.text ?? '';
+      const messageContent = content ?? '';
      const parsedMessage =
        typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
      generateSpeech(parsedMessage, false);
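
The hook's new options-object signature means call sites pass named fields instead of positional arguments and omit whatever they do not have; the MessageAudio change above is the real call site. A condensed, illustrative sketch of a consumer (ReadAloudButton is hypothetical, reusing the TUseTextToSpeech shape defined in the hook):

// Hypothetical consumer component, assuming messageId/content/isLast/index arrive as props.
function ReadAloudButton({ messageId, content, isLast = false, index = 0 }: TUseTextToSpeech) {
  const { toggleSpeech, isSpeaking, isLoading } = useTextToSpeech({
    messageId, // addresses the hidden <audio id={`audio-${messageId}`}> element
    content,   // TMessageContentParts[] | string; parseTextParts handles the non-string case
    isLast,
    index,
  });

  return (
    <button type="button" onClick={toggleSpeech} disabled={isLoading === true}>
      {isSpeaking === true ? 'Stop' : 'Read aloud'}
    </button>
  );
}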

useTextToSpeechEdge:

@@ -1,5 +1,5 @@
 import { useRecoilValue } from 'recoil';
-import { useState, useCallback, useRef, useEffect } from 'react';
+import { useState, useCallback, useRef, useEffect, useMemo } from 'react';
 import { MsEdgeTTS, OUTPUT_FORMAT } from 'msedge-tts';
 import { useToastContext } from '~/Providers';
 import useLocalize from '~/hooks/useLocalize';
@@ -29,6 +29,8 @@ function useTextToSpeechEdge(): UseTextToSpeechEdgeReturn {
   const pendingBuffers = useRef<Uint8Array[]>([]);
   const { showToast } = useToastContext();
 
+  const isBrowserSupported = useMemo(() => MediaSource.isTypeSupported('audio/mpeg'), []);
+
   const fetchVoices = useCallback(() => {
     if (!ttsRef.current) {
       ttsRef.current = new MsEdgeTTS();
@@ -198,14 +200,23 @@
   }, [showToast, localize]);
 
   useEffect(() => {
+    if (!MediaSource.isTypeSupported('audio/mpeg')) {
+      return;
+    }
     fetchVoices();
   }, [fetchVoices]);
 
   useEffect(() => {
+    if (!MediaSource.isTypeSupported('audio/mpeg')) {
+      return;
+    }
     initializeTTS();
   }, [voiceName, initializeTTS]);
 
   useEffect(() => {
+    if (!MediaSource.isTypeSupported('audio/mpeg')) {
+      return;
+    }
     initializeMediaSource();
     return () => {
       if (mediaSourceRef.current) {
@@ -214,6 +225,15 @@
     };
   }, [initializeMediaSource]);
 
+  if (!isBrowserSupported) {
+    return {
+      generateSpeechEdge: () => ({}),
+      cancelSpeechEdge: () => ({}),
+      isSpeaking: false,
+      voices: [],
+    };
+  }
+
   return { generateSpeechEdge, cancelSpeechEdge, isSpeaking, voices };
 }
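
Because the hook now returns inert stand-ins when 'audio/mpeg' streaming is unsupported, callers need no browser-specific branching of their own, and the per-effect guards keep fetchVoices, initializeTTS, and initializeMediaSource from running at all on such browsers. Roughly what a consumer observes (the comments describe the fallback above, not extra code from the PR):

// Inside a React component:
const { generateSpeechEdge, cancelSpeechEdge, isSpeaking, voices } = useTextToSpeechEdge();

// On a browser without MediaSource 'audio/mpeg' support (e.g. Firefox):
//   voices                                -> []      (no Edge voices are fetched)
//   isSpeaking                            -> false
//   generateSpeechEdge / cancelSpeechEdge -> no-ops that return {}
// Choosing between the browser, Edge, and external engines appears to happen upstream,
// in useTextToSpeech, based on the textToSpeechEndpoint from useGetAudioSettings.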