mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-19 09:50:15 +01:00
🎙️ fix: Optimize and Fix Browser TTS Incompatibility (firefox) (#3627)
* fix: 'disable' MsEdgeTTS on unsupported browser (firefox)
* refactor: only pass necessary props to HoverButton MessageAudio
* refactor: Fix conditional comparison operators in MessageAudio component
* refactor: Remove console.log statement in MessageAudio component
This commit is contained in:
parent
6655304753
commit
e3ebcfd2b1
4 changed files with 62 additions and 28 deletions
|
|
@ -1,7 +1,7 @@
|
|||
import { useRecoilState } from 'recoil';
|
||||
import { useRef, useMemo, useEffect } from 'react';
|
||||
import { parseTextParts } from 'librechat-data-provider';
|
||||
import type { TMessage } from 'librechat-data-provider';
|
||||
import type { TMessageContentParts } from 'librechat-data-provider';
|
||||
import type { Option } from '~/common';
|
||||
import useTextToSpeechExternal from './useTextToSpeechExternal';
|
||||
import useTextToSpeechBrowser from './useTextToSpeechBrowser';
|
||||
|
|
@ -11,7 +11,15 @@ import { usePauseGlobalAudio } from '../Audio';
|
|||
import { logger } from '~/utils';
|
||||
import store from '~/store';
|
||||
|
||||
const useTextToSpeech = (message?: TMessage, isLast = false, index = 0) => {
|
||||
type TUseTextToSpeech = {
|
||||
messageId?: string;
|
||||
content?: TMessageContentParts[] | string;
|
||||
isLast?: boolean;
|
||||
index?: number;
|
||||
};
|
||||
|
||||
const useTextToSpeech = (props?: TUseTextToSpeech) => {
|
||||
const { messageId, content, isLast = false, index = 0 } = props ?? {};
|
||||
const [voice, setVoice] = useRecoilState(store.voice);
|
||||
const { textToSpeechEndpoint } = useGetAudioSettings();
|
||||
const { pauseGlobalAudio } = usePauseGlobalAudio(index);
|
||||
|
|
@ -38,7 +46,7 @@ const useTextToSpeech = (message?: TMessage, isLast = false, index = 0) => {
|
|||
isLoading: isLoadingExternal,
|
||||
audioRef: audioRefExternal,
|
||||
voices: voicesExternal,
|
||||
} = useTextToSpeechExternal(message?.messageId ?? '', isLast, index);
|
||||
} = useTextToSpeechExternal(messageId ?? '', isLast, index);
|
||||
|
||||
let generateSpeech, cancelSpeech, isSpeaking, isLoading;
|
||||
|
||||
|
|
@ -112,7 +120,7 @@ const useTextToSpeech = (message?: TMessage, isLast = false, index = 0) => {
|
|||
isMouseDownRef.current = true;
|
||||
timerRef.current = window.setTimeout(() => {
|
||||
if (isMouseDownRef.current) {
|
||||
const messageContent = message?.content ?? message?.text ?? '';
|
||||
const messageContent = content ?? '';
|
||||
const parsedMessage =
|
||||
typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
|
||||
generateSpeech(parsedMessage, false);
|
||||
|
|
@ -128,11 +136,11 @@ const useTextToSpeech = (message?: TMessage, isLast = false, index = 0) => {
|
|||
};
|
||||
|
||||
const toggleSpeech = () => {
|
||||
if (isSpeaking) {
|
||||
if (isSpeaking === true) {
|
||||
cancelSpeech();
|
||||
pauseGlobalAudio();
|
||||
} else {
|
||||
const messageContent = message?.content ?? message?.text ?? '';
|
||||
const messageContent = content ?? '';
|
||||
const parsedMessage =
|
||||
typeof messageContent === 'string' ? messageContent : parseTextParts(messageContent);
|
||||
generateSpeech(parsedMessage, false);
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import { useRecoilValue } from 'recoil';
|
||||
import { useState, useCallback, useRef, useEffect } from 'react';
|
||||
import { useState, useCallback, useRef, useEffect, useMemo } from 'react';
|
||||
import { MsEdgeTTS, OUTPUT_FORMAT } from 'msedge-tts';
|
||||
import { useToastContext } from '~/Providers';
|
||||
import useLocalize from '~/hooks/useLocalize';
|
||||
|
|
@ -29,6 +29,8 @@ function useTextToSpeechEdge(): UseTextToSpeechEdgeReturn {
|
|||
const pendingBuffers = useRef<Uint8Array[]>([]);
|
||||
const { showToast } = useToastContext();
|
||||
|
||||
const isBrowserSupported = useMemo(() => MediaSource.isTypeSupported('audio/mpeg'), []);
|
||||
|
||||
const fetchVoices = useCallback(() => {
|
||||
if (!ttsRef.current) {
|
||||
ttsRef.current = new MsEdgeTTS();
|
||||
|
|
@ -198,14 +200,23 @@ function useTextToSpeechEdge(): UseTextToSpeechEdgeReturn {
|
|||
}, [showToast, localize]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!MediaSource.isTypeSupported('audio/mpeg')) {
|
||||
return;
|
||||
}
|
||||
fetchVoices();
|
||||
}, [fetchVoices]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!MediaSource.isTypeSupported('audio/mpeg')) {
|
||||
return;
|
||||
}
|
||||
initializeTTS();
|
||||
}, [voiceName, initializeTTS]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!MediaSource.isTypeSupported('audio/mpeg')) {
|
||||
return;
|
||||
}
|
||||
initializeMediaSource();
|
||||
return () => {
|
||||
if (mediaSourceRef.current) {
|
||||
|
|
@ -214,6 +225,15 @@ function useTextToSpeechEdge(): UseTextToSpeechEdgeReturn {
|
|||
};
|
||||
}, [initializeMediaSource]);
|
||||
|
||||
if (!isBrowserSupported) {
|
||||
return {
|
||||
generateSpeechEdge: () => ({}),
|
||||
cancelSpeechEdge: () => ({}),
|
||||
isSpeaking: false,
|
||||
voices: [],
|
||||
};
|
||||
}
|
||||
|
||||
return { generateSpeechEdge, cancelSpeechEdge, isSpeaking, voices };
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue