🔧 fix: Consolidate Text Parsing and TTS Edge Initialization (#6582)

* 🔧 fix: Update useTextToSpeechExternal to include loading state and improve text parsing logic

* fix: Update msedge-tts and prevent excessive initialization attempts

* fix: Refactor text parsing logic in mongoMeili model to use parseTextParts function

This commit is contained in:
Danny Avila 2025-03-27 17:09:46 -04:00 committed by GitHub
parent a6f062e468
commit b9ebdd4aa5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 57 additions and 29 deletions

View file

@ -26,6 +26,7 @@ function useTextToSpeechEdge({
const sourceBufferRef = useRef<SourceBuffer | null>(null);
const pendingBuffers = useRef<Uint8Array[]>([]);
const { showToast } = useToastContext();
const initAttempts = useRef(0);
const isBrowserSupported = useMemo(
() => typeof MediaSource !== 'undefined' && MediaSource.isTypeSupported('audio/mpeg'),
@ -57,14 +58,20 @@ function useTextToSpeechEdge({
const initializeTTS = useCallback(() => {
if (!ttsRef.current) {
ttsRef.current = new MsEdgeTTS();
ttsRef.current = new MsEdgeTTS({
enableLogger: true,
});
}
const availableVoice: VoiceOption | undefined = voices.find((v) => v.value === voiceName);
if (availableVoice) {
if (initAttempts.current > 3) {
return;
}
ttsRef.current
.setMetadata(availableVoice.value, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3)
.setMetadata(availableVoice.value, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3, {})
.catch((error) => {
initAttempts.current += 1;
console.error('Error initializing TTS:', error);
showToast({
message: localize('com_nav_tts_init_error', { 0: (error as Error).message }),
@ -73,8 +80,9 @@ function useTextToSpeechEdge({
});
} else if (voices.length > 0) {
ttsRef.current
.setMetadata(voices[0].value, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3)
.setMetadata(voices[0].value, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3, {})
.catch((error) => {
initAttempts.current += 1;
console.error('Error initializing TTS:', error);
showToast({
message: localize('com_nav_tts_init_error', { 0: (error as Error).message }),
@ -147,7 +155,8 @@ function useTextToSpeechEdge({
setIsSpeaking(true);
pendingBuffers.current = [];
const readable = ttsRef.current.toStream(text);
const result = await ttsRef.current.toStream(text);
const readable = result.audioStream;
readable.on('data', (chunk: Buffer) => {
pendingBuffers.current.push(new Uint8Array(chunk));

View file

@ -67,7 +67,10 @@ function useTextToSpeechExternal({
return playPromise().catch(console.error);
}
console.error(error);
showToast({ message: localize('com_nav_audio_play_error', { 0: error.message }), status: 'error' });
showToast({
message: localize('com_nav_audio_play_error', { 0: error.message }),
status: 'error',
});
});
newAudio.onended = () => {
@ -87,7 +90,7 @@ function useTextToSpeechExternal({
setDownloadFile(false);
};
const { mutate: processAudio } = useTextToSpeechMutation({
const { mutate: processAudio, isLoading } = useTextToSpeechMutation({
onMutate: (variables) => {
const inputText = (variables.get('input') ?? '') as string;
if (inputText.length >= 4096) {
@ -182,7 +185,7 @@ function useTextToSpeechExternal({
useEffect(() => cancelPromiseSpeech, [cancelPromiseSpeech]);
const isLoading = useMemo(
const isFetching = useMemo(
() => isLast && globalIsFetching && !globalIsPlaying,
[globalIsFetching, globalIsPlaying, isLast],
);
@ -192,7 +195,7 @@ function useTextToSpeechExternal({
return {
generateSpeechExternal,
cancelSpeech,
isLoading,
isLoading: isFetching || isLoading,
audioRef,
voices: voicesData,
};