🔀 refactor: Modularize TTS Logic for Improved Browser support (#3657)

* WIP: message audio refactor

* WIP: use MessageAudio by provider

* fix: Update MessageAudio component to use TTSEndpoints enum

* feat: Update useTextToSpeechBrowser hook to handle errors and improve error logging

* feat: Add voice dropdown components for different TTS engines

* docs: update incorrect `voices` example

changed `voice: ''` to `voices: ['alloy']`

* feat: Add browser support check for Edge TTS engine component with error toast if not supported

---------

Co-authored-by: Marco Beretta <81851188+berry-13@users.noreply.github.com>
This commit is contained in:
Danny Avila 2024-08-15 11:34:25 -04:00 committed by GitHub
parent bcde0beb47
commit dba704079c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 784 additions and 187 deletions

View file

@ -1,43 +1,54 @@
import { useRecoilState } from 'recoil';
import { useState, useEffect, useCallback } from 'react';
import type { VoiceOption } from '~/common';
import store from '~/store';
interface VoiceOption {
value: string;
label: string;
}
function useTextToSpeechBrowser({
setIsSpeaking,
}: {
setIsSpeaking: (isSpeaking: boolean) => void;
setIsSpeaking: React.Dispatch<React.SetStateAction<boolean>>;
}) {
const [cloudBrowserVoices] = useRecoilState(store.cloudBrowserVoices);
const [voiceName] = useRecoilState(store.voice);
const [voices, setVoices] = useState<VoiceOption[]>([]);
const updateVoices = useCallback(() => {
const availableVoices = window.speechSynthesis
.getVoices()
.filter((v) => cloudBrowserVoices || v.localService === true);
try {
const availableVoices = window.speechSynthesis.getVoices();
if (!Array.isArray(availableVoices)) {
console.error('getVoices() did not return an array');
return;
}
const voiceOptions: VoiceOption[] = availableVoices.map((v) => ({
value: v.name,
label: v.name,
}));
const filteredVoices = availableVoices.filter(
(v) => cloudBrowserVoices || v.localService === true,
);
const voiceOptions: VoiceOption[] = filteredVoices.map((v) => ({
value: v.name,
label: v.name,
}));
setVoices(voiceOptions);
setVoices(voiceOptions);
} catch (error) {
console.error('Error updating voices:', error);
}
}, [cloudBrowserVoices]);
useEffect(() => {
if (window.speechSynthesis.getVoices().length) {
updateVoices();
} else {
window.speechSynthesis.onvoiceschanged = updateVoices;
const synth = window.speechSynthesis;
try {
if (synth.getVoices().length) {
updateVoices();
} else {
synth.onvoiceschanged = updateVoices;
}
} catch (error) {
console.error('Error in useEffect:', error);
}
return () => {
window.speechSynthesis.onvoiceschanged = null;
synth.onvoiceschanged = null;
};
}, [updateVoices]);
@ -46,22 +57,37 @@ function useTextToSpeechBrowser({
const voice = voices.find((v) => v.value === voiceName);
if (!voice) {
console.warn('Selected voice not found');
return;
}
synth.cancel();
const utterance = new SpeechSynthesisUtterance(text);
utterance.voice = synth.getVoices().find((v) => v.name === voice.value) || null;
utterance.onend = () => {
try {
synth.cancel();
const utterance = new SpeechSynthesisUtterance(text);
utterance.voice = synth.getVoices().find((v) => v.name === voice.value) || null;
utterance.onend = () => {
setIsSpeaking(false);
};
utterance.onerror = (event) => {
console.error('Speech synthesis error:', event);
setIsSpeaking(false);
};
setIsSpeaking(true);
synth.speak(utterance);
} catch (error) {
console.error('Error generating speech:', error);
setIsSpeaking(false);
};
setIsSpeaking(true);
synth.speak(utterance);
}
};
const cancelSpeechLocal = () => {
window.speechSynthesis.cancel();
setIsSpeaking(false);
try {
window.speechSynthesis.cancel();
} catch (error) {
console.error('Error cancelling speech:', error);
} finally {
setIsSpeaking(false);
}
};
return { generateSpeechLocal, cancelSpeechLocal, voices };