🎛️ fix: Improve Frontend Practices for Audio Settings (#3624)

* refactor: do not call await inside useCallbacks, rely on updates for dropdown

* fix: remember last selected voice

* refactor: Update Speech component to use TypeScript in useCallback

* refactor: Update Dropdown component styles to match header theme
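
The first refactor is the key pattern here: a `useCallback` handler should not itself be `async` (that silently turns its type into `() => Promise<void>` and leaves rejections unhandled at the call site); instead the handler stays synchronous and fires an inner async function that does its own error handling. A minimal sketch of that shape, with `synthesize` as a hypothetical stand-in for the real msedge-tts streaming work:

```ts
import { useCallback, useState } from 'react';

// Hypothetical async helper; in the real hook this is the msedge-tts streaming work.
declare function synthesize(text: string): Promise<void>;

export function useSpeakButton() {
  const [isSpeaking, setIsSpeaking] = useState(false);

  // The handler's type stays (text: string) => void, not (text: string) => Promise<void>.
  const speak = useCallback((text: string) => {
    const run = async () => {
      try {
        setIsSpeaking(true);
        await synthesize(text); // the await lives inside the inner async function
      } catch (error) {
        console.error('Speech failed:', error);
      } finally {
        setIsSpeaking(false);
      }
    };
    run(); // fire-and-forget; errors are already handled inside run()
  }, []);

  return { speak, isSpeaking };
}
```

The diff below applies the same shape to `generateSpeechEdge`, and replaces the promise-returning `voices()` getter with a `voices` state array that the dropdown can simply re-render from.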
Danny Avila 2024-08-13 02:42:49 -04:00 committed by GitHub
parent 8cbb6ba166
commit 05696233a9
20 changed files with 436 additions and 367 deletions


@@ -1,4 +1,4 @@
-import { useRecoilState } from 'recoil';
+import { useRecoilValue } from 'recoil';
 import { useState, useCallback, useRef, useEffect } from 'react';
 import { MsEdgeTTS, OUTPUT_FORMAT } from 'msedge-tts';
 import { useToastContext } from '~/Providers';
@@ -7,20 +7,21 @@ import store from '~/store';
 interface Voice {
   value: string;
-  display: string;
+  label: string;
 }
 interface UseTextToSpeechEdgeReturn {
-  generateSpeechEdge: (text: string) => Promise<void>;
+  generateSpeechEdge: (text: string) => void;
   cancelSpeechEdge: () => void;
   isSpeaking: boolean;
-  voices: () => Promise<Voice[]>;
+  voices: Voice[];
 }
 function useTextToSpeechEdge(): UseTextToSpeechEdgeReturn {
   const localize = useLocalize();
+  const [voices, setVoices] = useState<Voice[]>([]);
   const [isSpeaking, setIsSpeaking] = useState<boolean>(false);
-  const [voiceName] = useRecoilState<string>(store.voice);
+  const voiceName = useRecoilValue(store.voice);
   const ttsRef = useRef<MsEdgeTTS | null>(null);
   const audioElementRef = useRef<HTMLAudioElement | null>(null);
   const mediaSourceRef = useRef<MediaSource | null>(null);
@@ -28,61 +29,59 @@ function useTextToSpeechEdge(): UseTextToSpeechEdgeReturn {
   const pendingBuffers = useRef<Uint8Array[]>([]);
   const { showToast } = useToastContext();
-  const initializeTTS = useCallback(async (): Promise<void> => {
+  const fetchVoices = useCallback(() => {
     if (!ttsRef.current) {
       ttsRef.current = new MsEdgeTTS();
     }
-    try {
-      await ttsRef.current.setMetadata(voiceName, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3);
-    } catch (error) {
-      console.error('Error initializing TTS:', error);
-      showToast({
-        message: localize('com_nav_tts_init_error', (error as Error).message),
-        status: 'error',
-      });
-    }
-  }, [voiceName, showToast, localize]);
-  const onSourceOpen = useCallback((): void => {
-    if (!sourceBufferRef.current && mediaSourceRef.current) {
-      try {
-        sourceBufferRef.current = mediaSourceRef.current.addSourceBuffer('audio/mpeg');
-        sourceBufferRef.current.addEventListener('updateend', appendNextBuffer);
-      } catch (error) {
-        console.error('Error adding source buffer:', error);
+    ttsRef.current
+      .getVoices()
+      .then((voicesList) => {
+        setVoices(
+          voicesList.map((v) => ({
+            value: v.ShortName,
+            label: v.FriendlyName,
+          })),
+        );
+      })
+      .catch((error) => {
+        console.error('Error fetching voices:', error);
         showToast({
-          message: localize('com_nav_source_buffer_error'),
+          message: localize('com_nav_voices_fetch_error'),
           status: 'error',
         });
-      }
-    }
-    // eslint-disable-next-line react-hooks/exhaustive-deps
+      });
   }, [showToast, localize]);
-  const initializeMediaSource = useCallback(async (): Promise<void> => {
-    return new Promise<void>((resolve) => {
-      if (!mediaSourceRef.current) {
-        mediaSourceRef.current = new MediaSource();
-        audioElementRef.current = new Audio();
-        audioElementRef.current.src = URL.createObjectURL(mediaSourceRef.current);
-      }
+  const initializeTTS = useCallback(() => {
+    if (!ttsRef.current) {
+      ttsRef.current = new MsEdgeTTS();
+    }
+    const availableVoice: Voice | undefined = voices.find((v) => v.value === voiceName);
-      const mediaSource = mediaSourceRef.current;
-      if (mediaSource.readyState === 'open') {
-        onSourceOpen();
-        resolve();
-      } else {
-        const onSourceOpenWrapper = (): void => {
-          onSourceOpen();
-          resolve();
-          mediaSource.removeEventListener('sourceopen', onSourceOpenWrapper);
-        };
-        mediaSource.addEventListener('sourceopen', onSourceOpenWrapper);
-      }
-    });
-  }, [onSourceOpen]);
+    if (availableVoice) {
+      ttsRef.current
+        .setMetadata(availableVoice.value, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3)
+        .catch((error) => {
+          console.error('Error initializing TTS:', error);
+          showToast({
+            message: localize('com_nav_tts_init_error', (error as Error).message),
+            status: 'error',
+          });
+        });
+    } else if (voices.length > 0) {
+      ttsRef.current
+        .setMetadata(voices[0].value, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3)
+        .catch((error) => {
+          console.error('Error initializing TTS:', error);
+          showToast({
+            message: localize('com_nav_tts_init_error', (error as Error).message),
+            status: 'error',
+          });
+        });
+    }
+  }, [voiceName, showToast, localize, voices]);
-  const appendNextBuffer = useCallback((): void => {
+  const appendNextBuffer = useCallback(() => {
     if (
       sourceBufferRef.current &&
       !sourceBufferRef.current.updating &&
@@ -104,50 +103,81 @@ function useTextToSpeechEdge(): UseTextToSpeechEdgeReturn {
     }
   }, [showToast, localize]);
-  const generateSpeechEdge = useCallback(
-    async (text: string): Promise<void> => {
+  const onSourceOpen = useCallback(() => {
+    if (!sourceBufferRef.current && mediaSourceRef.current) {
       try {
-        await initializeTTS();
-        await initializeMediaSource();
-        if (!ttsRef.current || !audioElementRef.current) {
-          throw new Error('TTS or Audio element not initialized');
-        }
-        setIsSpeaking(true);
-        pendingBuffers.current = [];
-        const readable = await ttsRef.current.toStream(text);
-        readable.on('data', (chunk: Buffer) => {
-          pendingBuffers.current.push(new Uint8Array(chunk));
-          appendNextBuffer();
-        });
-        readable.on('end', () => {
-          if (mediaSourceRef.current && mediaSourceRef.current.readyState === 'open') {
-            mediaSourceRef.current.endOfStream();
-          }
-        });
-        audioElementRef.current.onended = () => {
-          setIsSpeaking(false);
-        };
-        await audioElementRef.current.play();
+        sourceBufferRef.current = mediaSourceRef.current.addSourceBuffer('audio/mpeg');
+        sourceBufferRef.current.addEventListener('updateend', appendNextBuffer);
       } catch (error) {
-        console.error('Error generating speech:', error);
+        console.error('Error adding source buffer:', error);
         showToast({
-          message: localize('com_nav_audio_play_error', (error as Error).message),
+          message: localize('com_nav_source_buffer_error'),
           status: 'error',
         });
-        setIsSpeaking(false);
       }
+    }
+  }, [showToast, localize, appendNextBuffer]);
+  const initializeMediaSource = useCallback(() => {
+    if (!mediaSourceRef.current) {
+      mediaSourceRef.current = new MediaSource();
+      audioElementRef.current = new Audio();
+      audioElementRef.current.src = URL.createObjectURL(mediaSourceRef.current);
+    }
+    const mediaSource = mediaSourceRef.current;
+    if (mediaSource.readyState === 'open') {
+      onSourceOpen();
+    } else {
+      mediaSource.addEventListener('sourceopen', onSourceOpen);
+    }
+  }, [onSourceOpen]);
+  const generateSpeechEdge = useCallback(
+    (text: string) => {
+      const generate = async () => {
+        try {
+          if (!ttsRef.current || !audioElementRef.current) {
+            throw new Error('TTS or Audio element not initialized');
+          }
+          setIsSpeaking(true);
+          pendingBuffers.current = [];
+          const readable = await ttsRef.current.toStream(text);
+          readable.on('data', (chunk: Buffer) => {
+            pendingBuffers.current.push(new Uint8Array(chunk));
+            appendNextBuffer();
+          });
+          readable.on('end', () => {
+            if (mediaSourceRef.current && mediaSourceRef.current.readyState === 'open') {
+              mediaSourceRef.current.endOfStream();
+            }
+          });
+          audioElementRef.current.onended = () => {
+            setIsSpeaking(false);
+          };
+          await audioElementRef.current.play();
+        } catch (error) {
+          console.error('Error generating speech:', error);
+          showToast({
+            message: localize('com_nav_audio_play_error', (error as Error).message),
+            status: 'error',
+          });
+          setIsSpeaking(false);
+        }
+      };
+      generate();
     },
-    [initializeTTS, initializeMediaSource, appendNextBuffer, showToast, localize],
+    [appendNextBuffer, showToast, localize],
   );
-  const cancelSpeechEdge = useCallback((): void => {
+  const cancelSpeechEdge = useCallback(() => {
     try {
       if (audioElementRef.current) {
         audioElementRef.current.pause();
@@ -167,33 +197,22 @@ function useTextToSpeechEdge(): UseTextToSpeechEdgeReturn {
     }
   }, [showToast, localize]);
-  const voices = useCallback(async (): Promise<Voice[]> => {
-    if (!ttsRef.current) {
-      ttsRef.current = new MsEdgeTTS();
-    }
-    try {
-      const voicesList = await ttsRef.current.getVoices();
-      return voicesList.map((v) => ({
-        value: v.ShortName,
-        display: v.FriendlyName,
-      }));
-    } catch (error) {
-      console.error('Error fetching voices:', error);
-      showToast({
-        message: localize('com_nav_voices_fetch_error'),
-        status: 'error',
-      });
-      return [];
-    }
-  }, [showToast, localize]);
+  useEffect(() => {
+    fetchVoices();
+  }, [fetchVoices]);
+  useEffect(() => {
+    initializeTTS();
+  }, [voiceName, initializeTTS]);
   useEffect(() => {
     initializeMediaSource();
     return () => {
       if (mediaSourceRef.current) {
-        URL.revokeObjectURL(audioElementRef.current?.src || '');
+        URL.revokeObjectURL(audioElementRef.current?.src ?? '');
       }
     };
-  }, []);
+  }, [initializeMediaSource]);
   return { generateSpeechEdge, cancelSpeechEdge, isSpeaking, voices };
 }
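
Because the hook now returns `voices` as plain state and keeps the selection in the `store.voice` Recoil atom, a settings dropdown can bind directly to both and pick up updates through normal re-renders, with the last selected voice remembered in the atom rather than reset on every mount. Roughly, as a hypothetical consumer (the import path and component name are illustrative, not the actual Speech settings component):

```tsx
import React from 'react';
import { useRecoilState } from 'recoil';
import useTextToSpeechEdge from '~/hooks/Audio/useTextToSpeechEdge'; // illustrative path
import store from '~/store';

export default function VoiceDropdown() {
  const { voices } = useTextToSpeechEdge();
  const [voice, setVoice] = useRecoilState<string>(store.voice);

  // No awaiting a voices() promise: the list starts empty and fills in once the
  // hook's fetch effect resolves; the selected value persists via the Recoil atom.
  return (
    <select value={voice} onChange={(e) => setVoice(e.target.value)}>
      {voices.map((v) => (
        <option key={v.value} value={v.value}>
          {v.label}
        </option>
      ))}
    </select>
  );
}
```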