2024-08-13 02:42:49 -04:00
|
|
|
import { useRecoilValue } from 'recoil';
|
2024-08-07 20:15:41 +02:00
|
|
|
import { MsEdgeTTS, OUTPUT_FORMAT } from 'msedge-tts';
|
2024-08-15 11:34:25 -04:00
|
|
|
import { useState, useCallback, useRef, useEffect, useMemo } from 'react';
|
|
|
|
|
import type { VoiceOption } from '~/common';
|
|
|
|
|
import { useToastContext } from '~/Providers/ToastContext';
|
2024-08-07 20:15:41 +02:00
|
|
|
import useLocalize from '~/hooks/useLocalize';
|
|
|
|
|
import store from '~/store';
|
|
|
|
|
|
|
|
|
|
/** Values returned by {@link useTextToSpeechEdge}. */
interface UseTextToSpeechEdgeReturn {
  /** Synthesizes `text` and starts playing the resulting audio. */
  generateSpeechEdge: (text: string) => void;
  /** Stops the current playback and discards any queued audio chunks. */
  cancelSpeechEdge: () => void;
  /** Voice options built from the voice list reported by the TTS client. */
  voices: VoiceOption[];
}
|
|
|
|
|
|
2024-08-13 12:08:55 -04:00
|
|
|
/** Extracts a printable message from a value caught in a `catch` clause. */
function getErrorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * React hook providing text-to-speech backed by `MsEdgeTTS`.
 *
 * It fetches the voice list, configures the client for the voice stored in
 * `store.voice` (falling back to the first fetched voice), and streams the
 * synthesized MP3 chunks into an `Audio` element through a
 * `MediaSource`/`SourceBuffer` pair.
 *
 * @param setIsSpeaking - State setter toggled to `true` when generation starts
 *   and back to `false` when playback ends, is cancelled, or fails.
 * @returns The speech controls and the fetched voices. When `MediaSource` is
 *   unavailable or does not support `audio/mpeg`, no-op controls and an empty
 *   voice list are returned instead.
 */
function useTextToSpeechEdge({
  setIsSpeaking,
}: {
  setIsSpeaking: React.Dispatch<React.SetStateAction<boolean>>;
}): UseTextToSpeechEdgeReturn {
  const localize = useLocalize();
  const [voices, setVoices] = useState<VoiceOption[]>([]);
  const voiceName = useRecoilValue(store.voice);
  const ttsRef = useRef<MsEdgeTTS | null>(null);
  const audioElementRef = useRef<HTMLAudioElement | null>(null);
  const mediaSourceRef = useRef<MediaSource | null>(null);
  const sourceBufferRef = useRef<SourceBuffer | null>(null);
  const pendingBuffers = useRef<Uint8Array[]>([]);
  const { showToast } = useToastContext();
  const initAttempts = useRef(0);
  // Identifies the active speech request; bumped by every new request and by
  // cancel so handlers belonging to an older stream can recognise they are stale.
  const requestIdRef = useRef(0);
  // Set once the active stream has delivered its last chunk (or was cancelled);
  // the MediaSource is ended only after the queue has drained.
  const streamEndedRef = useRef(false);

  const isBrowserSupported = useMemo(
    () => typeof MediaSource !== 'undefined' && MediaSource.isTypeSupported('audio/mpeg'),
    [],
  );

  /** Loads the available voices from the TTS client into state. */
  const fetchVoices = useCallback(() => {
    if (!ttsRef.current) {
      ttsRef.current = new MsEdgeTTS();
    }
    ttsRef.current
      .getVoices()
      .then((voicesList) => {
        setVoices(
          voicesList.map((v) => ({
            value: v.ShortName,
            label: v.FriendlyName,
          })),
        );
      })
      .catch((error: unknown) => {
        console.error('Error fetching voices:', error);
        showToast({
          message: localize('com_nav_voices_fetch_error'),
          status: 'error',
        });
      });
  }, [showToast, localize]);

  /**
   * Configures the TTS client for the selected voice, or for the first fetched
   * voice when the selected one is not in the list. Does nothing until voices
   * are available, and stops retrying after repeated failures.
   */
  const initializeTTS = useCallback(() => {
    if (!ttsRef.current) {
      ttsRef.current = new MsEdgeTTS({
        enableLogger: true,
      });
    }

    const targetVoice = voices.find((v) => v.value === voiceName) ?? voices[0];
    if (!targetVoice) {
      return;
    }
    // The retry cap covers the fallback voice as well, so a persistent failure
    // cannot produce an unbounded series of attempts and error toasts.
    if (initAttempts.current > 3) {
      return;
    }

    ttsRef.current
      .setMetadata(targetVoice.value, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3, {})
      .catch((error: unknown) => {
        initAttempts.current += 1;
        console.error('Error initializing TTS:', error);
        showToast({
          message: localize('com_nav_tts_init_error', { 0: getErrorMessage(error) }),
          status: 'error',
        });
      });
  }, [voiceName, showToast, localize, voices]);

  /**
   * Ends the MediaSource stream once the active stream is finished, nothing is
   * queued, and the SourceBuffer is idle. `endOfStream()` throws
   * `InvalidStateError` while a SourceBuffer is updating, so it must wait.
   */
  const endStreamIfDrained = useCallback(() => {
    const mediaSource = mediaSourceRef.current;
    if (!streamEndedRef.current || !mediaSource || mediaSource.readyState !== 'open') {
      return;
    }
    if (sourceBufferRef.current?.updating || pendingBuffers.current.length > 0) {
      return;
    }
    streamEndedRef.current = false;
    mediaSource.endOfStream();
  }, []);

  /**
   * Appends the next queued chunk when the SourceBuffer is idle, then ends the
   * stream if everything has been delivered. Runs on every `updateend`.
   */
  const appendNextBuffer = useCallback(() => {
    const sourceBuffer = sourceBufferRef.current;
    if (sourceBuffer && !sourceBuffer.updating) {
      const nextBuffer = pendingBuffers.current.shift();
      if (nextBuffer) {
        try {
          sourceBuffer.appendBuffer(nextBuffer);
        } catch (error) {
          console.error('Error appending buffer:', error);
          showToast({
            message: localize('com_nav_buffer_append_error'),
            status: 'error',
          });
          if (streamEndedRef.current) {
            // No further `data` event will retry the append; drop the queue so
            // the stream can still be ended instead of stalling forever.
            pendingBuffers.current = [];
          } else {
            // Put the chunk back so the next `data` event retries it.
            pendingBuffers.current.unshift(nextBuffer);
          }
        }
      }
    }
    endStreamIfDrained();
  }, [showToast, localize, endStreamIfDrained]);

  /** Creates the `audio/mpeg` SourceBuffer the first time the MediaSource opens. */
  const onSourceOpen = useCallback(() => {
    if (!sourceBufferRef.current && mediaSourceRef.current) {
      try {
        sourceBufferRef.current = mediaSourceRef.current.addSourceBuffer('audio/mpeg');
        sourceBufferRef.current.addEventListener('updateend', appendNextBuffer);
        // Flush chunks that were queued before the source finished opening.
        appendNextBuffer();
      } catch (error) {
        console.error('Error adding source buffer:', error);
        showToast({
          message: localize('com_nav_source_buffer_error'),
          status: 'error',
        });
      }
    }
  }, [showToast, localize, appendNextBuffer]);

  /** Lazily creates the MediaSource and Audio element and wires `sourceopen`. */
  const initializeMediaSource = useCallback(() => {
    if (!mediaSourceRef.current) {
      mediaSourceRef.current = new MediaSource();
      audioElementRef.current = new Audio();
      audioElementRef.current.src = URL.createObjectURL(mediaSourceRef.current);
    }

    const mediaSource = mediaSourceRef.current;
    if (mediaSource.readyState === 'open') {
      onSourceOpen();
    } else {
      mediaSource.addEventListener('sourceopen', onSourceOpen);
    }
  }, [onSourceOpen]);

  /** Synthesizes `text`, queues the audio chunks as they arrive, and starts playback. */
  const generateSpeechEdge = useCallback(
    (text: string) => {
      const generate = async () => {
        requestIdRef.current += 1;
        const requestId = requestIdRef.current;
        const isStale = () => requestId !== requestIdRef.current;

        try {
          const tts = ttsRef.current;
          const audioElement = audioElementRef.current;
          if (!tts || !audioElement) {
            throw new Error('TTS or Audio element not initialized');
          }

          setIsSpeaking(true);
          pendingBuffers.current = [];
          streamEndedRef.current = false;

          const result = await tts.toStream(text);
          if (isStale()) {
            // Cancelled or superseded while the stream was being set up.
            return;
          }
          const readable = result.audioStream;

          readable.on('data', (chunk: Buffer) => {
            if (isStale()) {
              return;
            }
            pendingBuffers.current.push(new Uint8Array(chunk));
            appendNextBuffer();
          });

          readable.on('end', () => {
            if (isStale()) {
              return;
            }
            // Defer endOfStream() until the remaining chunks have been appended.
            streamEndedRef.current = true;
            appendNextBuffer();
          });

          audioElement.onended = () => {
            setIsSpeaking(false);
          };

          await audioElement.play();
        } catch (error) {
          console.error('Error generating speech:', error);
          if (isStale()) {
            // A cancel or a newer request owns the speaking state now; e.g. a
            // pending play() rejects when cancel pauses the element.
            return;
          }
          showToast({
            message: localize('com_nav_audio_play_error', { 0: getErrorMessage(error) }),
            status: 'error',
          });
          setIsSpeaking(false);
        }
      };

      // Errors are handled inside `generate`; `void` marks the intentional fire-and-forget.
      void generate();
    },
    [setIsSpeaking, appendNextBuffer, showToast, localize],
  );

  /** Stops playback, discards queued audio, and invalidates the in-flight request. */
  const cancelSpeechEdge = useCallback(() => {
    try {
      // Make the handlers of any in-flight stream no-ops from here on.
      requestIdRef.current += 1;
      pendingBuffers.current = [];

      if (audioElementRef.current) {
        audioElementRef.current.pause();
        audioElementRef.current.currentTime = 0;
      }

      // Ends the stream now when the SourceBuffer is idle, otherwise on its
      // next `updateend`, rather than throwing while an append is in progress.
      streamEndedRef.current = true;
      endStreamIfDrained();

      setIsSpeaking(false);
    } catch (error) {
      console.error('Error cancelling speech:', error);
      showToast({
        message: localize('com_nav_speech_cancel_error'),
        status: 'error',
      });
    }
  }, [setIsSpeaking, showToast, localize, endStreamIfDrained]);

  useEffect(() => {
    if (!isBrowserSupported) {
      return;
    }
    fetchVoices();
  }, [fetchVoices, isBrowserSupported]);

  useEffect(() => {
    if (!isBrowserSupported) {
      return;
    }
    initializeTTS();
  }, [voiceName, initializeTTS, isBrowserSupported]);

  useEffect(() => {
    if (!isBrowserSupported) {
      return;
    }
    initializeMediaSource();
    return () => {
      if (mediaSourceRef.current) {
        URL.revokeObjectURL(audioElementRef.current?.src ?? '');
      }
    };
  }, [initializeMediaSource, isBrowserSupported]);

  if (!isBrowserSupported) {
    return {
      generateSpeechEdge: () => ({}),
      cancelSpeechEdge: () => ({}),
      voices: [],
    };
  }

  return { generateSpeechEdge, cancelSpeechEdge, voices };
}

export default useTextToSpeechEdge;