🌩️ feat: cloud-based browser voices (#3297)

* initial voice support

* feat: local voices; feat: switch cloud-based voices

* feat: apply voice to hook
This commit is contained in:
Marco Beretta 2024-07-10 22:44:12 +02:00 committed by GitHub
parent 7d5b03dd98
commit b34a4ddac1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 177 additions and 37 deletions

View file

@ -8,20 +8,21 @@ import store from '~/store';
import { cn } from '~/utils';
import ConversationModeSwitch from './ConversationModeSwitch';
import {
CloudBrowserVoicesSwitch,
AutomaticPlaybackSwitch,
TextToSpeechSwitch,
EngineTTSDropdown,
AutomaticPlaybackSwitch,
CacheTTSSwitch,
VoiceDropdown,
PlaybackRate,
} from './TTS';
import {
DecibelSelector,
EngineSTTDropdown,
AutoTranscribeAudioSwitch,
LanguageSTTDropdown,
SpeechToTextSwitch,
AutoSendTextSwitch,
AutoTranscribeAudioSwitch,
EngineSTTDropdown,
DecibelSelector,
} from './STT';
import { useGetCustomConfigSpeechQuery } from 'librechat-data-provider/react-query';
@ -42,6 +43,9 @@ function Speech() {
const [autoSendText, setAutoSendText] = useRecoilState(store.autoSendText);
const [engineTTS, setEngineTTS] = useRecoilState<string>(store.engineTTS);
const [voice, setVoice] = useRecoilState<string>(store.voice);
const [cloudBrowserVoices, setCloudBrowserVoices] = useRecoilState<boolean>(
store.cloudBrowserVoices,
);
const [languageTTS, setLanguageTTS] = useRecoilState<string>(store.languageTTS);
const [automaticPlayback, setAutomaticPlayback] = useRecoilState(store.automaticPlayback);
const [playbackRate, setPlaybackRate] = useRecoilState(store.playbackRate);
@ -61,6 +65,7 @@ function Speech() {
autoSendText: { value: autoSendText, setFunc: setAutoSendText },
engineTTS: { value: engineTTS, setFunc: setEngineTTS },
voice: { value: voice, setFunc: setVoice },
cloudBrowserVoices: { value: cloudBrowserVoices, setFunc: setCloudBrowserVoices },
languageTTS: { value: languageTTS, setFunc: setLanguageTTS },
automaticPlayback: { value: automaticPlayback, setFunc: setAutomaticPlayback },
playbackRate: { value: playbackRate, setFunc: setPlaybackRate },
@ -86,6 +91,7 @@ function Speech() {
autoSendText,
engineTTS,
voice,
cloudBrowserVoices,
languageTTS,
automaticPlayback,
playbackRate,
@ -101,6 +107,7 @@ function Speech() {
setAutoSendText,
setEngineTTS,
setVoice,
setCloudBrowserVoices,
setLanguageTTS,
setAutomaticPlayback,
setPlaybackRate,
@ -168,27 +175,23 @@ function Speech() {
<Tabs.Content value={'simple'}>
<div className="flex flex-col gap-3 text-sm text-black dark:text-gray-50">
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<ConversationModeSwitch />
</div>
<div className="h-px bg-black/20 bg-white/20" role="none" />
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<SpeechToTextSwitch />
</div>
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
<EngineSTTDropdown />
</div>
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
<LanguageSTTDropdown />
</div>
<div className="h-px bg-black/20 bg-white/20" role="none" />
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<TextToSpeechSwitch />
</div>
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
<EngineTTSDropdown />
</div>
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
<VoiceDropdown />
</div>
</div>
@ -196,47 +199,52 @@ function Speech() {
<Tabs.Content value={'advanced'}>
<div className="flex flex-col gap-3 text-sm text-black dark:text-gray-50">
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
<ConversationModeSwitch />
</div>
<div className="h-px bg-black/20 bg-white/20" role="none" />
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<SpeechToTextSwitch />
</div>
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
<EngineSTTDropdown />
</div>
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
<LanguageSTTDropdown />
</div>
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<div className="border-b pb-2 last-of-type:border-b-0 dark:border-gray-700">
<AutoTranscribeAudioSwitch />
</div>
{autoTranscribeAudio && (
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<div className="border-b pb-2 last-of-type:border-b-0 dark:border-gray-700">
<DecibelSelector />
</div>
)}
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
<AutoSendTextSwitch />
</div>
<div className="h-px bg-black/20 bg-white/20" role="none" />
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<TextToSpeechSwitch />
</div>
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
<AutomaticPlaybackSwitch />
</div>
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
<EngineTTSDropdown />
</div>
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
<VoiceDropdown />
</div>
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
{engineTTS === 'browser' && (
<div className="border-b pb-2 last-of-type:border-b-0 dark:border-gray-700">
<CloudBrowserVoicesSwitch />
</div>
)}
<div className="border-b pb-2 last-of-type:border-b-0 dark:border-gray-700">
<PlaybackRate />
</div>
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
<CacheTTSSwitch />
</div>
</div>

View file

@ -0,0 +1,37 @@
import { useRecoilState } from 'recoil';
import { Switch } from '~/components/ui';
import { useLocalize } from '~/hooks';
import store from '~/store';
/**
 * Switch that toggles the `cloudBrowserVoices` setting held in the store.
 *
 * The control is disabled whenever the `textToSpeech` setting is off.
 *
 * @param onCheckedChange Optional callback invoked with the new value after
 * the store has been updated.
 */
export default function CloudBrowserVoicesSwitch({
  onCheckedChange,
}: {
  onCheckedChange?: (value: boolean) => void;
}) {
  const localize = useLocalize();
  const [isCloudEnabled, setIsCloudEnabled] = useRecoilState<boolean>(store.cloudBrowserVoices);
  const [isTextToSpeechOn] = useRecoilState<boolean>(store.textToSpeech);

  // Update the stored setting first, then notify the optional listener.
  const handleCheckedChange = (value: boolean) => {
    setIsCloudEnabled(value);
    onCheckedChange?.(value);
  };

  const label = localize('com_nav_enable_cloud_browser_voice');

  return (
    <div className="flex items-center justify-between">
      <div>{label}</div>
      <Switch
        id="CloudBrowserVoices"
        data-testid="CloudBrowserVoices"
        className="ml-4"
        checked={isCloudEnabled}
        disabled={!isTextToSpeechOn}
        onCheckedChange={handleCheckedChange}
      />
    </div>
  );
}

View file

@ -1,34 +1,73 @@
import React, { useMemo, useEffect, useState } from 'react';
import { useRecoilState } from 'recoil';
import { useMemo, useEffect } from 'react';
import Dropdown from '~/components/ui/DropdownNoState';
import { useVoicesQuery } from '~/data-provider';
import { useLocalize } from '~/hooks';
import store from '~/store';
/**
 * Resolves with the voices exposed by the browser's speech synthesis API.
 *
 * When the first `getVoices()` call returns an empty list, this waits for a
 * single `voiceschanged` event and reads the list again.
 *
 * @returns The available voices, or an empty array when `speechSynthesis`
 * does not exist in the current environment.
 */
const getLocalVoices = (): Promise<SpeechSynthesisVoice[]> => {
  return new Promise((resolve) => {
    // Without the speech synthesis global there is nothing to list; resolve
    // instead of letting a ReferenceError reject the promise.
    if (typeof speechSynthesis === 'undefined') {
      resolve([]);
      return;
    }

    const voices = speechSynthesis.getVoices();
    if (voices.length) {
      resolve(voices);
      return;
    }

    // A one-shot listener leaves any existing `onvoiceschanged` handler
    // untouched and removes itself after firing.
    speechSynthesis.addEventListener(
      'voiceschanged',
      () => resolve(speechSynthesis.getVoices()),
      { once: true },
    );
  });
};
/**
 * Shape of each entry in the `voiceOptions` list that `VoiceDropdown` passes
 * to the dropdown's `options` prop.
 */
type VoiceOption = {
value: string;
display: string;
};
export default function VoiceDropdown() {
const localize = useLocalize();
const [voice, setVoice] = useRecoilState(store.voice);
const { data } = useVoicesQuery();
const [engineTTS] = useRecoilState(store.engineTTS);
const [cloudBrowserVoices] = useRecoilState(store.cloudBrowserVoices);
const externalTextToSpeech = engineTTS === 'external';
const { data: externalVoices = [] } = useVoicesQuery();
const [localVoices, setLocalVoices] = useState<SpeechSynthesisVoice[]>([]);
useEffect(() => {
if (!voice && data?.length) {
setVoice(data[0]);
if (!externalTextToSpeech) {
getLocalVoices().then(setLocalVoices);
}
}, [voice, data, setVoice]);
}, [externalTextToSpeech]);
const voiceOptions = useMemo(
() => (data ?? []).map((v: string) => ({ value: v, display: v })),
[data],
);
useEffect(() => {
if (voice) {
return;
}
if (externalTextToSpeech && externalVoices.length) {
setVoice(externalVoices[0]);
} else if (!externalTextToSpeech && localVoices.length) {
setVoice(localVoices[0].name);
}
}, [voice, setVoice, externalTextToSpeech, externalVoices, localVoices]);
const voiceOptions: VoiceOption[] = useMemo(() => {
if (externalTextToSpeech) {
return externalVoices.map((v) => ({ value: v, display: v }));
} else {
return localVoices
.filter((v) => cloudBrowserVoices || v.localService === true)
.map((v) => ({ value: v.name, display: v.name }));
}
}, [externalTextToSpeech, externalVoices, localVoices, cloudBrowserVoices]);
return (
<div className="flex items-center justify-between">
<div>{localize('com_nav_voice_select')}</div>
<Dropdown
value={voice}
onChange={(value: string) => setVoice(value)}
onChange={setVoice}
options={voiceOptions}
position={'left'}
position="left"
testId="VoiceDropdown"
/>
</div>

View file

@ -0,0 +1,41 @@
import React from 'react';
import '@testing-library/jest-dom/extend-expect';
import { render, fireEvent } from 'test/layout-test-utils';
import CloudBrowserVoicesSwitch from '../CloudBrowserVoicesSwitch';
import { RecoilRoot } from 'recoil';
describe('CloudBrowserVoicesSwitch', () => {
  /**
   * Mock passed as `onCheckedChange` so the test can observe the value the
   * switch reports when it is toggled. Re-created before every test.
   */
  let mockSetCloudBrowserVoices: jest.Mock<void, [boolean]>;

  beforeEach(() => {
    mockSetCloudBrowserVoices = jest.fn();
  });

  it('renders correctly', () => {
    const { getByTestId } = render(
      <RecoilRoot>
        <CloudBrowserVoicesSwitch />
      </RecoilRoot>,
    );

    expect(getByTestId('CloudBrowserVoices')).toBeInTheDocument();
  });

  it('calls onCheckedChange when the switch is toggled', () => {
    const { getByTestId } = render(
      <RecoilRoot>
        <CloudBrowserVoicesSwitch onCheckedChange={mockSetCloudBrowserVoices} />
      </RecoilRoot>,
    );

    // The store default for `cloudBrowserVoices` is false, so the first click turns it on.
    const switchElement = getByTestId('CloudBrowserVoices');
    fireEvent.click(switchElement);

    expect(mockSetCloudBrowserVoices).toHaveBeenCalledWith(true);
  });
});

View file

@ -1,6 +1,7 @@
export { default as CloudBrowserVoicesSwitch } from './CloudBrowserVoicesSwitch';
export { default as AutomaticPlaybackSwitch } from './AutomaticPlaybackSwitch';
export { default as CacheTTSSwitch } from './CacheTTSSwitch';
export { default as EngineTTSDropdown } from './EngineTTSDropdown';
export { default as PlaybackRate } from './PlaybackRate';
export { default as TextToSpeechSwitch } from './TextToSpeechSwitch';
export { default as EngineTTSDropdown } from './EngineTTSDropdown';
export { default as CacheTTSSwitch } from './CacheTTSSwitch';
export { default as VoiceDropdown } from './VoiceDropdown';
export { default as PlaybackRate } from './PlaybackRate';

View file

@ -1,12 +1,24 @@
import { useRecoilState } from 'recoil';
import { useState } from 'react';
import store from '~/store';
function useTextToSpeechBrowser() {
const [cloudBrowserVoices] = useRecoilState(store.cloudBrowserVoices);
const [isSpeaking, setIsSpeaking] = useState(false);
const [voiceName] = useRecoilState(store.voice);
const generateSpeechLocal = (text: string) => {
const synth = window.speechSynthesis;
const voices = synth.getVoices().filter((v) => cloudBrowserVoices || v.localService === true);
const voice = voices.find((v) => v.name === voiceName);
if (!voice) {
return;
}
synth.cancel();
const utterance = new SpeechSynthesisUtterance(text);
utterance.voice = voice;
utterance.onend = () => {
setIsSpeaking(false);
};

View file

@ -641,6 +641,7 @@ export default {
com_nav_delete_cache_storage: 'Delete TTS cache storage',
com_nav_enable_cache_tts: 'Enable cache TTS',
com_nav_voice_select: 'Voice',
com_nav_enable_cloud_browser_voice: 'Use cloud-based voices',
com_nav_info_enter_to_send:
'When enabled, pressing `ENTER` will send your message. When disabled, pressing Enter will add a new line, and you\'ll need to press `CTRL + ENTER` to send your message.',
com_nav_info_save_draft:

View file

@ -50,6 +50,7 @@ const localStorageAtoms = {
textToSpeech: atomWithLocalStorage('textToSpeech', true),
engineTTS: atomWithLocalStorage('engineTTS', 'browser'),
voice: atomWithLocalStorage('voice', ''),
cloudBrowserVoices: atomWithLocalStorage('cloudBrowserVoices', false),
languageTTS: atomWithLocalStorage('languageTTS', ''),
automaticPlayback: atomWithLocalStorage('automaticPlayback', false),
playbackRate: atomWithLocalStorage<number | null>('playbackRate', null),