mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 17:00:15 +01:00
🌩️ feat: cloud-based browser voices (#3297)
* initial voice support * feat: local voices; feat: switch cloud-based voices * feat: apply voice to hook
This commit is contained in:
parent
7d5b03dd98
commit
b34a4ddac1
9 changed files with 177 additions and 37 deletions
|
|
@ -8,20 +8,21 @@ import store from '~/store';
|
|||
import { cn } from '~/utils';
|
||||
import ConversationModeSwitch from './ConversationModeSwitch';
|
||||
import {
|
||||
CloudBrowserVoicesSwitch,
|
||||
AutomaticPlaybackSwitch,
|
||||
TextToSpeechSwitch,
|
||||
EngineTTSDropdown,
|
||||
AutomaticPlaybackSwitch,
|
||||
CacheTTSSwitch,
|
||||
VoiceDropdown,
|
||||
PlaybackRate,
|
||||
} from './TTS';
|
||||
import {
|
||||
DecibelSelector,
|
||||
EngineSTTDropdown,
|
||||
AutoTranscribeAudioSwitch,
|
||||
LanguageSTTDropdown,
|
||||
SpeechToTextSwitch,
|
||||
AutoSendTextSwitch,
|
||||
AutoTranscribeAudioSwitch,
|
||||
EngineSTTDropdown,
|
||||
DecibelSelector,
|
||||
} from './STT';
|
||||
import { useGetCustomConfigSpeechQuery } from 'librechat-data-provider/react-query';
|
||||
|
||||
|
|
@ -42,6 +43,9 @@ function Speech() {
|
|||
const [autoSendText, setAutoSendText] = useRecoilState(store.autoSendText);
|
||||
const [engineTTS, setEngineTTS] = useRecoilState<string>(store.engineTTS);
|
||||
const [voice, setVoice] = useRecoilState<string>(store.voice);
|
||||
const [cloudBrowserVoices, setCloudBrowserVoices] = useRecoilState<boolean>(
|
||||
store.cloudBrowserVoices,
|
||||
);
|
||||
const [languageTTS, setLanguageTTS] = useRecoilState<string>(store.languageTTS);
|
||||
const [automaticPlayback, setAutomaticPlayback] = useRecoilState(store.automaticPlayback);
|
||||
const [playbackRate, setPlaybackRate] = useRecoilState(store.playbackRate);
|
||||
|
|
@ -61,6 +65,7 @@ function Speech() {
|
|||
autoSendText: { value: autoSendText, setFunc: setAutoSendText },
|
||||
engineTTS: { value: engineTTS, setFunc: setEngineTTS },
|
||||
voice: { value: voice, setFunc: setVoice },
|
||||
cloudBrowserVoices: { value: cloudBrowserVoices, setFunc: setCloudBrowserVoices },
|
||||
languageTTS: { value: languageTTS, setFunc: setLanguageTTS },
|
||||
automaticPlayback: { value: automaticPlayback, setFunc: setAutomaticPlayback },
|
||||
playbackRate: { value: playbackRate, setFunc: setPlaybackRate },
|
||||
|
|
@ -86,6 +91,7 @@ function Speech() {
|
|||
autoSendText,
|
||||
engineTTS,
|
||||
voice,
|
||||
cloudBrowserVoices,
|
||||
languageTTS,
|
||||
automaticPlayback,
|
||||
playbackRate,
|
||||
|
|
@ -101,6 +107,7 @@ function Speech() {
|
|||
setAutoSendText,
|
||||
setEngineTTS,
|
||||
setVoice,
|
||||
setCloudBrowserVoices,
|
||||
setLanguageTTS,
|
||||
setAutomaticPlayback,
|
||||
setPlaybackRate,
|
||||
|
|
@ -168,27 +175,23 @@ function Speech() {
|
|||
|
||||
<Tabs.Content value={'simple'}>
|
||||
<div className="flex flex-col gap-3 text-sm text-black dark:text-gray-50">
|
||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||
<ConversationModeSwitch />
|
||||
</div>
|
||||
<div className="h-px bg-black/20 bg-white/20" role="none" />
|
||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||
<SpeechToTextSwitch />
|
||||
</div>
|
||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
|
||||
<EngineSTTDropdown />
|
||||
</div>
|
||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
|
||||
<LanguageSTTDropdown />
|
||||
</div>
|
||||
<div className="h-px bg-black/20 bg-white/20" role="none" />
|
||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||
<TextToSpeechSwitch />
|
||||
</div>
|
||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
|
||||
<EngineTTSDropdown />
|
||||
</div>
|
||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
|
||||
<VoiceDropdown />
|
||||
</div>
|
||||
</div>
|
||||
|
|
@ -196,47 +199,52 @@ function Speech() {
|
|||
|
||||
<Tabs.Content value={'advanced'}>
|
||||
<div className="flex flex-col gap-3 text-sm text-black dark:text-gray-50">
|
||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
|
||||
<ConversationModeSwitch />
|
||||
</div>
|
||||
<div className="h-px bg-black/20 bg-white/20" role="none" />
|
||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||
<SpeechToTextSwitch />
|
||||
</div>
|
||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
|
||||
<EngineSTTDropdown />
|
||||
</div>
|
||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
|
||||
<LanguageSTTDropdown />
|
||||
</div>
|
||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||
<div className="border-b pb-2 last-of-type:border-b-0 dark:border-gray-700">
|
||||
<AutoTranscribeAudioSwitch />
|
||||
</div>
|
||||
{autoTranscribeAudio && (
|
||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||
<div className="border-b pb-2 last-of-type:border-b-0 dark:border-gray-700">
|
||||
<DecibelSelector />
|
||||
</div>
|
||||
)}
|
||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
|
||||
<AutoSendTextSwitch />
|
||||
</div>
|
||||
<div className="h-px bg-black/20 bg-white/20" role="none" />
|
||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||
<TextToSpeechSwitch />
|
||||
</div>
|
||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
|
||||
<AutomaticPlaybackSwitch />
|
||||
</div>
|
||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
|
||||
<EngineTTSDropdown />
|
||||
</div>
|
||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
|
||||
<VoiceDropdown />
|
||||
</div>
|
||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||
{engineTTS === 'browser' && (
|
||||
<div className="border-b pb-2 last-of-type:border-b-0 dark:border-gray-700">
|
||||
<CloudBrowserVoicesSwitch />
|
||||
</div>
|
||||
)}
|
||||
<div className="border-b pb-2 last-of-type:border-b-0 dark:border-gray-700">
|
||||
<PlaybackRate />
|
||||
</div>
|
||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||
<div className="border-b last-of-type:border-b-0 dark:border-gray-700">
|
||||
<CacheTTSSwitch />
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,37 @@
|
|||
import { useRecoilState } from 'recoil';
|
||||
import { Switch } from '~/components/ui';
|
||||
import { useLocalize } from '~/hooks';
|
||||
import store from '~/store';
|
||||
|
||||
/**
 * Settings row with a switch bound to the `cloudBrowserVoices` store atom.
 *
 * The switch is disabled whenever the `textToSpeech` store atom is false.
 *
 * @param onCheckedChange - Optional callback invoked with the new value
 *   after the store has been updated.
 */
export default function CloudBrowserVoicesSwitch({
  onCheckedChange,
}: {
  onCheckedChange?: (value: boolean) => void;
}) {
  const localize = useLocalize();
  const [isCloudEnabled, setIsCloudEnabled] = useRecoilState<boolean>(store.cloudBrowserVoices);
  const [isTextToSpeechOn] = useRecoilState<boolean>(store.textToSpeech);

  // Persist the new value first, then notify the optional listener.
  const toggleCloudVoices = (next: boolean) => {
    setIsCloudEnabled(next);
    onCheckedChange?.(next);
  };

  const label = localize('com_nav_enable_cloud_browser_voice');

  return (
    <div className="flex items-center justify-between">
      <div>{label}</div>
      <Switch
        id="CloudBrowserVoices"
        checked={isCloudEnabled}
        onCheckedChange={toggleCloudVoices}
        className="ml-4"
        data-testid="CloudBrowserVoices"
        disabled={!isTextToSpeechOn}
      />
    </div>
  );
}
|
||||
|
|
@ -1,34 +1,73 @@
|
|||
import React, { useMemo, useEffect, useState } from 'react';
|
||||
import { useRecoilState } from 'recoil';
|
||||
import { useMemo, useEffect } from 'react';
|
||||
import Dropdown from '~/components/ui/DropdownNoState';
|
||||
import { useVoicesQuery } from '~/data-provider';
|
||||
import { useLocalize } from '~/hooks';
|
||||
import store from '~/store';
|
||||
|
||||
/**
 * Resolves with the voices exposed by the browser's `speechSynthesis` object.
 *
 * Some browsers populate the voice list asynchronously, so an empty initial
 * list is followed by a one-time wait for the `voiceschanged` event.
 *
 * @returns A promise for the available voices. It resolves to an empty array
 *   when `speechSynthesis` does not exist in the current runtime, so callers
 *   never see a rejection.
 */
const getLocalVoices = (): Promise<SpeechSynthesisVoice[]> => {
  return new Promise((resolve) => {
    // Unsupported runtime: report "no voices" rather than throwing inside the executor.
    if (typeof speechSynthesis === 'undefined') {
      resolve([]);
      return;
    }

    const voices = speechSynthesis.getVoices();
    if (voices.length) {
      resolve(voices);
      return;
    }

    // A one-shot listener avoids overwriting any `onvoiceschanged` handler that
    // other code installed, and is released automatically once it has fired.
    speechSynthesis.addEventListener(
      'voiceschanged',
      () => resolve(speechSynthesis.getVoices()),
      { once: true },
    );
  });
};
|
||||
|
||||
type VoiceOption = {
|
||||
value: string;
|
||||
display: string;
|
||||
};
|
||||
|
||||
export default function VoiceDropdown() {
|
||||
const localize = useLocalize();
|
||||
const [voice, setVoice] = useRecoilState(store.voice);
|
||||
const { data } = useVoicesQuery();
|
||||
const [engineTTS] = useRecoilState(store.engineTTS);
|
||||
const [cloudBrowserVoices] = useRecoilState(store.cloudBrowserVoices);
|
||||
const externalTextToSpeech = engineTTS === 'external';
|
||||
const { data: externalVoices = [] } = useVoicesQuery();
|
||||
const [localVoices, setLocalVoices] = useState<SpeechSynthesisVoice[]>([]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!voice && data?.length) {
|
||||
setVoice(data[0]);
|
||||
if (!externalTextToSpeech) {
|
||||
getLocalVoices().then(setLocalVoices);
|
||||
}
|
||||
}, [voice, data, setVoice]);
|
||||
}, [externalTextToSpeech]);
|
||||
|
||||
const voiceOptions = useMemo(
|
||||
() => (data ?? []).map((v: string) => ({ value: v, display: v })),
|
||||
[data],
|
||||
);
|
||||
useEffect(() => {
|
||||
if (voice) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (externalTextToSpeech && externalVoices.length) {
|
||||
setVoice(externalVoices[0]);
|
||||
} else if (!externalTextToSpeech && localVoices.length) {
|
||||
setVoice(localVoices[0].name);
|
||||
}
|
||||
}, [voice, setVoice, externalTextToSpeech, externalVoices, localVoices]);
|
||||
|
||||
const voiceOptions: VoiceOption[] = useMemo(() => {
|
||||
if (externalTextToSpeech) {
|
||||
return externalVoices.map((v) => ({ value: v, display: v }));
|
||||
} else {
|
||||
return localVoices
|
||||
.filter((v) => cloudBrowserVoices || v.localService === true)
|
||||
.map((v) => ({ value: v.name, display: v.name }));
|
||||
}
|
||||
}, [externalTextToSpeech, externalVoices, localVoices, cloudBrowserVoices]);
|
||||
|
||||
return (
|
||||
<div className="flex items-center justify-between">
|
||||
<div>{localize('com_nav_voice_select')}</div>
|
||||
<Dropdown
|
||||
value={voice}
|
||||
onChange={(value: string) => setVoice(value)}
|
||||
onChange={setVoice}
|
||||
options={voiceOptions}
|
||||
position={'left'}
|
||||
position="left"
|
||||
testId="VoiceDropdown"
|
||||
/>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,41 @@
|
|||
import React from 'react';
|
||||
import '@testing-library/jest-dom/extend-expect';
|
||||
import { render, fireEvent } from 'test/layout-test-utils';
|
||||
import CloudBrowserVoicesSwitch from '../CloudBrowserVoicesSwitch';
|
||||
import { RecoilRoot } from 'recoil';
|
||||
|
||||
describe('CloudBrowserVoicesSwitch', () => {
|
||||
/**
|
||||
* Mock function passed as `onCheckedChange` to observe cloud-browser-voices toggles.
|
||||
*/
|
||||
let mockSetCloudBrowserVoices:
|
||||
| jest.Mock<void, [boolean]>
|
||||
| ((value: boolean) => void)
|
||||
| undefined;
|
||||
|
||||
beforeEach(() => {
|
||||
mockSetCloudBrowserVoices = jest.fn();
|
||||
});
|
||||
|
||||
it('renders correctly', () => {
|
||||
const { getByTestId } = render(
|
||||
<RecoilRoot>
|
||||
<CloudBrowserVoicesSwitch />
|
||||
</RecoilRoot>,
|
||||
);
|
||||
|
||||
expect(getByTestId('CloudBrowserVoices')).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it('calls onCheckedChange when the switch is toggled', () => {
|
||||
const { getByTestId } = render(
|
||||
<RecoilRoot>
|
||||
<CloudBrowserVoicesSwitch onCheckedChange={mockSetCloudBrowserVoices} />
|
||||
</RecoilRoot>,
|
||||
);
|
||||
const switchElement = getByTestId('CloudBrowserVoices');
|
||||
fireEvent.click(switchElement);
|
||||
|
||||
expect(mockSetCloudBrowserVoices).toHaveBeenCalledWith(true);
|
||||
});
|
||||
});
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
export { default as CloudBrowserVoicesSwitch } from './CloudBrowserVoicesSwitch';
|
||||
export { default as AutomaticPlaybackSwitch } from './AutomaticPlaybackSwitch';
|
||||
export { default as CacheTTSSwitch } from './CacheTTSSwitch';
|
||||
export { default as EngineTTSDropdown } from './EngineTTSDropdown';
|
||||
export { default as PlaybackRate } from './PlaybackRate';
|
||||
export { default as TextToSpeechSwitch } from './TextToSpeechSwitch';
|
||||
export { default as EngineTTSDropdown } from './EngineTTSDropdown';
|
||||
export { default as CacheTTSSwitch } from './CacheTTSSwitch';
|
||||
export { default as VoiceDropdown } from './VoiceDropdown';
|
||||
export { default as PlaybackRate } from './PlaybackRate';
|
||||
|
|
|
|||
|
|
@ -1,12 +1,24 @@
|
|||
import { useRecoilState } from 'recoil';
|
||||
import { useState } from 'react';
|
||||
import store from '~/store';
|
||||
|
||||
function useTextToSpeechBrowser() {
|
||||
const [cloudBrowserVoices] = useRecoilState(store.cloudBrowserVoices);
|
||||
const [isSpeaking, setIsSpeaking] = useState(false);
|
||||
const [voiceName] = useRecoilState(store.voice);
|
||||
|
||||
const generateSpeechLocal = (text: string) => {
|
||||
const synth = window.speechSynthesis;
|
||||
const voices = synth.getVoices().filter((v) => cloudBrowserVoices || v.localService === true);
|
||||
const voice = voices.find((v) => v.name === voiceName);
|
||||
|
||||
if (!voice) {
|
||||
return;
|
||||
}
|
||||
|
||||
synth.cancel();
|
||||
const utterance = new SpeechSynthesisUtterance(text);
|
||||
utterance.voice = voice;
|
||||
utterance.onend = () => {
|
||||
setIsSpeaking(false);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -641,6 +641,7 @@ export default {
|
|||
com_nav_delete_cache_storage: 'Delete TTS cache storage',
|
||||
com_nav_enable_cache_tts: 'Enable cache TTS',
|
||||
com_nav_voice_select: 'Voice',
|
||||
com_nav_enable_cloud_browser_voice: 'Use cloud-based voices',
|
||||
com_nav_info_enter_to_send:
|
||||
'When enabled, pressing `ENTER` will send your message. When disabled, pressing Enter will add a new line, and you\'ll need to press `CTRL + ENTER` to send your message.',
|
||||
com_nav_info_save_draft:
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ const localStorageAtoms = {
|
|||
textToSpeech: atomWithLocalStorage('textToSpeech', true),
|
||||
engineTTS: atomWithLocalStorage('engineTTS', 'browser'),
|
||||
voice: atomWithLocalStorage('voice', ''),
|
||||
cloudBrowserVoices: atomWithLocalStorage('cloudBrowserVoices', false),
|
||||
languageTTS: atomWithLocalStorage('languageTTS', ''),
|
||||
automaticPlayback: atomWithLocalStorage('automaticPlayback', false),
|
||||
playbackRate: atomWithLocalStorage<number | null>('playbackRate', null),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue