🎤 feat: add custom speech config, browser TTS/STT features, and dynamic speech tab settings (#2921)

* feat: update useTextToSpeech and useSpeechToText hooks to support external audio endpoints

This commit updates the useTextToSpeech and useSpeechToText hooks in the Input directory to support external audio endpoints. It introduces the useGetExternalTextToSpeech and useGetExternalSpeechToText hooks, which determine whether the audio endpoints should be set to 'browser' or 'external' based on the values of the endpointTTS and endpointSTT Recoil states. The useTextToSpeech and useSpeechToText hooks now use these new hooks to determine whether to use external audio endpoints.
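
A minimal sketch of the pattern described above (at this point in the branch the Recoil atoms were still named endpointSTT/endpointTTS; later commits in this PR rename them to engineSTT/engineTTS and consolidate this logic into useGetAudioSettings, shown further down):

import { useRecoilValue } from 'recoil';
import store from '~/store';

// Sketch only: report whether speech-to-text should use an
// external endpoint instead of the browser's built-in engine.
const useGetExternalSpeechToText = (): boolean => {
  const endpointSTT = useRecoilValue<string>(store.endpointSTT);
  return endpointSTT === 'external';
};

export default useGetExternalSpeechToText;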

* feat: add userSelect style to ConversationModeSwitch label

* fix: remove unused updateTokenWebsocket function and import

The updateTokenWebsocket function and its import are no longer used in the OpenAIClient module. This commit removes the function and import to clean up the codebase.

* feat: support external audio endpoints in useTextToSpeech and useSpeechToText hooks

* feat: update AutomaticPlayback component to AutomaticPlaybackSwitch; tests: added AutomaticPlaybackSwitch.spec

This commit renames the AutomaticPlayback component to AutomaticPlaybackSwitch in the Speech directory. The new name better reflects the purpose of the component and aligns with the naming convention used in the codebase.

* feat: update useSpeechToText hook to include interimTranscript

This commit updates the useSpeechToText hook usage in client/src/components/Chat/Input/AudioRecorder.tsx to include the interimTranscript state, allowing real-time display of the speech-to-text transcription while the user is still speaking. The interimTranscript now updates the text area value during recording.
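
A condensed sketch of the interim/final split this enables (not the actual AudioRecorder code, which appears in the diffs below): the text area mirrors interimTranscript while listening and commits the final transcript once recording stops.

import React, { useEffect, useState } from 'react';

// Illustrative component: live partial results while speaking,
// final text after recording ends.
function TranscriptPreview({
  isListening,
  interimTranscript,
  speechText,
}: {
  isListening: boolean;
  interimTranscript: string;
  speechText: string;
}) {
  const [text, setText] = useState('');
  useEffect(() => {
    setText(isListening ? interimTranscript : speechText);
  }, [isListening, interimTranscript, speechText]);
  return <textarea value={text} readOnly />;
}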

* feat: Add customConfigSpeech API endpoint for retrieving custom speech configuration

This commit adds a new API endpoint in the customConfigSpeech route file under the speech routes directory. The endpoint retrieves the custom speech configuration using the getCustomConfigSpeech function from the ~/server/services/Files/Audio module.
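
Combining the route mounts below ('/speech' on the files router, '/config' on the speech router, '/get' on this route), the endpoint resolves to GET /api/files/speech/config/get (the /api/files prefix is an assumption based on where the files router is typically mounted). A client-side sketch:

// Hypothetical fetch helper. The handler replies with an empty
// 200 body when no speechTab configuration exists, so guard the parse.
async function fetchSpeechTabSettings(): Promise<Record<string, unknown>> {
  const res = await fetch('/api/files/speech/config/get');
  const body = await res.text();
  return body ? JSON.parse(body) : {};
}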

* feat: update speech-related store variable names; fix: getCustomConfigSpeech

* fix: client tests, removed unused import

* feat: Update useCustomConfigSpeechQuery to return an array of custom speech configurations

This commit modifies the useCustomConfigSpeechQuery function in client/src/data-provider/queries.ts to return an array of custom speech configurations instead of a single object, allowing for better handling and manipulation of the data in the application.

* refactor: Update variable name in speechTab schema

* refactor: removed unused and nested code

* fix: using recoilState

* refactor: Update Speech component to use useCallback for setting settings

* fix: test

* fix: tests

* feat: ensure that the settings don't change after modifying them through the UI

* remove comment

* fix: Handle error gracefully in getCustomConfigSpeech and getVoices endpoints

* fix: Handle error

* fix: backend tests

* fix: invalid custom config logging

* chore: add back custom config info logging

* chore: revert loadCustomConfig spec

---------

Co-authored-by: Danny Avila <danny@librechat.ai>
Marco Beretta 2024-07-05 17:13:34 +03:00 committed by GitHub
parent 5d985746cb
commit 1aad315de6
50 changed files with 598 additions and 179 deletions

View file

@@ -27,7 +27,6 @@ const {
createContextHandlers,
} = require('./prompts');
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
const { updateTokenWebsocket } = require('~/server/services/Files/Audio');
const { isEnabled, sleep } = require('~/server/utils');
const { handleOpenAIErrors } = require('./tools/util');
const spendTokens = require('~/models/spendTokens');
@@ -595,7 +594,6 @@ class OpenAIClient extends BaseClient {
payload,
(progressMessage) => {
if (progressMessage === '[DONE]') {
updateTokenWebsocket('[DONE]');
return;
}

View file

@@ -1,19 +1,11 @@
const express = require('express');
const {
uaParser,
checkBan,
requireJwtAuth,
createFileLimiters,
createTTSLimiters,
createSTTLimiters,
} = require('~/server/middleware');
const { uaParser, checkBan, requireJwtAuth, createFileLimiters } = require('~/server/middleware');
const { createMulterInstance } = require('./multer');
const files = require('./files');
const images = require('./images');
const avatar = require('./avatar');
const stt = require('./stt');
const tts = require('./tts');
const speech = require('./speech');
const initialize = async () => {
const router = express.Router();
@@ -21,11 +13,8 @@ const initialize = async () => {
router.use(checkBan);
router.use(uaParser);
/* Important: stt/tts routes must be added before the upload limiters */
const { sttIpLimiter, sttUserLimiter } = createSTTLimiters();
const { ttsIpLimiter, ttsUserLimiter } = createTTSLimiters();
router.use('/stt', sttIpLimiter, sttUserLimiter, stt);
router.use('/tts', ttsIpLimiter, ttsUserLimiter, tts);
/* Important: speech route must be added before the upload limiters */
router.use('/speech', speech);
const upload = await createMulterInstance();
const { fileUploadIpLimiter, fileUploadUserLimiter } = createFileLimiters();

View file

@@ -0,0 +1,10 @@
const express = require('express');
const router = express.Router();
const { getCustomConfigSpeech } = require('~/server/services/Files/Audio');
router.get('/get', async (req, res) => {
await getCustomConfigSpeech(req, res);
});
module.exports = router;

View file

@@ -0,0 +1,17 @@
const express = require('express');
const { createTTSLimiters, createSTTLimiters } = require('~/server/middleware');
const stt = require('./stt');
const tts = require('./tts');
const customConfigSpeech = require('./customConfigSpeech');
const router = express.Router();
const { sttIpLimiter, sttUserLimiter } = createSTTLimiters();
const { ttsIpLimiter, ttsUserLimiter } = createTTSLimiters();
router.use('/stt', sttIpLimiter, sttUserLimiter, stt);
router.use('/tts', ttsIpLimiter, ttsUserLimiter, tts);
router.use('/config', customConfigSpeech);
module.exports = router;

View file

@@ -76,8 +76,28 @@ Please specify a correct \`imageOutputType\` value (case-sensitive).
);
}
if (!result.success) {
i === 0 && logger.error(`Invalid custom config file at ${configPath}`, result.error);
i === 0 && i++;
let errorMessage = `Invalid custom config file at ${configPath}:
${JSON.stringify(result.error, null, 2)}`;
if (i === 0) {
logger.error(errorMessage);
const speechError = result.error.errors.find(
(err) =>
err.code === 'unrecognized_keys' &&
(err.message?.includes('stt') || err.message?.includes('tts')),
);
if (speechError) {
logger.warn(`
The Speech-to-text and Text-to-speech configuration format has recently changed.
If you're getting this error, please refer to the latest documentation:
https://www.librechat.ai/docs/configuration/stt_tts`);
}
i++;
}
return null;
} else {
logger.info('Custom config file loaded:');
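
The warning above exists because the stt/tts blocks moved from the top level of the custom config file to a new speech section. A sketch of the new shape as a TypeScript object, mirroring the configSchema changes later in this commit (the openai provider key and all values are illustrative):

// Old: top-level `tts`/`stt` keys.
// New (this PR): everything nested under `speech`.
const speechConfig = {
  speech: {
    tts: { openai: { /* provider settings */ } },
    stt: { openai: { /* provider settings */ } },
    speechTab: {
      advancedMode: false,
      speechToText: { engineSTT: 'external', autoSendText: true },
      textToSpeech: { engineTTS: 'browser', automaticPlayback: false },
    },
  },
};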

View file

@@ -0,0 +1,50 @@
const getCustomConfig = require('~/server/services/Config/getCustomConfig');
/**
* This function retrieves the speechTab settings from the custom configuration
* It first fetches the custom configuration
* Then, it checks if the custom configuration and the speechTab schema exist
* If they do, it sends the speechTab settings as a JSON response
* If they don't, it throws an error
*
* @param {Object} req - The request object
* @param {Object} res - The response object
* @returns {Promise<void>}
* @throws {Error} - If the custom configuration or the speechTab schema is missing, an error is thrown
*/
async function getCustomConfigSpeech(req, res) {
try {
const customConfig = await getCustomConfig();
if (!customConfig || !customConfig.speech?.speechTab) {
throw new Error('Configuration or speechTab schema is missing');
}
const ttsSchema = customConfig.speech?.speechTab;
let settings = {};
if (ttsSchema.advancedMode !== undefined) {
settings.advancedMode = ttsSchema.advancedMode;
}
if (ttsSchema.speechToText) {
for (const key in ttsSchema.speechToText) {
if (ttsSchema.speechToText[key] !== undefined) {
settings[key] = ttsSchema.speechToText[key];
}
}
}
if (ttsSchema.textToSpeech) {
for (const key in ttsSchema.textToSpeech) {
if (ttsSchema.textToSpeech[key] !== undefined) {
settings[key] = ttsSchema.textToSpeech[key];
}
}
}
res.json(settings);
} catch (error) {
res.status(200).send();
}
}
module.exports = getCustomConfigSpeech;
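
Note how the handler flattens speechTab.speechToText and speechTab.textToSpeech into a single level alongside advancedMode, so a typical response looks like this (values illustrative):

// Example flattened payload from /config/get:
const exampleSettings = {
  advancedMode: false,
  engineSTT: 'external',    // from speechTab.speechToText
  autoSendText: true,       // from speechTab.speechToText
  engineTTS: 'browser',     // from speechTab.textToSpeech
  automaticPlayback: false, // from speechTab.textToSpeech
};

Also note that the catch block replies with an empty 200 rather than an error status, so clients can treat a missing speechTab configuration as "no overrides".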

View file

@@ -1,4 +1,3 @@
const { logger } = require('~/config');
const getCustomConfig = require('~/server/services/Config/getCustomConfig');
const { getProvider } = require('./textToSpeech');
@@ -16,11 +15,11 @@ async function getVoices(req, res) {
try {
const customConfig = await getCustomConfig();
if (!customConfig || !customConfig?.tts) {
if (!customConfig || !customConfig?.speech?.tts) {
throw new Error('Configuration or TTS schema is missing');
}
const ttsSchema = customConfig?.tts;
const ttsSchema = customConfig?.speech?.tts;
const provider = getProvider(ttsSchema);
let voices;
@@ -40,8 +39,7 @@ async function getVoices(req, res) {
res.json(voices);
} catch (error) {
logger.error(`Failed to get voices: ${error.message}`);
res.status(500).json({ error: 'Failed to get voices' });
res.status(500).json({ error: `Failed to get voices: ${error.message}` });
}
}

View file

@@ -1,11 +1,11 @@
const getVoices = require('./getVoices');
const getCustomConfigSpeech = require('./getCustomConfigSpeech');
const textToSpeech = require('./textToSpeech');
const speechToText = require('./speechToText');
const { updateTokenWebsocket } = require('./webSocket');
module.exports = {
getVoices,
getCustomConfigSpeech,
speechToText,
...textToSpeech,
updateTokenWebsocket,
};

View file

@@ -25,7 +25,7 @@ async function handleResponse(response) {
}
function getProvider(sttSchema) {
if (sttSchema.openai) {
if (sttSchema?.openai) {
return 'openai';
}
@@ -176,7 +176,7 @@ async function speechToText(req, res) {
const audioReadStream = Readable.from(audioBuffer);
audioReadStream.path = 'audio.wav';
const provider = getProvider(customConfig.stt);
const provider = getProvider(customConfig.speech.stt);
let [url, data, headers] = [];

View file

@@ -191,8 +191,8 @@ function localAIProvider(ttsSchema, input, voice) {
* @returns {Promise<[string, TProviderSchema]>}
*/
async function getProviderSchema(customConfig) {
const provider = getProvider(customConfig.tts);
return [provider, customConfig.tts[provider]];
const provider = getProvider(customConfig.speech.tts);
return [provider, customConfig.speech.tts[provider]];
}
/**

View file

@@ -1,31 +0,0 @@
let token = '';
function updateTokenWebsocket(newToken) {
console.log('Token:', newToken);
token = newToken;
}
function sendTextToWebsocket(ws, onDataReceived) {
if (token === '[DONE]') {
ws.send(' ');
return;
}
if (ws.readyState === WebSocket.OPEN) {
ws.send(token);
ws.onmessage = function (event) {
console.log('Received:', event.data);
if (onDataReceived) {
onDataReceived(event.data); // Pass the received data to the callback function
}
};
} else {
console.error('WebSocket is not open. Ready state is: ' + ws.readyState);
}
}
module.exports = {
updateTokenWebsocket,
sendTextToWebsocket,
};

View file

@@ -31,15 +31,26 @@ export default function AudioRecorder({
}
};
const { isListening, isLoading, startRecording, stopRecording, speechText, clearText } =
useSpeechToText(handleTranscriptionComplete);
const {
isListening,
isLoading,
startRecording,
stopRecording,
interimTranscript,
speechText,
clearText,
} = useSpeechToText(handleTranscriptionComplete);
useEffect(() => {
if (textAreaRef.current) {
if (isListening && textAreaRef.current) {
methods.setValue('text', interimTranscript, {
shouldValidate: true,
});
} else if (textAreaRef.current) {
textAreaRef.current.value = speechText;
methods.setValue('text', speechText, { shouldValidate: true });
}
}, [speechText, methods, textAreaRef]);
}, [interimTranscript, speechText, methods, textAreaRef]);
const handleStartRecording = async () => {
await startRecording();

View file

@@ -38,8 +38,8 @@ const ChatForm = ({ index = 0 }) => {
const submitButtonRef = useRef<HTMLButtonElement>(null);
const textAreaRef = useRef<HTMLTextAreaElement | null>(null);
const SpeechToText = useRecoilValue(store.SpeechToText);
const TextToSpeech = useRecoilValue(store.TextToSpeech);
const SpeechToText = useRecoilValue(store.speechToText);
const TextToSpeech = useRecoilValue(store.textToSpeech);
const automaticPlayback = useRecoilValue(store.automaticPlayback);
const [showStopButton, setShowStopButton] = useRecoilState(store.showStopButtonByIndex(index));

View file

@@ -96,7 +96,7 @@ export default function DataTable<TData, TValue>({ columns, data }: DataTablePro
deleteFiles({ files: filesToDelete as TFile[] });
setRowSelection({});
}}
className="dark:hover:bg-gray-850/25 ml-1 gap-2 sm:ml-0"
className="ml-1 gap-2 dark:hover:bg-gray-850/25 sm:ml-0"
disabled={!table.getFilteredSelectedRowModel().rows.length || isDeleting}
>
{isDeleting ? (

View file

@@ -39,7 +39,7 @@ export default function HoverButtons({
const { endpoint: _endpoint, endpointType } = conversation ?? {};
const endpoint = endpointType ?? _endpoint;
const [isCopied, setIsCopied] = useState(false);
const [TextToSpeech] = useRecoilState<boolean>(store.TextToSpeech);
const [TextToSpeech] = useRecoilState<boolean>(store.textToSpeech);
const {
hideEditButton,

View file

@@ -106,7 +106,7 @@ export default function DataTableFile<TData, TValue>({
deleteFiles({ files: filesToDelete as TFile[] });
setRowSelection({});
}}
className="dark:hover:bg-gray-850/25 ml-1 gap-2 sm:ml-0"
className="ml-1 gap-2 dark:hover:bg-gray-850/25 sm:ml-0"
disabled={!table.getFilteredSelectedRowModel().rows.length || isDeleting}
>
{isDeleting ? (

View file

@@ -75,8 +75,8 @@ export const fileTableColumns: ColumnDef<TFile>[] = [
return (
<>
{attachedVectorStores.map((vectorStore, index) => {
if (index === 4)
{return (
if (index === 4) {
return (
<span
key={index}
className="ml-2 mt-2 flex w-fit flex-row items-center rounded-full bg-[#f5f5f5] px-2 text-gray-500"
@@ -85,8 +85,11 @@ export const fileTableColumns: ColumnDef<TFile>[] = [
&nbsp;
{attachedVectorStores.length - index} more
</span>
);}
if (index > 4) {return null;}
);
}
if (index > 4) {
return null;
}
return (
<span key={index} className="ml-2 mt-2 rounded-full bg-[#f2f8ec] px-2 text-[#91c561]">
{vectorStore.name}

View file

@@ -8,7 +8,7 @@ export default function ScrollToBottom({ scrollHandler }: Props) {
return (
<button
onClick={scrollHandler}
className="dark:bg-gray-850/90 absolute bottom-5 right-1/2 cursor-pointer rounded-full border border-gray-200 bg-white bg-clip-padding text-gray-600 dark:border-white/10 dark:text-gray-200"
className="absolute bottom-5 right-1/2 cursor-pointer rounded-full border border-gray-200 bg-white bg-clip-padding text-gray-600 dark:border-white/10 dark:bg-gray-850/90 dark:text-gray-200"
>
<svg
width="24"

View file

@@ -10,18 +10,16 @@ export default function ConversationModeSwitch({
}) {
const localize = useLocalize();
const [conversationMode, setConversationMode] = useRecoilState<boolean>(store.conversationMode);
const [advancedMode] = useRecoilState<boolean>(store.advancedMode);
const [textToSpeech] = useRecoilState<boolean>(store.TextToSpeech);
const [speechToText] = useRecoilState<boolean>(store.speechToText);
const [textToSpeech] = useRecoilState<boolean>(store.textToSpeech);
const [, setAutoSendText] = useRecoilState<boolean>(store.autoSendText);
const [, setDecibelValue] = useRecoilState(store.decibelValue);
const [, setAutoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);
const handleCheckedChange = (value: boolean) => {
if (!advancedMode) {
setAutoTranscribeAudio(value);
setAutoSendText(value);
setDecibelValue(-45);
}
setConversationMode(value);
if (onCheckedChange) {
onCheckedChange(value);
@@ -40,7 +38,7 @@ export default function ConversationModeSwitch({
onCheckedChange={handleCheckedChange}
className="ml-4"
data-testid="ConversationMode"
disabled={!textToSpeech}
disabled={!textToSpeech || !speechToText}
/>
</div>
</div>

View file

@@ -10,7 +10,7 @@ export default function AutoSendTextSwitch({
}) {
const localize = useLocalize();
const [autoSendText, setAutoSendText] = useRecoilState<boolean>(store.autoSendText);
const [SpeechToText] = useRecoilState<boolean>(store.SpeechToText);
const [SpeechToText] = useRecoilState<boolean>(store.speechToText);
const handleCheckedChange = (value: boolean) => {
setAutoSendText(value);

View file

@@ -12,7 +12,7 @@ export default function AutoTranscribeAudioSwitch({
const [autoTranscribeAudio, setAutoTranscribeAudio] = useRecoilState<boolean>(
store.autoTranscribeAudio,
);
const [speechToText] = useRecoilState<boolean>(store.SpeechToText);
const [speechToText] = useRecoilState<boolean>(store.speechToText);
const handleCheckedChange = (value: boolean) => {
setAutoTranscribeAudio(value);

View file

@@ -7,7 +7,7 @@ import { cn, defaultTextProps, optionText } from '~/utils/';
export default function DecibelSelector() {
const localize = useLocalize();
const speechToText = useRecoilValue(store.SpeechToText);
const speechToText = useRecoilValue(store.speechToText);
const [decibelValue, setDecibelValue] = useRecoilState(store.decibelValue);
return (

View file

@@ -5,21 +5,21 @@ import store from '~/store';
export default function EngineSTTDropdown() {
const localize = useLocalize();
const [endpointSTT, setEndpointSTT] = useRecoilState<string>(store.endpointSTT);
const [engineSTT, setEngineSTT] = useRecoilState<string>(store.engineSTT);
const endpointOptions = [
{ value: 'browser', display: localize('com_nav_browser') },
{ value: 'external', display: localize('com_nav_external') },
];
const handleSelect = (value: string) => {
setEndpointSTT(value);
setEngineSTT(value);
};
return (
<div className="flex items-center justify-between">
<div>{localize('com_nav_engine')}</div>
<Dropdown
value={endpointSTT}
value={engineSTT}
onChange={handleSelect}
options={endpointOptions}
width={180}

View file

@@ -0,0 +1,107 @@
import { useRecoilState } from 'recoil';
import { Dropdown } from '~/components/ui';
import { useLocalize } from '~/hooks';
import store from '~/store';
export default function LanguageSTTDropdown() {
const localize = useLocalize();
const [languageSTT, setLanguageSTT] = useRecoilState<string>(store.languageSTT);
const languageOptions = [
{ value: 'af', display: 'Afrikaans' },
{ value: 'eu', display: 'Basque' },
{ value: 'bg', display: 'Bulgarian' },
{ value: 'ca', display: 'Catalan' },
{ value: 'ar-EG', display: 'Arabic (Egypt)' },
{ value: 'ar-JO', display: 'Arabic (Jordan)' },
{ value: 'ar-KW', display: 'Arabic (Kuwait)' },
{ value: 'ar-LB', display: 'Arabic (Lebanon)' },
{ value: 'ar-QA', display: 'Arabic (Qatar)' },
{ value: 'ar-AE', display: 'Arabic (UAE)' },
{ value: 'ar-MA', display: 'Arabic (Morocco)' },
{ value: 'ar-IQ', display: 'Arabic (Iraq)' },
{ value: 'ar-DZ', display: 'Arabic (Algeria)' },
{ value: 'ar-BH', display: 'Arabic (Bahrain)' },
{ value: 'ar-LY', display: 'Arabic (Libya)' },
{ value: 'ar-OM', display: 'Arabic (Oman)' },
{ value: 'ar-SA', display: 'Arabic (Saudi Arabia)' },
{ value: 'ar-TN', display: 'Arabic (Tunisia)' },
{ value: 'ar-YE', display: 'Arabic (Yemen)' },
{ value: 'cs', display: 'Czech' },
{ value: 'nl-NL', display: 'Dutch' },
{ value: 'en-AU', display: 'English (Australia)' },
{ value: 'en-CA', display: 'English (Canada)' },
{ value: 'en-IN', display: 'English (India)' },
{ value: 'en-NZ', display: 'English (New Zealand)' },
{ value: 'en-ZA', display: 'English (South Africa)' },
{ value: 'en-GB', display: 'English (UK)' },
{ value: 'en-US', display: 'English (US)' },
{ value: 'fi', display: 'Finnish' },
{ value: 'fr-FR', display: 'French' },
{ value: 'gl', display: 'Galician' },
{ value: 'de-DE', display: 'German' },
{ value: 'el-GR', display: 'Greek' },
{ value: 'he', display: 'Hebrew' },
{ value: 'hu', display: 'Hungarian' },
{ value: 'is', display: 'Icelandic' },
{ value: 'it-IT', display: 'Italian' },
{ value: 'id', display: 'Indonesian' },
{ value: 'ja', display: 'Japanese' },
{ value: 'ko', display: 'Korean' },
{ value: 'la', display: 'Latin' },
{ value: 'zh-CN', display: 'Mandarin Chinese' },
{ value: 'zh-TW', display: 'Taiwanese' },
{ value: 'zh-HK', display: 'Cantonese' },
{ value: 'ms-MY', display: 'Malaysian' },
{ value: 'no-NO', display: 'Norwegian' },
{ value: 'pl', display: 'Polish' },
{ value: 'xx-piglatin', display: 'Pig Latin' },
{ value: 'pt-PT', display: 'Portuguese' },
{ value: 'pt-br', display: 'Portuguese (Brasil)' },
{ value: 'ro-RO', display: 'Romanian' },
{ value: 'ru', display: 'Russian' },
{ value: 'sr-SP', display: 'Serbian' },
{ value: 'sk', display: 'Slovak' },
{ value: 'es-AR', display: 'Spanish (Argentina)' },
{ value: 'es-BO', display: 'Spanish (Bolivia)' },
{ value: 'es-CL', display: 'Spanish (Chile)' },
{ value: 'es-CO', display: 'Spanish (Colombia)' },
{ value: 'es-CR', display: 'Spanish (Costa Rica)' },
{ value: 'es-DO', display: 'Spanish (Dominican Republic)' },
{ value: 'es-EC', display: 'Spanish (Ecuador)' },
{ value: 'es-SV', display: 'Spanish (El Salvador)' },
{ value: 'es-GT', display: 'Spanish (Guatemala)' },
{ value: 'es-HN', display: 'Spanish (Honduras)' },
{ value: 'es-MX', display: 'Spanish (Mexico)' },
{ value: 'es-NI', display: 'Spanish (Nicaragua)' },
{ value: 'es-PA', display: 'Spanish (Panama)' },
{ value: 'es-PY', display: 'Spanish (Paraguay)' },
{ value: 'es-PE', display: 'Spanish (Peru)' },
{ value: 'es-PR', display: 'Spanish (Puerto Rico)' },
{ value: 'es-ES', display: 'Spanish (Spain)' },
{ value: 'es-US', display: 'Spanish (US)' },
{ value: 'es-UY', display: 'Spanish (Uruguay)' },
{ value: 'es-VE', display: 'Spanish (Venezuela)' },
{ value: 'sv-SE', display: 'Swedish' },
{ value: 'tr', display: 'Turkish' },
{ value: 'zu', display: 'Zulu' },
];
const handleSelect = (value: string) => {
setLanguageSTT(value);
};
return (
<div className="flex items-center justify-between">
<div>{localize('com_nav_language')}</div>
<Dropdown
value={languageSTT}
onChange={handleSelect}
options={languageOptions}
width={220}
position={'left'}
testId="LanguageSTTDropdown"
/>
</div>
);
}

View file

@@ -9,7 +9,7 @@ export default function SpeechToTextSwitch({
onCheckedChange?: (value: boolean) => void;
}) {
const localize = useLocalize();
const [speechToText, setSpeechToText] = useRecoilState<boolean>(store.SpeechToText);
const [speechToText, setSpeechToText] = useRecoilState<boolean>(store.speechToText);
const handleCheckedChange = (value: boolean) => {
setSpeechToText(value);

View file

@@ -3,3 +3,4 @@ export { default as SpeechToTextSwitch } from './SpeechToTextSwitch';
export { default as EngineSTTDropdown } from './EngineSTTDropdown';
export { default as DecibelSelector } from './DecibelSelector';
export { default as AutoTranscribeAudioSwitch } from './AutoTranscribeAudioSwitch';
export { default as LanguageSTTDropdown } from './LanguageSTTDropdown';

View file

@@ -1,6 +1,6 @@
import * as Tabs from '@radix-ui/react-tabs';
import { SettingsTabValues } from 'librechat-data-provider';
import React, { useState, useRef } from 'react';
import React, { useState, useRef, useEffect, useCallback } from 'react';
import { useRecoilState } from 'recoil';
import { Lightbulb, Cog } from 'lucide-react';
import { useOnClickOutside, useMediaQuery } from '~/hooks';
@@ -10,7 +10,7 @@ import ConversationModeSwitch from './ConversationModeSwitch';
import {
TextToSpeechSwitch,
EngineTTSDropdown,
AutomaticPlayback,
AutomaticPlaybackSwitch,
CacheTTSSwitch,
VoiceDropdown,
PlaybackRate,
@@ -18,16 +18,100 @@ import {
import {
DecibelSelector,
EngineSTTDropdown,
LanguageSTTDropdown,
SpeechToTextSwitch,
AutoSendTextSwitch,
AutoTranscribeAudioSwitch,
} from './STT';
import { useCustomConfigSpeechQuery } from '~/data-provider';
function Speech() {
const isSmallScreen = useMediaQuery('(max-width: 767px)');
const [advancedMode, setAdvancedMode] = useRecoilState<boolean>(store.advancedMode);
const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);
const [confirmClear, setConfirmClear] = useState(false);
const { data } = useCustomConfigSpeechQuery();
const isSmallScreen = useMediaQuery('(max-width: 767px)');
const [advancedMode, setAdvancedMode] = useRecoilState(store.advancedMode);
const [autoTranscribeAudio, setAutoTranscribeAudio] = useRecoilState(store.autoTranscribeAudio);
const [conversationMode, setConversationMode] = useRecoilState(store.conversationMode);
const [speechToText, setSpeechToText] = useRecoilState(store.speechToText);
const [textToSpeech, setTextToSpeech] = useRecoilState(store.textToSpeech);
const [cacheTTS, setCacheTTS] = useRecoilState(store.cacheTTS);
const [engineSTT, setEngineSTT] = useRecoilState<string>(store.engineSTT);
const [languageSTT, setLanguageSTT] = useRecoilState<string>(store.languageSTT);
const [decibelValue, setDecibelValue] = useRecoilState(store.decibelValue);
const [autoSendText, setAutoSendText] = useRecoilState(store.autoSendText);
const [engineTTS, setEngineTTS] = useRecoilState<string>(store.engineTTS);
const [voice, setVoice] = useRecoilState<string>(store.voice);
const [languageTTS, setLanguageTTS] = useRecoilState<string>(store.languageTTS);
const [automaticPlayback, setAutomaticPlayback] = useRecoilState(store.automaticPlayback);
const [playbackRate, setPlaybackRate] = useRecoilState(store.playbackRate);
const updateSetting = useCallback(
(key, newValue) => {
const settings = {
conversationMode: { value: conversationMode, setFunc: setConversationMode },
advancedMode: { value: advancedMode, setFunc: setAdvancedMode },
speechToText: { value: speechToText, setFunc: setSpeechToText },
textToSpeech: { value: textToSpeech, setFunc: setTextToSpeech },
cacheTTS: { value: cacheTTS, setFunc: setCacheTTS },
engineSTT: { value: engineSTT, setFunc: setEngineSTT },
languageSTT: { value: languageSTT, setFunc: setLanguageSTT },
autoTranscribeAudio: { value: autoTranscribeAudio, setFunc: setAutoTranscribeAudio },
decibelValue: { value: decibelValue, setFunc: setDecibelValue },
autoSendText: { value: autoSendText, setFunc: setAutoSendText },
engineTTS: { value: engineTTS, setFunc: setEngineTTS },
voice: { value: voice, setFunc: setVoice },
languageTTS: { value: languageTTS, setFunc: setLanguageTTS },
automaticPlayback: { value: automaticPlayback, setFunc: setAutomaticPlayback },
playbackRate: { value: playbackRate, setFunc: setPlaybackRate },
};
if (settings[key]) {
const setting = settings[key];
setting.setFunc(newValue);
}
},
[
conversationMode,
advancedMode,
speechToText,
textToSpeech,
cacheTTS,
engineSTT,
languageSTT,
autoTranscribeAudio,
decibelValue,
autoSendText,
engineTTS,
voice,
languageTTS,
automaticPlayback,
playbackRate,
setConversationMode,
setAdvancedMode,
setSpeechToText,
setTextToSpeech,
setCacheTTS,
setEngineSTT,
setLanguageSTT,
setAutoTranscribeAudio,
setDecibelValue,
setAutoSendText,
setEngineTTS,
setVoice,
setLanguageTTS,
setAutomaticPlayback,
setPlaybackRate,
],
);
useEffect(() => {
if (data) {
Object.entries(data).forEach(([key, value]) => {
updateSetting(key, value);
});
}
}, []);
const contentRef = useRef(null);
useOnClickOutside(contentRef, () => confirmClear && setConfirmClear(false), []);
@@ -91,13 +175,13 @@ function Speech() {
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<EngineSTTDropdown />
</div>
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<LanguageSTTDropdown />
</div>
<div className="h-px bg-black/20 bg-white/20" role="none" />
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<TextToSpeechSwitch />
</div>
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<AutomaticPlayback />
</div>
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<EngineTTSDropdown />
</div>
@@ -119,6 +203,9 @@ function Speech() {
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<EngineSTTDropdown />
</div>
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<LanguageSTTDropdown />
</div>
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<AutoTranscribeAudioSwitch />
</div>
@@ -135,7 +222,7 @@ function Speech() {
<TextToSpeechSwitch />
</div>
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<AutomaticPlayback />
<AutomaticPlaybackSwitch />
</div>
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
<EngineTTSDropdown />

View file

@@ -3,7 +3,7 @@ import { Switch } from '~/components/ui';
import { useLocalize } from '~/hooks';
import store from '~/store';
export default function AutomaticPlayback({
export default function AutomaticPlaybackSwitch({
onCheckedChange,
}: {
onCheckedChange?: (value: boolean) => void;

View file

@@ -10,7 +10,7 @@ export default function CacheTTSSwitch({
}) {
const localize = useLocalize();
const [cacheTTS, setCacheTTS] = useRecoilState<boolean>(store.cacheTTS);
const [textToSpeech] = useRecoilState<boolean>(store.TextToSpeech);
const [textToSpeech] = useRecoilState<boolean>(store.textToSpeech);
const handleCheckedChange = (value: boolean) => {
setCacheTTS(value);

View file

@@ -5,21 +5,21 @@ import store from '~/store';
export default function EngineTTSDropdown() {
const localize = useLocalize();
const [endpointTTS, setEndpointTTS] = useRecoilState<string>(store.endpointTTS);
const [engineTTS, setEngineTTS] = useRecoilState<string>(store.engineTTS);
const endpointOptions = [
{ value: 'browser', display: localize('com_nav_browser') },
{ value: 'external', display: localize('com_nav_external') },
];
const handleSelect = (value: string) => {
setEndpointTTS(value);
setEngineTTS(value);
};
return (
<div className="flex items-center justify-between">
<div>{localize('com_nav_engine')}</div>
<Dropdown
value={endpointTTS}
value={engineTTS}
onChange={handleSelect}
options={endpointOptions}
width={180}

View file

@@ -7,7 +7,7 @@ import { cn, defaultTextProps, optionText } from '~/utils/';
export default function DecibelSelector() {
const localize = useLocalize();
const textToSpeech = useRecoilValue(store.TextToSpeech);
const textToSpeech = useRecoilValue(store.textToSpeech);
const [playbackRate, setPlaybackRate] = useRecoilState(store.playbackRate);
return (

View file

@@ -9,7 +9,7 @@ export default function TextToSpeechSwitch({
onCheckedChange?: (value: boolean) => void;
}) {
const localize = useLocalize();
const [TextToSpeech, setTextToSpeech] = useRecoilState<boolean>(store.TextToSpeech);
const [TextToSpeech, setTextToSpeech] = useRecoilState<boolean>(store.textToSpeech);
const handleCheckedChange = (value: boolean) => {
setTextToSpeech(value);

View file

@@ -0,0 +1,38 @@
import React from 'react';
import '@testing-library/jest-dom/extend-expect';
import { render, fireEvent } from 'test/layout-test-utils';
import AutomaticPlaybackSwitch from '../AutomaticPlaybackSwitch';
import { RecoilRoot } from 'recoil';
describe('AutomaticPlaybackSwitch', () => {
/**
* Mock function to set the text-to-speech state.
*/
let mockSetAutomaticPlayback: jest.Mock<void, [boolean]> | ((value: boolean) => void) | undefined;
beforeEach(() => {
mockSetAutomaticPlayback = jest.fn();
});
it('renders correctly', () => {
const { getByTestId } = render(
<RecoilRoot>
<AutomaticPlaybackSwitch />
</RecoilRoot>,
);
expect(getByTestId('AutomaticPlayback')).toBeInTheDocument();
});
it('calls onCheckedChange when the switch is toggled', () => {
const { getByTestId } = render(
<RecoilRoot>
<AutomaticPlaybackSwitch onCheckedChange={mockSetAutomaticPlayback} />
</RecoilRoot>,
);
const switchElement = getByTestId('AutomaticPlayback');
fireEvent.click(switchElement);
expect(mockSetAutomaticPlayback).toHaveBeenCalledWith(true);
});
});

View file

@@ -1,4 +1,4 @@
export { default as AutomaticPlayback } from './AutomaticPlayback';
export { default as AutomaticPlaybackSwitch } from './AutomaticPlaybackSwitch';
export { default as CacheTTSSwitch } from './CacheTTSSwitch';
export { default as EngineTTSDropdown } from './EngineTTSDropdown';
export { default as PlaybackRate } from './PlaybackRate';

View file

@@ -423,6 +423,13 @@ export const useVoicesQuery = (): UseQueryResult<t.VoiceResponse> => {
return useQuery([QueryKeys.voices], () => dataService.getVoices());
};
/* Custom config speech */
export const useCustomConfigSpeechQuery = (): UseQueryResult<t.getCustomConfigSpeechResponse> => {
return useQuery([QueryKeys.customConfigSpeech], () => dataService.getCustomConfigSpeech());
};
/** Prompt */
export const usePromptGroupsInfiniteQuery = (
params?: t.TPromptGroupsWithFilterRequest,
config?: UseInfiniteQueryOptions<t.PromptGroupListResponse, unknown>,

View file

@@ -8,3 +8,4 @@ export { default as useRequiresKey } from './useRequiresKey';
export { default as useMultipleKeys } from './useMultipleKeys';
export { default as useSpeechToText } from './useSpeechToText';
export { default as useTextToSpeech } from './useTextToSpeech';
export { default as useGetAudioSettings } from './useGetAudioSettings';

View file

@@ -0,0 +1,19 @@
import { useRecoilState } from 'recoil';
import store from '~/store';
export enum AudioEndpoints {
browser = 'browser',
external = 'external',
}
const useGetAudioSettings = () => {
const [engineSTT] = useRecoilState<string>(store.engineSTT);
const [engineTTS] = useRecoilState<string>(store.engineTTS);
const externalSpeechToText = engineSTT === AudioEndpoints.external;
const externalTextToSpeech = engineTTS === AudioEndpoints.external;
return { externalSpeechToText, externalTextToSpeech };
};
export default useGetAudioSettings;
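
Consumers then branch on these booleans, as the useSpeechToText and useTextToSpeech diffs below show; a condensed usage sketch:

import useGetAudioSettings from './useGetAudioSettings';

// Condensed from the selection pattern used in the hooks below.
const useAudioModes = () => {
  const { externalSpeechToText, externalTextToSpeech } = useGetAudioSettings();
  return {
    sttMode: externalSpeechToText ? 'external' : 'browser',
    ttsMode: externalTextToSpeech ? 'external' : 'browser',
  };
};

export default useAudioModes;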

View file

@@ -1,17 +1,16 @@
import { useState, useEffect } from 'react';
import useSpeechToTextBrowser from './useSpeechToTextBrowser';
import useSpeechToTextExternal from './useSpeechToTextExternal';
import { useRecoilState } from 'recoil';
import store from '~/store';
import useGetAudioSettings from './useGetAudioSettings';
const useSpeechToText = (handleTranscriptionComplete: (text: string) => void) => {
const [endpointSTT] = useRecoilState<string>(store.endpointSTT);
const useExternalSpeechToText = endpointSTT === 'external';
const { externalSpeechToText } = useGetAudioSettings();
const [animatedText, setAnimatedText] = useState('');
const {
isListening: speechIsListeningBrowser,
isLoading: speechIsLoadingBrowser,
interimTranscript: interimTranscriptBrowser,
text: speechTextBrowser,
startRecording: startSpeechRecordingBrowser,
stopRecording: stopSpeechRecordingBrowser,
@@ -26,21 +25,21 @@ const useSpeechToText = (handleTranscriptionComplete: (text: string) => void) =>
clearText,
} = useSpeechToTextExternal(handleTranscriptionComplete);
const isListening = useExternalSpeechToText
? speechIsListeningExternal
: speechIsListeningBrowser;
const isLoading = useExternalSpeechToText ? speechIsLoadingExternal : speechIsLoadingBrowser;
const speechTextForm = useExternalSpeechToText ? speechTextExternal : speechTextBrowser;
const startRecording = useExternalSpeechToText
const isListening = externalSpeechToText ? speechIsListeningExternal : speechIsListeningBrowser;
const isLoading = externalSpeechToText ? speechIsLoadingExternal : speechIsLoadingBrowser;
const speechTextForm = externalSpeechToText ? speechTextExternal : speechTextBrowser;
const startRecording = externalSpeechToText
? startSpeechRecordingExternal
: startSpeechRecordingBrowser;
const stopRecording = useExternalSpeechToText
const stopRecording = externalSpeechToText
? stopSpeechRecordingExternal
: stopSpeechRecordingBrowser;
const speechText =
isListening || (speechTextExternal && speechTextExternal.length > 0)
? speechTextExternal
: speechTextForm || '';
// for a future real-time STT external
const interimTranscript = externalSpeechToText ? '' : interimTranscriptBrowser;
const animateTextTyping = (text: string) => {
const totalDuration = 2000;
@@ -65,17 +64,18 @@ const useSpeechToText = (handleTranscriptionComplete: (text: string) => void) =
};
useEffect(() => {
if (speechText) {
if (speechText && externalSpeechToText) {
animateTextTyping(speechText);
}
}, [speechText]);
}, [speechText, externalSpeechToText]);
return {
isListening,
isLoading,
startRecording,
stopRecording,
speechText: animatedText,
interimTranscript,
speechText: externalSpeechToText ? animatedText : speechText,
clearText,
};
};

View file

@@ -1,34 +1,57 @@
import { useEffect } from 'react';
import { useEffect, useState } from 'react';
import { useRecoilState } from 'recoil';
import { useToastContext } from '~/Providers';
import store from '~/store';
import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition';
import useGetAudioSettings from './useGetAudioSettings';
const useSpeechToTextBrowser = () => {
const { showToast } = useToastContext();
const [endpointSTT] = useRecoilState<string>(store.endpointSTT);
const [languageSTT] = useRecoilState<string>(store.languageSTT);
const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);
const { externalSpeechToText } = useGetAudioSettings();
const [isListening, setIsListening] = useState(false);
const { transcript, listening, resetTranscript, browserSupportsSpeechRecognition } =
useSpeechRecognition();
const {
interimTranscript,
finalTranscript,
listening,
browserSupportsSpeechRecognition,
isMicrophoneAvailable,
} = useSpeechRecognition();
const toggleListening = () => {
if (browserSupportsSpeechRecognition) {
if (listening) {
SpeechRecognition.stopListening();
} else {
SpeechRecognition.startListening();
}
} else {
if (!browserSupportsSpeechRecognition) {
showToast({
message: 'Browser does not support SpeechRecognition',
status: 'error',
});
return;
}
if (!isMicrophoneAvailable) {
showToast({
message: 'Microphone is not available',
status: 'error',
});
return;
}
if (listening) {
setIsListening(false);
SpeechRecognition.stopListening();
} else {
setIsListening(true);
SpeechRecognition.startListening({
language: languageSTT,
continuous: autoTranscribeAudio,
});
}
};
useEffect(() => {
const handleKeyDown = (e: KeyboardEvent) => {
if (e.shiftKey && e.altKey && e.code === 'KeyL' && endpointSTT === 'browser') {
if (e.shiftKey && e.altKey && e.code === 'KeyL' && !externalSpeechToText) {
toggleListening();
}
};
@@ -37,15 +60,19 @@ const useSpeechToTextBrowser = () => {
return () => window.removeEventListener('keydown', handleKeyDown);
}, []);
useEffect(() => {
if (!listening) {
setIsListening(false);
}
}, [listening]);
return {
isListening: listening,
isListening,
isLoading: false,
text: transcript,
interimTranscript,
text: finalTranscript,
startRecording: toggleListening,
stopRecording: () => {
SpeechRecognition.stopListening();
resetTranscript();
},
stopRecording: toggleListening,
};
};

View file

@@ -3,11 +3,12 @@ import { useRecoilState } from 'recoil';
import { useSpeechToTextMutation } from '~/data-provider';
import { useToastContext } from '~/Providers';
import store from '~/store';
import useGetAudioSettings from './useGetAudioSettings';
const useSpeechToTextExternal = (onTranscriptionComplete: (text: string) => void) => {
const { showToast } = useToastContext();
const [endpointSTT] = useRecoilState<string>(store.endpointSTT);
const [speechToText] = useRecoilState<boolean>(store.SpeechToText);
const { externalSpeechToText } = useGetAudioSettings();
const [speechToText] = useRecoilState<boolean>(store.speechToText);
const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);
const [autoSendText] = useRecoilState<boolean>(store.autoSendText);
const [text, setText] = useState<string>('');
@@ -196,7 +197,7 @@ const useSpeechToTextExternal = (onTranscriptionComplete: (text: string) => void
};
const handleKeyDown = async (e: KeyboardEvent) => {
if (e.shiftKey && e.altKey && e.code === 'KeyL' && endpointSTT !== 'browser') {
if (e.shiftKey && e.altKey && e.code === 'KeyL' && !externalSpeechToText) {
if (!window.MediaRecorder) {
showToast({ message: 'MediaRecorder is not supported in this browser', status: 'error' });
return;

View file

@@ -4,12 +4,10 @@ import type { TMessage } from 'librechat-data-provider';
import useTextToSpeechExternal from './useTextToSpeechExternal';
import useTextToSpeechBrowser from './useTextToSpeechBrowser';
import { usePauseGlobalAudio } from '../Audio';
import { useRecoilState } from 'recoil';
import store from '~/store';
import useGetAudioSettings from './useGetAudioSettings';
const useTextToSpeech = (message: TMessage, isLast: boolean, index = 0) => {
const [endpointTTS] = useRecoilState<string>(store.endpointTTS);
const useExternalTextToSpeech = endpointTTS === 'external';
const { externalTextToSpeech } = useGetAudioSettings();
const {
generateSpeechLocal: generateSpeechLocal,
@@ -26,9 +24,9 @@ const useTextToSpeech = (message: TMessage, isLast: boolean, index = 0) => {
} = useTextToSpeechExternal(message.messageId, isLast, index);
const { pauseGlobalAudio } = usePauseGlobalAudio(index);
const generateSpeech = useExternalTextToSpeech ? generateSpeechExternal : generateSpeechLocal;
const cancelSpeech = useExternalTextToSpeech ? cancelSpeechExternal : cancelSpeechLocal;
const isSpeaking = useExternalTextToSpeech ? isSpeakingExternal : isSpeakingLocal;
const generateSpeech = externalTextToSpeech ? generateSpeechExternal : generateSpeechLocal;
const cancelSpeech = externalTextToSpeech ? cancelSpeechExternal : cancelSpeechLocal;
const isSpeaking = externalTextToSpeech ? isSpeakingExternal : isSpeakingLocal;
const isMouseDownRef = useRef(false);
const timerRef = useRef<number | undefined>(undefined);

View file

@@ -539,7 +539,7 @@ export default {
com_nav_modular_chat: 'Enable switching Endpoints mid-conversation',
com_nav_latex_parsing: 'Parsing LaTeX in messages (may affect performance)',
com_nav_text_to_speech: 'Text to Speech',
com_nav_automatic_playback: 'Autoplay Latest Message (external only)',
com_nav_automatic_playback: 'Autoplay Latest Message',
com_nav_speech_to_text: 'Speech to Text',
com_nav_profile_picture: 'Profile Picture',
com_nav_change_picture: 'Change picture',

View file

@@ -18,32 +18,45 @@ const staticAtoms = {
showPopover: atom<boolean>({ key: 'showPopover', default: false }),
};
// Atoms with localStorage
const localStorageAtoms = {
// General settings
autoScroll: atomWithLocalStorage('autoScroll', false),
showCode: atomWithLocalStorage('showCode', false),
hideSidePanel: atomWithLocalStorage('hideSidePanel', false),
modularChat: atomWithLocalStorage('modularChat', true),
LaTeXParsing: atomWithLocalStorage('LaTeXParsing', true),
UsernameDisplay: atomWithLocalStorage('UsernameDisplay', true),
TextToSpeech: atomWithLocalStorage('textToSpeech', true),
automaticPlayback: atomWithLocalStorage('automaticPlayback', false),
// Messages settings
enterToSend: atomWithLocalStorage('enterToSend', true),
SpeechToText: atomWithLocalStorage('speechToText', true),
conversationMode: atomWithLocalStorage('conversationMode', false),
advancedMode: atomWithLocalStorage('advancedMode', false),
autoSendText: atomWithLocalStorage('autoSendText', false),
autoTranscribeAudio: atomWithLocalStorage('autoTranscribeAudio', false),
decibelValue: atomWithLocalStorage('decibelValue', -45),
endpointSTT: atomWithLocalStorage('endpointSTT', 'browser'),
endpointTTS: atomWithLocalStorage('endpointTTS', 'browser'),
cacheTTS: atomWithLocalStorage('cacheTTS', true),
voice: atomWithLocalStorage('voice', ''),
showCode: atomWithLocalStorage('showCode', false),
saveDrafts: atomWithLocalStorage('saveDrafts', false),
forkSetting: atomWithLocalStorage('forkSetting', ''),
splitAtTarget: atomWithLocalStorage('splitAtTarget', false),
rememberForkOption: atomWithLocalStorage('rememberForkOption', true),
// Beta features settings
modularChat: atomWithLocalStorage('modularChat', true),
LaTeXParsing: atomWithLocalStorage('LaTeXParsing', true),
// Speech settings
conversationMode: atomWithLocalStorage('conversationMode', false),
advancedMode: atomWithLocalStorage('advancedMode', false),
speechToText: atomWithLocalStorage('speechToText', true),
engineSTT: atomWithLocalStorage('engineSTT', 'browser'),
languageSTT: atomWithLocalStorage('languageSTT', ''),
autoTranscribeAudio: atomWithLocalStorage('autoTranscribeAudio', false),
decibelValue: atomWithLocalStorage('decibelValue', -45),
autoSendText: atomWithLocalStorage('autoSendText', false),
textToSpeech: atomWithLocalStorage('textToSpeech', true),
engineTTS: atomWithLocalStorage('engineTTS', 'browser'),
voice: atomWithLocalStorage('voice', ''),
languageTTS: atomWithLocalStorage('languageTTS', ''),
automaticPlayback: atomWithLocalStorage('automaticPlayback', false),
playbackRate: atomWithLocalStorage<number | null>('playbackRate', null),
saveDrafts: atomWithLocalStorage('saveDrafts', false),
cacheTTS: atomWithLocalStorage('cacheTTS', true),
// Account settings
UsernameDisplay: atomWithLocalStorage('UsernameDisplay', true),
};
export default { ...staticAtoms, ...localStorageAtoms };
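
For readers unfamiliar with the atomWithLocalStorage helper used throughout this file, a minimal sketch of what such a helper typically looks like (an assumption; the project's actual implementation may differ):

import { atom, AtomEffect } from 'recoil';

// Hypothetical: hydrate the atom from localStorage on init and
// persist every update back under the same key.
const localStorageEffect =
  <T,>(key: string): AtomEffect<T> =>
    ({ setSelf, onSet }) => {
      const saved = localStorage.getItem(key);
      if (saved != null) {
        setSelf(JSON.parse(saved) as T);
      }
      onSet((newValue) => {
        localStorage.setItem(key, JSON.stringify(newValue));
      });
    };

const atomWithLocalStorage = <T,>(key: string, defaultValue: T) =>
  atom<T>({ key, default: defaultValue, effects: [localStorageEffect<T>(key)] });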

View file

@@ -128,14 +128,18 @@ export const images = () => `${files()}/images`;
export const avatar = () => `${images()}/avatar`;
export const speechToText = () => `${files()}/stt`;
export const speech = () => `${files()}/speech`;
export const textToSpeech = () => `${files()}/tts`;
export const speechToText = () => `${speech()}/stt`;
export const textToSpeech = () => `${speech()}/tts`;
export const textToSpeechManual = () => `${textToSpeech()}/manual`;
export const textToSpeechVoices = () => `${textToSpeech()}/voices`;
export const getCustomConfigSpeech = () => `${speech()}/config/get`;
export const getPromptGroup = (_id: string) => `${prompts()}/groups/${_id}`;
export const getPromptGroupsWithFilters = (filter: object) => {

View file

@@ -6,6 +6,7 @@ import { fileConfigSchema } from './file-config';
import { specsConfigSchema } from './models';
import { FileSources } from './types/files';
import { TModelsConfig } from './types';
import { speech } from './api-endpoints';
export const defaultSocialLogins = ['google', 'facebook', 'openid', 'github', 'discord'];
@@ -273,6 +274,40 @@ const sttSchema = z.object({
.optional(),
});
const speechTab = z
.object({
conversationMode: z.boolean().optional(),
advancedMode: z.boolean().optional(),
speechToText: z
.boolean()
.optional()
.or(
z.object({
engineSTT: z.string().optional(),
languageSTT: z.string().optional(),
autoTranscribeAudio: z.boolean().optional(),
decibelValue: z.number().optional(),
autoSendText: z.boolean().optional(),
}),
)
.optional(),
textToSpeech: z
.boolean()
.optional()
.or(
z.object({
engineTTS: z.string().optional(),
voice: z.string().optional(),
languageTTS: z.string().optional(),
automaticPlayback: z.boolean().optional(),
playbackRate: z.number().optional(),
cacheTTS: z.boolean().optional(),
}),
)
.optional(),
})
.optional();
export enum RateLimitPrefix {
FILE_UPLOAD = 'FILE_UPLOAD',
IMPORT = 'IMPORT',
@@ -362,8 +397,13 @@ export const configSchema = z.object({
allowedDomains: z.array(z.string()).optional(),
})
.default({ socialLogins: defaultSocialLogins }),
speech: z
.object({
tts: ttsSchema.optional(),
stt: sttSchema.optional(),
speechTab: speechTab.optional(),
})
.optional(),
rateLimits: rateLimitSchema.optional(),
fileConfig: fileConfigSchema.optional(),
modelSpecs: specsConfigSchema.optional(),
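
A quick sketch of how an old-style config trips the new schema, consistent with the result.success and unrecognized_keys checks in loadCustomConfig earlier in this commit (this assumes configSchema is strict about unknown keys, which that check implies; configSchema is exported from this module):

// Illustrative: a config still using top-level `tts` instead of
// `speech.tts` fails validation with an `unrecognized_keys` issue,
// which is what triggers the STT/TTS migration warning.
const result = configSchema.safeParse({ tts: {} });
if (!result.success) {
  const speechError = result.error.errors.find(
    (err) => err.code === 'unrecognized_keys' && err.message.includes('tts'),
  );
  // speechError !== undefined -> log the migration hint.
}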

View file

@@ -355,6 +355,10 @@ export const getVoices = (): Promise<f.VoiceResponse> => {
return request.get(endpoints.textToSpeechVoices());
};
export const getCustomConfigSpeech = (): Promise<f.getCustomConfigSpeechResponse[]> => {
return request.get(endpoints.getCustomConfigSpeech());
};
/* actions */
export const updateAction = (data: m.UpdateActionVariables): Promise<m.UpdateActionResponse> => {

View file

@@ -27,6 +27,7 @@ export enum QueryKeys {
assistantDocs = 'assistantDocs',
fileDownload = 'fileDownload',
voices = 'voices',
customConfigSpeech = 'customConfigSpeech',
prompts = 'prompts',
prompt = 'prompt',
promptGroups = 'promptGroups',

View file

@@ -83,6 +83,8 @@ export type SpeechToTextResponse = {
export type VoiceResponse = string[];
export type getCustomConfigSpeechResponse = { [key: string]: string };
export type UploadMutationOptions = {
onSuccess?: (data: TFileUpload, variables: FormData, context?: unknown) => void;
onMutate?: (variables: FormData) => void | Promise<unknown>;
@@ -113,6 +115,12 @@ export type VoiceOptions = {
onError?: (error: unknown, variables: unknown, context?: unknown) => void;
};
export type getCustomConfigSpeechOptions = {
onSuccess?: (data: getCustomConfigSpeechResponse, variables: unknown, context?: unknown) => void;
onMutate?: () => void | Promise<unknown>;
onError?: (error: unknown, variables: unknown, context?: unknown) => void;
};
export type DeleteFilesResponse = {
message: string;
result: Record<string, unknown>;