mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 00:40:14 +01:00
🎤 feat: add custom speech config, browser TTS/STT features, and dynamic speech tab settings (#2921)
* feat: update useTextToSpeech and useSpeechToText hooks to support external audio endpoints This commit updates the useTextToSpeech and useSpeechToText hooks in the Input directory to support external audio endpoints. It introduces the useGetExternalTextToSpeech and useGetExternalSpeechToText hooks, which determine whether the audio endpoints should be set to 'browser' or 'external' based on the value of the endpointTTS and endpointSTT Recoil states. The useTextToSpeech and useSpeechToText hooks now use these new hooks to determine whether to use external audio endpoints * feat: add userSelect style to ConversationModeSwitch label * fix: remove unused updateTokenWebsocket function and import The updateTokenWebsocket function and its import are no longer used in the OpenAIClient module. This commit removes the function and import to clean up the codebase * feat: support external audio endpoints in useTextToSpeech and useSpeechToText hooks This commit updates the useTextToSpeech and useSpeechToText hooks in the Input directory to support external audio endpoints. It introduces the useGetExternalTextToSpeech and useGetExternalSpeechToText hooks, which determine whether the audio endpoints should be set to 'browser' or 'external' based on the value of the endpointTTS and endpointSTT Recoil states. The useTextToSpeech and useSpeechToText hooks now use these new hooks to determine whether to use external audio endpoints * feat: update AutomaticPlayback component to AutomaticPlaybackSwitch; tests: added AutomaticPlaybackSwitch.spec > > This commit renames the AutomaticPlayback component to AutomaticPlaybackSwitch in the Speech directory. The new name better reflects the purpose of the component and aligns with the naming convention used in the codebase. * feat: update useSpeechToText hook to include interimTranscript This commit updates the useSpeechToText hook in the client/src/components/Chat/Input/AudioRecorder.tsx file to include the interimTranscript state. 
This allows for real-time display of the speech-to-text transcription while the user is still speaking. The interimTranscript is now used to update the text area value during recording. * feat: Add customConfigSpeech API endpoint for retrieving custom speech configuration This commit adds a new API endpoint in the customConfigSpeech.js file under the api/server/routes/files/speech directory. This endpoint is responsible for retrieving the custom speech configuration using the getCustomConfigSpeech function from the ~/server/services/Files/Audio module * feat: update store var and ; fix: getCustomConfigSpeech * fix: client tests, removed unused import * feat: Update useCustomConfigSpeechQuery to return an array of custom speech configurations This commit modifies the useCustomConfigSpeechQuery function in the client/src/data-provider/queries.ts file to return an array of custom speech configurations instead of a single object. This change allows for better handling and manipulation of the data in the application * feat: Update useCustomConfigSpeechQuery to return an array of custom speech configurations * refactor: Update variable name in speechTab schema * refactor: removed unused and nested code * fix: using recoilState * refactor: Update Speech component to use useCallback for setting settings * fix: test * fix: tests * feature: ensure that the settings don't change after modifying them through the UI * remove comment * fix: Handle error gracefully in getCustomConfigSpeech and getVoices endpoints * fix: Handle error * fix: backend tests * fix: invalid custom config logging * chore: add back custom config info logging * chore: revert loadCustomConfig spec --------- Co-authored-by: Danny Avila <danny@librechat.ai>
This commit is contained in:
parent
5d985746cb
commit
1aad315de6
50 changed files with 598 additions and 179 deletions
|
|
@ -27,7 +27,6 @@ const {
|
||||||
createContextHandlers,
|
createContextHandlers,
|
||||||
} = require('./prompts');
|
} = require('./prompts');
|
||||||
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
|
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
|
||||||
const { updateTokenWebsocket } = require('~/server/services/Files/Audio');
|
|
||||||
const { isEnabled, sleep } = require('~/server/utils');
|
const { isEnabled, sleep } = require('~/server/utils');
|
||||||
const { handleOpenAIErrors } = require('./tools/util');
|
const { handleOpenAIErrors } = require('./tools/util');
|
||||||
const spendTokens = require('~/models/spendTokens');
|
const spendTokens = require('~/models/spendTokens');
|
||||||
|
|
@ -595,7 +594,6 @@ class OpenAIClient extends BaseClient {
|
||||||
payload,
|
payload,
|
||||||
(progressMessage) => {
|
(progressMessage) => {
|
||||||
if (progressMessage === '[DONE]') {
|
if (progressMessage === '[DONE]') {
|
||||||
updateTokenWebsocket('[DONE]');
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,19 +1,11 @@
|
||||||
const express = require('express');
|
const express = require('express');
|
||||||
const {
|
const { uaParser, checkBan, requireJwtAuth, createFileLimiters } = require('~/server/middleware');
|
||||||
uaParser,
|
|
||||||
checkBan,
|
|
||||||
requireJwtAuth,
|
|
||||||
createFileLimiters,
|
|
||||||
createTTSLimiters,
|
|
||||||
createSTTLimiters,
|
|
||||||
} = require('~/server/middleware');
|
|
||||||
const { createMulterInstance } = require('./multer');
|
const { createMulterInstance } = require('./multer');
|
||||||
|
|
||||||
const files = require('./files');
|
const files = require('./files');
|
||||||
const images = require('./images');
|
const images = require('./images');
|
||||||
const avatar = require('./avatar');
|
const avatar = require('./avatar');
|
||||||
const stt = require('./stt');
|
const speech = require('./speech');
|
||||||
const tts = require('./tts');
|
|
||||||
|
|
||||||
const initialize = async () => {
|
const initialize = async () => {
|
||||||
const router = express.Router();
|
const router = express.Router();
|
||||||
|
|
@ -21,11 +13,8 @@ const initialize = async () => {
|
||||||
router.use(checkBan);
|
router.use(checkBan);
|
||||||
router.use(uaParser);
|
router.use(uaParser);
|
||||||
|
|
||||||
/* Important: stt/tts routes must be added before the upload limiters */
|
/* Important: speech route must be added before the upload limiters */
|
||||||
const { sttIpLimiter, sttUserLimiter } = createSTTLimiters();
|
router.use('/speech', speech);
|
||||||
const { ttsIpLimiter, ttsUserLimiter } = createTTSLimiters();
|
|
||||||
router.use('/stt', sttIpLimiter, sttUserLimiter, stt);
|
|
||||||
router.use('/tts', ttsIpLimiter, ttsUserLimiter, tts);
|
|
||||||
|
|
||||||
const upload = await createMulterInstance();
|
const upload = await createMulterInstance();
|
||||||
const { fileUploadIpLimiter, fileUploadUserLimiter } = createFileLimiters();
|
const { fileUploadIpLimiter, fileUploadUserLimiter } = createFileLimiters();
|
||||||
|
|
|
||||||
10
api/server/routes/files/speech/customConfigSpeech.js
Normal file
10
api/server/routes/files/speech/customConfigSpeech.js
Normal file
|
|
@ -0,0 +1,10 @@
|
||||||
|
const express = require('express');
|
||||||
|
const router = express.Router();
|
||||||
|
|
||||||
|
const { getCustomConfigSpeech } = require('~/server/services/Files/Audio');
|
||||||
|
|
||||||
|
router.get('/get', async (req, res) => {
|
||||||
|
await getCustomConfigSpeech(req, res);
|
||||||
|
});
|
||||||
|
|
||||||
|
module.exports = router;
|
||||||
17
api/server/routes/files/speech/index.js
Normal file
17
api/server/routes/files/speech/index.js
Normal file
|
|
@ -0,0 +1,17 @@
|
||||||
|
const express = require('express');
|
||||||
|
const { createTTSLimiters, createSTTLimiters } = require('~/server/middleware');
|
||||||
|
|
||||||
|
const stt = require('./stt');
|
||||||
|
const tts = require('./tts');
|
||||||
|
const customConfigSpeech = require('./customConfigSpeech');
|
||||||
|
|
||||||
|
const router = express.Router();
|
||||||
|
|
||||||
|
const { sttIpLimiter, sttUserLimiter } = createSTTLimiters();
|
||||||
|
const { ttsIpLimiter, ttsUserLimiter } = createTTSLimiters();
|
||||||
|
router.use('/stt', sttIpLimiter, sttUserLimiter, stt);
|
||||||
|
router.use('/tts', ttsIpLimiter, ttsUserLimiter, tts);
|
||||||
|
|
||||||
|
router.use('/config', customConfigSpeech);
|
||||||
|
|
||||||
|
module.exports = router;
|
||||||
|
|
@ -76,8 +76,28 @@ Please specify a correct \`imageOutputType\` value (case-sensitive).
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
if (!result.success) {
|
if (!result.success) {
|
||||||
i === 0 && logger.error(`Invalid custom config file at ${configPath}`, result.error);
|
let errorMessage = `Invalid custom config file at ${configPath}:
|
||||||
i === 0 && i++;
|
${JSON.stringify(result.error, null, 2)}`;
|
||||||
|
|
||||||
|
if (i === 0) {
|
||||||
|
logger.error(errorMessage);
|
||||||
|
const speechError = result.error.errors.find(
|
||||||
|
(err) =>
|
||||||
|
err.code === 'unrecognized_keys' &&
|
||||||
|
(err.message?.includes('stt') || err.message?.includes('tts')),
|
||||||
|
);
|
||||||
|
|
||||||
|
if (speechError) {
|
||||||
|
logger.warn(`
|
||||||
|
The Speech-to-text and Text-to-speech configuration format has recently changed.
|
||||||
|
If you're getting this error, please refer to the latest documentation:
|
||||||
|
|
||||||
|
https://www.librechat.ai/docs/configuration/stt_tts`);
|
||||||
|
}
|
||||||
|
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
} else {
|
} else {
|
||||||
logger.info('Custom config file loaded:');
|
logger.info('Custom config file loaded:');
|
||||||
|
|
|
||||||
50
api/server/services/Files/Audio/getCustomConfigSpeech.js
Normal file
50
api/server/services/Files/Audio/getCustomConfigSpeech.js
Normal file
|
|
@ -0,0 +1,50 @@
|
||||||
|
const getCustomConfig = require('~/server/services/Config/getCustomConfig');
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This function retrieves the speechTab settings from the custom configuration
|
||||||
|
* It first fetches the custom configuration
|
||||||
|
* Then, it checks if the custom configuration and the speechTab schema exist
|
||||||
|
* If they do, it sends the speechTab settings as a JSON response
|
||||||
|
* If they don't, the error raised internally is caught and an empty 200 response is sent instead
|
||||||
|
*
|
||||||
|
* @param {Object} req - The request object
|
||||||
|
* @param {Object} res - The response object
|
||||||
|
* @returns {Promise<void>}
|
||||||
|
* Note: a missing custom configuration or speechTab schema does not propagate an error to the caller; it is caught and answered with an empty 200 response
|
||||||
|
*/
|
||||||
|
async function getCustomConfigSpeech(req, res) {
|
||||||
|
try {
|
||||||
|
const customConfig = await getCustomConfig();
|
||||||
|
|
||||||
|
if (!customConfig || !customConfig.speech?.speechTab) {
|
||||||
|
throw new Error('Configuration or speechTab schema is missing');
|
||||||
|
}
|
||||||
|
|
||||||
|
const ttsSchema = customConfig.speech?.speechTab;
|
||||||
|
let settings = {};
|
||||||
|
|
||||||
|
if (ttsSchema.advancedMode !== undefined) {
|
||||||
|
settings.advancedMode = ttsSchema.advancedMode;
|
||||||
|
}
|
||||||
|
if (ttsSchema.speechToText) {
|
||||||
|
for (const key in ttsSchema.speechToText) {
|
||||||
|
if (ttsSchema.speechToText[key] !== undefined) {
|
||||||
|
settings[key] = ttsSchema.speechToText[key];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ttsSchema.textToSpeech) {
|
||||||
|
for (const key in ttsSchema.textToSpeech) {
|
||||||
|
if (ttsSchema.textToSpeech[key] !== undefined) {
|
||||||
|
settings[key] = ttsSchema.textToSpeech[key];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
res.json(settings);
|
||||||
|
} catch (error) {
|
||||||
|
res.status(200).send();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
module.exports = getCustomConfigSpeech;
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
const { logger } = require('~/config');
|
|
||||||
const getCustomConfig = require('~/server/services/Config/getCustomConfig');
|
const getCustomConfig = require('~/server/services/Config/getCustomConfig');
|
||||||
const { getProvider } = require('./textToSpeech');
|
const { getProvider } = require('./textToSpeech');
|
||||||
|
|
||||||
|
|
@ -16,11 +15,11 @@ async function getVoices(req, res) {
|
||||||
try {
|
try {
|
||||||
const customConfig = await getCustomConfig();
|
const customConfig = await getCustomConfig();
|
||||||
|
|
||||||
if (!customConfig || !customConfig?.tts) {
|
if (!customConfig || !customConfig?.speech?.tts) {
|
||||||
throw new Error('Configuration or TTS schema is missing');
|
throw new Error('Configuration or TTS schema is missing');
|
||||||
}
|
}
|
||||||
|
|
||||||
const ttsSchema = customConfig?.tts;
|
const ttsSchema = customConfig?.speech?.tts;
|
||||||
const provider = getProvider(ttsSchema);
|
const provider = getProvider(ttsSchema);
|
||||||
let voices;
|
let voices;
|
||||||
|
|
||||||
|
|
@ -40,8 +39,7 @@ async function getVoices(req, res) {
|
||||||
|
|
||||||
res.json(voices);
|
res.json(voices);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(`Failed to get voices: ${error.message}`);
|
res.status(500).json({ error: `Failed to get voices: ${error.message}` });
|
||||||
res.status(500).json({ error: 'Failed to get voices' });
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,11 @@
|
||||||
const getVoices = require('./getVoices');
|
const getVoices = require('./getVoices');
|
||||||
|
const getCustomConfigSpeech = require('./getCustomConfigSpeech');
|
||||||
const textToSpeech = require('./textToSpeech');
|
const textToSpeech = require('./textToSpeech');
|
||||||
const speechToText = require('./speechToText');
|
const speechToText = require('./speechToText');
|
||||||
const { updateTokenWebsocket } = require('./webSocket');
|
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
getVoices,
|
getVoices,
|
||||||
|
getCustomConfigSpeech,
|
||||||
speechToText,
|
speechToText,
|
||||||
...textToSpeech,
|
...textToSpeech,
|
||||||
updateTokenWebsocket,
|
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@ async function handleResponse(response) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function getProvider(sttSchema) {
|
function getProvider(sttSchema) {
|
||||||
if (sttSchema.openai) {
|
if (sttSchema?.openai) {
|
||||||
return 'openai';
|
return 'openai';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -176,7 +176,7 @@ async function speechToText(req, res) {
|
||||||
const audioReadStream = Readable.from(audioBuffer);
|
const audioReadStream = Readable.from(audioBuffer);
|
||||||
audioReadStream.path = 'audio.wav';
|
audioReadStream.path = 'audio.wav';
|
||||||
|
|
||||||
const provider = getProvider(customConfig.stt);
|
const provider = getProvider(customConfig.speech.stt);
|
||||||
|
|
||||||
let [url, data, headers] = [];
|
let [url, data, headers] = [];
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -191,8 +191,8 @@ function localAIProvider(ttsSchema, input, voice) {
|
||||||
* @returns {Promise<[string, TProviderSchema]>}
|
* @returns {Promise<[string, TProviderSchema]>}
|
||||||
*/
|
*/
|
||||||
async function getProviderSchema(customConfig) {
|
async function getProviderSchema(customConfig) {
|
||||||
const provider = getProvider(customConfig.tts);
|
const provider = getProvider(customConfig.speech.tts);
|
||||||
return [provider, customConfig.tts[provider]];
|
return [provider, customConfig.speech.tts[provider]];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -1,31 +0,0 @@
|
||||||
let token = '';
|
|
||||||
|
|
||||||
function updateTokenWebsocket(newToken) {
|
|
||||||
console.log('Token:', newToken);
|
|
||||||
token = newToken;
|
|
||||||
}
|
|
||||||
|
|
||||||
function sendTextToWebsocket(ws, onDataReceived) {
|
|
||||||
if (token === '[DONE]') {
|
|
||||||
ws.send(' ');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ws.readyState === WebSocket.OPEN) {
|
|
||||||
ws.send(token);
|
|
||||||
|
|
||||||
ws.onmessage = function (event) {
|
|
||||||
console.log('Received:', event.data);
|
|
||||||
if (onDataReceived) {
|
|
||||||
onDataReceived(event.data); // Pass the received data to the callback function
|
|
||||||
}
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
console.error('WebSocket is not open. Ready state is: ' + ws.readyState);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
module.exports = {
|
|
||||||
updateTokenWebsocket,
|
|
||||||
sendTextToWebsocket,
|
|
||||||
};
|
|
||||||
|
|
@ -31,15 +31,26 @@ export default function AudioRecorder({
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const { isListening, isLoading, startRecording, stopRecording, speechText, clearText } =
|
const {
|
||||||
useSpeechToText(handleTranscriptionComplete);
|
isListening,
|
||||||
|
isLoading,
|
||||||
|
startRecording,
|
||||||
|
stopRecording,
|
||||||
|
interimTranscript,
|
||||||
|
speechText,
|
||||||
|
clearText,
|
||||||
|
} = useSpeechToText(handleTranscriptionComplete);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (textAreaRef.current) {
|
if (isListening && textAreaRef.current) {
|
||||||
|
methods.setValue('text', interimTranscript, {
|
||||||
|
shouldValidate: true,
|
||||||
|
});
|
||||||
|
} else if (textAreaRef.current) {
|
||||||
textAreaRef.current.value = speechText;
|
textAreaRef.current.value = speechText;
|
||||||
methods.setValue('text', speechText, { shouldValidate: true });
|
methods.setValue('text', speechText, { shouldValidate: true });
|
||||||
}
|
}
|
||||||
}, [speechText, methods, textAreaRef]);
|
}, [interimTranscript, speechText, methods, textAreaRef]);
|
||||||
|
|
||||||
const handleStartRecording = async () => {
|
const handleStartRecording = async () => {
|
||||||
await startRecording();
|
await startRecording();
|
||||||
|
|
|
||||||
|
|
@ -38,8 +38,8 @@ const ChatForm = ({ index = 0 }) => {
|
||||||
const submitButtonRef = useRef<HTMLButtonElement>(null);
|
const submitButtonRef = useRef<HTMLButtonElement>(null);
|
||||||
const textAreaRef = useRef<HTMLTextAreaElement | null>(null);
|
const textAreaRef = useRef<HTMLTextAreaElement | null>(null);
|
||||||
|
|
||||||
const SpeechToText = useRecoilValue(store.SpeechToText);
|
const SpeechToText = useRecoilValue(store.speechToText);
|
||||||
const TextToSpeech = useRecoilValue(store.TextToSpeech);
|
const TextToSpeech = useRecoilValue(store.textToSpeech);
|
||||||
const automaticPlayback = useRecoilValue(store.automaticPlayback);
|
const automaticPlayback = useRecoilValue(store.automaticPlayback);
|
||||||
|
|
||||||
const [showStopButton, setShowStopButton] = useRecoilState(store.showStopButtonByIndex(index));
|
const [showStopButton, setShowStopButton] = useRecoilState(store.showStopButtonByIndex(index));
|
||||||
|
|
|
||||||
|
|
@ -96,7 +96,7 @@ export default function DataTable<TData, TValue>({ columns, data }: DataTablePro
|
||||||
deleteFiles({ files: filesToDelete as TFile[] });
|
deleteFiles({ files: filesToDelete as TFile[] });
|
||||||
setRowSelection({});
|
setRowSelection({});
|
||||||
}}
|
}}
|
||||||
className="dark:hover:bg-gray-850/25 ml-1 gap-2 sm:ml-0"
|
className="ml-1 gap-2 dark:hover:bg-gray-850/25 sm:ml-0"
|
||||||
disabled={!table.getFilteredSelectedRowModel().rows.length || isDeleting}
|
disabled={!table.getFilteredSelectedRowModel().rows.length || isDeleting}
|
||||||
>
|
>
|
||||||
{isDeleting ? (
|
{isDeleting ? (
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,7 @@ export default function HoverButtons({
|
||||||
const { endpoint: _endpoint, endpointType } = conversation ?? {};
|
const { endpoint: _endpoint, endpointType } = conversation ?? {};
|
||||||
const endpoint = endpointType ?? _endpoint;
|
const endpoint = endpointType ?? _endpoint;
|
||||||
const [isCopied, setIsCopied] = useState(false);
|
const [isCopied, setIsCopied] = useState(false);
|
||||||
const [TextToSpeech] = useRecoilState<boolean>(store.TextToSpeech);
|
const [TextToSpeech] = useRecoilState<boolean>(store.textToSpeech);
|
||||||
|
|
||||||
const {
|
const {
|
||||||
hideEditButton,
|
hideEditButton,
|
||||||
|
|
|
||||||
|
|
@ -106,7 +106,7 @@ export default function DataTableFile<TData, TValue>({
|
||||||
deleteFiles({ files: filesToDelete as TFile[] });
|
deleteFiles({ files: filesToDelete as TFile[] });
|
||||||
setRowSelection({});
|
setRowSelection({});
|
||||||
}}
|
}}
|
||||||
className="dark:hover:bg-gray-850/25 ml-1 gap-2 sm:ml-0"
|
className="ml-1 gap-2 dark:hover:bg-gray-850/25 sm:ml-0"
|
||||||
disabled={!table.getFilteredSelectedRowModel().rows.length || isDeleting}
|
disabled={!table.getFilteredSelectedRowModel().rows.length || isDeleting}
|
||||||
>
|
>
|
||||||
{isDeleting ? (
|
{isDeleting ? (
|
||||||
|
|
|
||||||
|
|
@ -75,18 +75,21 @@ export const fileTableColumns: ColumnDef<TFile>[] = [
|
||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
{attachedVectorStores.map((vectorStore, index) => {
|
{attachedVectorStores.map((vectorStore, index) => {
|
||||||
if (index === 4)
|
if (index === 4) {
|
||||||
{return (
|
return (
|
||||||
<span
|
<span
|
||||||
key={index}
|
key={index}
|
||||||
className="ml-2 mt-2 flex w-fit flex-row items-center rounded-full bg-[#f5f5f5] px-2 text-gray-500"
|
className="ml-2 mt-2 flex w-fit flex-row items-center rounded-full bg-[#f5f5f5] px-2 text-gray-500"
|
||||||
>
|
>
|
||||||
<PlusIcon className="h-3 w-3" />
|
<PlusIcon className="h-3 w-3" />
|
||||||
|
|
||||||
{attachedVectorStores.length - index} more
|
{attachedVectorStores.length - index} more
|
||||||
</span>
|
</span>
|
||||||
);}
|
);
|
||||||
if (index > 4) {return null;}
|
}
|
||||||
|
if (index > 4) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
return (
|
return (
|
||||||
<span key={index} className="ml-2 mt-2 rounded-full bg-[#f2f8ec] px-2 text-[#91c561]">
|
<span key={index} className="ml-2 mt-2 rounded-full bg-[#f2f8ec] px-2 text-[#91c561]">
|
||||||
{vectorStore.name}
|
{vectorStore.name}
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ export default function ScrollToBottom({ scrollHandler }: Props) {
|
||||||
return (
|
return (
|
||||||
<button
|
<button
|
||||||
onClick={scrollHandler}
|
onClick={scrollHandler}
|
||||||
className="dark:bg-gray-850/90 absolute bottom-5 right-1/2 cursor-pointer rounded-full border border-gray-200 bg-white bg-clip-padding text-gray-600 dark:border-white/10 dark:text-gray-200"
|
className="absolute bottom-5 right-1/2 cursor-pointer rounded-full border border-gray-200 bg-white bg-clip-padding text-gray-600 dark:border-white/10 dark:bg-gray-850/90 dark:text-gray-200"
|
||||||
>
|
>
|
||||||
<svg
|
<svg
|
||||||
width="24"
|
width="24"
|
||||||
|
|
|
||||||
|
|
@ -10,18 +10,16 @@ export default function ConversationModeSwitch({
|
||||||
}) {
|
}) {
|
||||||
const localize = useLocalize();
|
const localize = useLocalize();
|
||||||
const [conversationMode, setConversationMode] = useRecoilState<boolean>(store.conversationMode);
|
const [conversationMode, setConversationMode] = useRecoilState<boolean>(store.conversationMode);
|
||||||
const [advancedMode] = useRecoilState<boolean>(store.advancedMode);
|
const [speechToText] = useRecoilState<boolean>(store.speechToText);
|
||||||
const [textToSpeech] = useRecoilState<boolean>(store.TextToSpeech);
|
const [textToSpeech] = useRecoilState<boolean>(store.textToSpeech);
|
||||||
const [, setAutoSendText] = useRecoilState<boolean>(store.autoSendText);
|
const [, setAutoSendText] = useRecoilState<boolean>(store.autoSendText);
|
||||||
const [, setDecibelValue] = useRecoilState(store.decibelValue);
|
const [, setDecibelValue] = useRecoilState(store.decibelValue);
|
||||||
const [, setAutoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);
|
const [, setAutoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);
|
||||||
|
|
||||||
const handleCheckedChange = (value: boolean) => {
|
const handleCheckedChange = (value: boolean) => {
|
||||||
if (!advancedMode) {
|
setAutoTranscribeAudio(value);
|
||||||
setAutoTranscribeAudio(value);
|
setAutoSendText(value);
|
||||||
setAutoSendText(value);
|
setDecibelValue(-45);
|
||||||
setDecibelValue(-45);
|
|
||||||
}
|
|
||||||
setConversationMode(value);
|
setConversationMode(value);
|
||||||
if (onCheckedChange) {
|
if (onCheckedChange) {
|
||||||
onCheckedChange(value);
|
onCheckedChange(value);
|
||||||
|
|
@ -40,7 +38,7 @@ export default function ConversationModeSwitch({
|
||||||
onCheckedChange={handleCheckedChange}
|
onCheckedChange={handleCheckedChange}
|
||||||
className="ml-4"
|
className="ml-4"
|
||||||
data-testid="ConversationMode"
|
data-testid="ConversationMode"
|
||||||
disabled={!textToSpeech}
|
disabled={!textToSpeech || !speechToText}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ export default function AutoSendTextSwitch({
|
||||||
}) {
|
}) {
|
||||||
const localize = useLocalize();
|
const localize = useLocalize();
|
||||||
const [autoSendText, setAutoSendText] = useRecoilState<boolean>(store.autoSendText);
|
const [autoSendText, setAutoSendText] = useRecoilState<boolean>(store.autoSendText);
|
||||||
const [SpeechToText] = useRecoilState<boolean>(store.SpeechToText);
|
const [SpeechToText] = useRecoilState<boolean>(store.speechToText);
|
||||||
|
|
||||||
const handleCheckedChange = (value: boolean) => {
|
const handleCheckedChange = (value: boolean) => {
|
||||||
setAutoSendText(value);
|
setAutoSendText(value);
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ export default function AutoTranscribeAudioSwitch({
|
||||||
const [autoTranscribeAudio, setAutoTranscribeAudio] = useRecoilState<boolean>(
|
const [autoTranscribeAudio, setAutoTranscribeAudio] = useRecoilState<boolean>(
|
||||||
store.autoTranscribeAudio,
|
store.autoTranscribeAudio,
|
||||||
);
|
);
|
||||||
const [speechToText] = useRecoilState<boolean>(store.SpeechToText);
|
const [speechToText] = useRecoilState<boolean>(store.speechToText);
|
||||||
|
|
||||||
const handleCheckedChange = (value: boolean) => {
|
const handleCheckedChange = (value: boolean) => {
|
||||||
setAutoTranscribeAudio(value);
|
setAutoTranscribeAudio(value);
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ import { cn, defaultTextProps, optionText } from '~/utils/';
|
||||||
|
|
||||||
export default function DecibelSelector() {
|
export default function DecibelSelector() {
|
||||||
const localize = useLocalize();
|
const localize = useLocalize();
|
||||||
const speechToText = useRecoilValue(store.SpeechToText);
|
const speechToText = useRecoilValue(store.speechToText);
|
||||||
const [decibelValue, setDecibelValue] = useRecoilState(store.decibelValue);
|
const [decibelValue, setDecibelValue] = useRecoilState(store.decibelValue);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
|
|
|
||||||
|
|
@ -5,21 +5,21 @@ import store from '~/store';
|
||||||
|
|
||||||
export default function EngineSTTDropdown() {
|
export default function EngineSTTDropdown() {
|
||||||
const localize = useLocalize();
|
const localize = useLocalize();
|
||||||
const [endpointSTT, setEndpointSTT] = useRecoilState<string>(store.endpointSTT);
|
const [engineSTT, setEngineSTT] = useRecoilState<string>(store.engineSTT);
|
||||||
const endpointOptions = [
|
const endpointOptions = [
|
||||||
{ value: 'browser', display: localize('com_nav_browser') },
|
{ value: 'browser', display: localize('com_nav_browser') },
|
||||||
{ value: 'external', display: localize('com_nav_external') },
|
{ value: 'external', display: localize('com_nav_external') },
|
||||||
];
|
];
|
||||||
|
|
||||||
const handleSelect = (value: string) => {
|
const handleSelect = (value: string) => {
|
||||||
setEndpointSTT(value);
|
setEngineSTT(value);
|
||||||
};
|
};
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="flex items-center justify-between">
|
<div className="flex items-center justify-between">
|
||||||
<div>{localize('com_nav_engine')}</div>
|
<div>{localize('com_nav_engine')}</div>
|
||||||
<Dropdown
|
<Dropdown
|
||||||
value={endpointSTT}
|
value={engineSTT}
|
||||||
onChange={handleSelect}
|
onChange={handleSelect}
|
||||||
options={endpointOptions}
|
options={endpointOptions}
|
||||||
width={180}
|
width={180}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,107 @@
|
||||||
|
import { useRecoilState } from 'recoil';
|
||||||
|
import { Dropdown } from '~/components/ui';
|
||||||
|
import { useLocalize } from '~/hooks';
|
||||||
|
import store from '~/store';
|
||||||
|
|
||||||
|
export default function LanguageSTTDropdown() {
|
||||||
|
const localize = useLocalize();
|
||||||
|
const [languageSTT, setLanguageSTT] = useRecoilState<string>(store.languageSTT);
|
||||||
|
|
||||||
|
const languageOptions = [
|
||||||
|
{ value: 'af', display: 'Afrikaans' },
|
||||||
|
{ value: 'eu', display: 'Basque' },
|
||||||
|
{ value: 'bg', display: 'Bulgarian' },
|
||||||
|
{ value: 'ca', display: 'Catalan' },
|
||||||
|
{ value: 'ar-EG', display: 'Arabic (Egypt)' },
|
||||||
|
{ value: 'ar-JO', display: 'Arabic (Jordan)' },
|
||||||
|
{ value: 'ar-KW', display: 'Arabic (Kuwait)' },
|
||||||
|
{ value: 'ar-LB', display: 'Arabic (Lebanon)' },
|
||||||
|
{ value: 'ar-QA', display: 'Arabic (Qatar)' },
|
||||||
|
{ value: 'ar-AE', display: 'Arabic (UAE)' },
|
||||||
|
{ value: 'ar-MA', display: 'Arabic (Morocco)' },
|
||||||
|
{ value: 'ar-IQ', display: 'Arabic (Iraq)' },
|
||||||
|
{ value: 'ar-DZ', display: 'Arabic (Algeria)' },
|
||||||
|
{ value: 'ar-BH', display: 'Arabic (Bahrain)' },
|
||||||
|
{ value: 'ar-LY', display: 'Arabic (Libya)' },
|
||||||
|
{ value: 'ar-OM', display: 'Arabic (Oman)' },
|
||||||
|
{ value: 'ar-SA', display: 'Arabic (Saudi Arabia)' },
|
||||||
|
{ value: 'ar-TN', display: 'Arabic (Tunisia)' },
|
||||||
|
{ value: 'ar-YE', display: 'Arabic (Yemen)' },
|
||||||
|
{ value: 'cs', display: 'Czech' },
|
||||||
|
{ value: 'nl-NL', display: 'Dutch' },
|
||||||
|
{ value: 'en-AU', display: 'English (Australia)' },
|
||||||
|
{ value: 'en-CA', display: 'English (Canada)' },
|
||||||
|
{ value: 'en-IN', display: 'English (India)' },
|
||||||
|
{ value: 'en-NZ', display: 'English (New Zealand)' },
|
||||||
|
{ value: 'en-ZA', display: 'English (South Africa)' },
|
||||||
|
{ value: 'en-GB', display: 'English (UK)' },
|
||||||
|
{ value: 'en-US', display: 'English (US)' },
|
||||||
|
{ value: 'fi', display: 'Finnish' },
|
||||||
|
{ value: 'fr-FR', display: 'French' },
|
||||||
|
{ value: 'gl', display: 'Galician' },
|
||||||
|
{ value: 'de-DE', display: 'German' },
|
||||||
|
{ value: 'el-GR', display: 'Greek' },
|
||||||
|
{ value: 'he', display: 'Hebrew' },
|
||||||
|
{ value: 'hu', display: 'Hungarian' },
|
||||||
|
{ value: 'is', display: 'Icelandic' },
|
||||||
|
{ value: 'it-IT', display: 'Italian' },
|
||||||
|
{ value: 'id', display: 'Indonesian' },
|
||||||
|
{ value: 'ja', display: 'Japanese' },
|
||||||
|
{ value: 'ko', display: 'Korean' },
|
||||||
|
{ value: 'la', display: 'Latin' },
|
||||||
|
{ value: 'zh-CN', display: 'Mandarin Chinese' },
|
||||||
|
{ value: 'zh-TW', display: 'Taiwanese' },
|
||||||
|
{ value: 'zh-HK', display: 'Cantonese' },
|
||||||
|
{ value: 'ms-MY', display: 'Malaysian' },
|
||||||
|
{ value: 'no-NO', display: 'Norwegian' },
|
||||||
|
{ value: 'pl', display: 'Polish' },
|
||||||
|
{ value: 'xx-piglatin', display: 'Pig Latin' },
|
||||||
|
{ value: 'pt-PT', display: 'Portuguese' },
|
||||||
|
{ value: 'pt-br', display: 'Portuguese (Brasil)' },
|
||||||
|
{ value: 'ro-RO', display: 'Romanian' },
|
||||||
|
{ value: 'ru', display: 'Russian' },
|
||||||
|
{ value: 'sr-SP', display: 'Serbian' },
|
||||||
|
{ value: 'sk', display: 'Slovak' },
|
||||||
|
{ value: 'es-AR', display: 'Spanish (Argentina)' },
|
||||||
|
{ value: 'es-BO', display: 'Spanish (Bolivia)' },
|
||||||
|
{ value: 'es-CL', display: 'Spanish (Chile)' },
|
||||||
|
{ value: 'es-CO', display: 'Spanish (Colombia)' },
|
||||||
|
{ value: 'es-CR', display: 'Spanish (Costa Rica)' },
|
||||||
|
{ value: 'es-DO', display: 'Spanish (Dominican Republic)' },
|
||||||
|
{ value: 'es-EC', display: 'Spanish (Ecuador)' },
|
||||||
|
{ value: 'es-SV', display: 'Spanish (El Salvador)' },
|
||||||
|
{ value: 'es-GT', display: 'Spanish (Guatemala)' },
|
||||||
|
{ value: 'es-HN', display: 'Spanish (Honduras)' },
|
||||||
|
{ value: 'es-MX', display: 'Spanish (Mexico)' },
|
||||||
|
{ value: 'es-NI', display: 'Spanish (Nicaragua)' },
|
||||||
|
{ value: 'es-PA', display: 'Spanish (Panama)' },
|
||||||
|
{ value: 'es-PY', display: 'Spanish (Paraguay)' },
|
||||||
|
{ value: 'es-PE', display: 'Spanish (Peru)' },
|
||||||
|
{ value: 'es-PR', display: 'Spanish (Puerto Rico)' },
|
||||||
|
{ value: 'es-ES', display: 'Spanish (Spain)' },
|
||||||
|
{ value: 'es-US', display: 'Spanish (US)' },
|
||||||
|
{ value: 'es-UY', display: 'Spanish (Uruguay)' },
|
||||||
|
{ value: 'es-VE', display: 'Spanish (Venezuela)' },
|
||||||
|
{ value: 'sv-SE', display: 'Swedish' },
|
||||||
|
{ value: 'tr', display: 'Turkish' },
|
||||||
|
{ value: 'zu', display: 'Zulu' },
|
||||||
|
];
|
||||||
|
|
||||||
|
const handleSelect = (value: string) => {
|
||||||
|
setLanguageSTT(value);
|
||||||
|
};
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="flex items-center justify-between">
|
||||||
|
<div>{localize('com_nav_language')}</div>
|
||||||
|
<Dropdown
|
||||||
|
value={languageSTT}
|
||||||
|
onChange={handleSelect}
|
||||||
|
options={languageOptions}
|
||||||
|
width={220}
|
||||||
|
position={'left'}
|
||||||
|
testId="LanguageSTTDropdown"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
@ -9,7 +9,7 @@ export default function SpeechToTextSwitch({
|
||||||
onCheckedChange?: (value: boolean) => void;
|
onCheckedChange?: (value: boolean) => void;
|
||||||
}) {
|
}) {
|
||||||
const localize = useLocalize();
|
const localize = useLocalize();
|
||||||
const [speechToText, setSpeechToText] = useRecoilState<boolean>(store.SpeechToText);
|
const [speechToText, setSpeechToText] = useRecoilState<boolean>(store.speechToText);
|
||||||
|
|
||||||
const handleCheckedChange = (value: boolean) => {
|
const handleCheckedChange = (value: boolean) => {
|
||||||
setSpeechToText(value);
|
setSpeechToText(value);
|
||||||
|
|
|
||||||
|
|
@ -3,3 +3,4 @@ export { default as SpeechToTextSwitch } from './SpeechToTextSwitch';
|
||||||
export { default as EngineSTTDropdown } from './EngineSTTDropdown';
|
export { default as EngineSTTDropdown } from './EngineSTTDropdown';
|
||||||
export { default as DecibelSelector } from './DecibelSelector';
|
export { default as DecibelSelector } from './DecibelSelector';
|
||||||
export { default as AutoTranscribeAudioSwitch } from './AutoTranscribeAudioSwitch';
|
export { default as AutoTranscribeAudioSwitch } from './AutoTranscribeAudioSwitch';
|
||||||
|
export { default as LanguageSTTDropdown } from './LanguageSTTDropdown';
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
import * as Tabs from '@radix-ui/react-tabs';
|
import * as Tabs from '@radix-ui/react-tabs';
|
||||||
import { SettingsTabValues } from 'librechat-data-provider';
|
import { SettingsTabValues } from 'librechat-data-provider';
|
||||||
import React, { useState, useRef } from 'react';
|
import React, { useState, useRef, useEffect, useCallback } from 'react';
|
||||||
import { useRecoilState } from 'recoil';
|
import { useRecoilState } from 'recoil';
|
||||||
import { Lightbulb, Cog } from 'lucide-react';
|
import { Lightbulb, Cog } from 'lucide-react';
|
||||||
import { useOnClickOutside, useMediaQuery } from '~/hooks';
|
import { useOnClickOutside, useMediaQuery } from '~/hooks';
|
||||||
|
|
@ -10,7 +10,7 @@ import ConversationModeSwitch from './ConversationModeSwitch';
|
||||||
import {
|
import {
|
||||||
TextToSpeechSwitch,
|
TextToSpeechSwitch,
|
||||||
EngineTTSDropdown,
|
EngineTTSDropdown,
|
||||||
AutomaticPlayback,
|
AutomaticPlaybackSwitch,
|
||||||
CacheTTSSwitch,
|
CacheTTSSwitch,
|
||||||
VoiceDropdown,
|
VoiceDropdown,
|
||||||
PlaybackRate,
|
PlaybackRate,
|
||||||
|
|
@ -18,16 +18,100 @@ import {
|
||||||
import {
|
import {
|
||||||
DecibelSelector,
|
DecibelSelector,
|
||||||
EngineSTTDropdown,
|
EngineSTTDropdown,
|
||||||
|
LanguageSTTDropdown,
|
||||||
SpeechToTextSwitch,
|
SpeechToTextSwitch,
|
||||||
AutoSendTextSwitch,
|
AutoSendTextSwitch,
|
||||||
AutoTranscribeAudioSwitch,
|
AutoTranscribeAudioSwitch,
|
||||||
} from './STT';
|
} from './STT';
|
||||||
|
import { useCustomConfigSpeechQuery } from '~/data-provider';
|
||||||
|
|
||||||
function Speech() {
|
function Speech() {
|
||||||
const isSmallScreen = useMediaQuery('(max-width: 767px)');
|
|
||||||
const [advancedMode, setAdvancedMode] = useRecoilState<boolean>(store.advancedMode);
|
|
||||||
const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);
|
|
||||||
const [confirmClear, setConfirmClear] = useState(false);
|
const [confirmClear, setConfirmClear] = useState(false);
|
||||||
|
const { data } = useCustomConfigSpeechQuery();
|
||||||
|
const isSmallScreen = useMediaQuery('(max-width: 767px)');
|
||||||
|
|
||||||
|
const [advancedMode, setAdvancedMode] = useRecoilState(store.advancedMode);
|
||||||
|
const [autoTranscribeAudio, setAutoTranscribeAudio] = useRecoilState(store.autoTranscribeAudio);
|
||||||
|
const [conversationMode, setConversationMode] = useRecoilState(store.conversationMode);
|
||||||
|
const [speechToText, setSpeechToText] = useRecoilState(store.speechToText);
|
||||||
|
const [textToSpeech, setTextToSpeech] = useRecoilState(store.textToSpeech);
|
||||||
|
const [cacheTTS, setCacheTTS] = useRecoilState(store.cacheTTS);
|
||||||
|
const [engineSTT, setEngineSTT] = useRecoilState<string>(store.engineSTT);
|
||||||
|
const [languageSTT, setLanguageSTT] = useRecoilState<string>(store.languageSTT);
|
||||||
|
const [decibelValue, setDecibelValue] = useRecoilState(store.decibelValue);
|
||||||
|
const [autoSendText, setAutoSendText] = useRecoilState(store.autoSendText);
|
||||||
|
const [engineTTS, setEngineTTS] = useRecoilState<string>(store.engineTTS);
|
||||||
|
const [voice, setVoice] = useRecoilState<string>(store.voice);
|
||||||
|
const [languageTTS, setLanguageTTS] = useRecoilState<string>(store.languageTTS);
|
||||||
|
const [automaticPlayback, setAutomaticPlayback] = useRecoilState(store.automaticPlayback);
|
||||||
|
const [playbackRate, setPlaybackRate] = useRecoilState(store.playbackRate);
|
||||||
|
|
||||||
|
/**
 * Applies one speech setting taken from the custom speech config response.
 *
 * `key` is looked up in a table of Recoil setters; when it names one of the
 * known speech settings, `newValue` is written to the matching atom. Keys that
 * are not in the table are ignored.
 *
 * Only the setters are listed as dependencies. The current atom values are not
 * read here, and listing them made this callback change identity on every
 * settings change. NOTE(review): Recoil setters are presumably stable across
 * renders — confirm against the Recoil version in use.
 */
const updateSetting = useCallback(
  (key: string, newValue: unknown) => {
    // The setters accept different value types, so the table is typed loosely on
    // purpose; the values come straight from the config response.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const setters: Record<string, (value: any) => void> = {
      conversationMode: setConversationMode,
      advancedMode: setAdvancedMode,
      speechToText: setSpeechToText,
      textToSpeech: setTextToSpeech,
      cacheTTS: setCacheTTS,
      engineSTT: setEngineSTT,
      languageSTT: setLanguageSTT,
      autoTranscribeAudio: setAutoTranscribeAudio,
      decibelValue: setDecibelValue,
      autoSendText: setAutoSendText,
      engineTTS: setEngineTTS,
      voice: setVoice,
      languageTTS: setLanguageTTS,
      automaticPlayback: setAutomaticPlayback,
      playbackRate: setPlaybackRate,
    };

    // Own-property check so inherited keys such as "constructor" are never
    // mistaken for a setting.
    const applySetting = Object.prototype.hasOwnProperty.call(setters, key)
      ? setters[key]
      : undefined;
    applySetting?.(newValue);
  },
  [
    setConversationMode,
    setAdvancedMode,
    setSpeechToText,
    setTextToSpeech,
    setCacheTTS,
    setEngineSTT,
    setLanguageSTT,
    setAutoTranscribeAudio,
    setDecibelValue,
    setAutoSendText,
    setEngineTTS,
    setVoice,
    setLanguageTTS,
    setAutomaticPlayback,
    setPlaybackRate,
  ],
);

// Push every entry of the fetched config into its atom. `data` comes from an
// asynchronous query, so the effect has to re-run when it arrives; with an
// empty dependency array it only ever saw the value from the first render.
useEffect(() => {
  if (!data) {
    return;
  }
  Object.entries(data).forEach(([key, value]) => {
    updateSetting(key, value);
  });
}, [data, updateSetting]);
|
||||||
|
|
||||||
const contentRef = useRef(null);
|
const contentRef = useRef(null);
|
||||||
useOnClickOutside(contentRef, () => confirmClear && setConfirmClear(false), []);
|
useOnClickOutside(contentRef, () => confirmClear && setConfirmClear(false), []);
|
||||||
|
|
@ -91,13 +175,13 @@ function Speech() {
|
||||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||||
<EngineSTTDropdown />
|
<EngineSTTDropdown />
|
||||||
</div>
|
</div>
|
||||||
|
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||||
|
<LanguageSTTDropdown />
|
||||||
|
</div>
|
||||||
<div className="h-px bg-black/20 bg-white/20" role="none" />
|
<div className="h-px bg-black/20 bg-white/20" role="none" />
|
||||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||||
<TextToSpeechSwitch />
|
<TextToSpeechSwitch />
|
||||||
</div>
|
</div>
|
||||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
|
||||||
<AutomaticPlayback />
|
|
||||||
</div>
|
|
||||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||||
<EngineTTSDropdown />
|
<EngineTTSDropdown />
|
||||||
</div>
|
</div>
|
||||||
|
|
@ -119,6 +203,9 @@ function Speech() {
|
||||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||||
<EngineSTTDropdown />
|
<EngineSTTDropdown />
|
||||||
</div>
|
</div>
|
||||||
|
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||||
|
<LanguageSTTDropdown />
|
||||||
|
</div>
|
||||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||||
<AutoTranscribeAudioSwitch />
|
<AutoTranscribeAudioSwitch />
|
||||||
</div>
|
</div>
|
||||||
|
|
@ -135,7 +222,7 @@ function Speech() {
|
||||||
<TextToSpeechSwitch />
|
<TextToSpeechSwitch />
|
||||||
</div>
|
</div>
|
||||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||||
<AutomaticPlayback />
|
<AutomaticPlaybackSwitch />
|
||||||
</div>
|
</div>
|
||||||
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
<div className="border-b pb-3 last-of-type:border-b-0 dark:border-gray-700">
|
||||||
<EngineTTSDropdown />
|
<EngineTTSDropdown />
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ import { Switch } from '~/components/ui';
|
||||||
import { useLocalize } from '~/hooks';
|
import { useLocalize } from '~/hooks';
|
||||||
import store from '~/store';
|
import store from '~/store';
|
||||||
|
|
||||||
export default function AutomaticPlayback({
|
export default function AutomaticPlaybackSwitch({
|
||||||
onCheckedChange,
|
onCheckedChange,
|
||||||
}: {
|
}: {
|
||||||
onCheckedChange?: (value: boolean) => void;
|
onCheckedChange?: (value: boolean) => void;
|
||||||
|
|
@ -10,7 +10,7 @@ export default function CacheTTSSwitch({
|
||||||
}) {
|
}) {
|
||||||
const localize = useLocalize();
|
const localize = useLocalize();
|
||||||
const [cacheTTS, setCacheTTS] = useRecoilState<boolean>(store.cacheTTS);
|
const [cacheTTS, setCacheTTS] = useRecoilState<boolean>(store.cacheTTS);
|
||||||
const [textToSpeech] = useRecoilState<boolean>(store.TextToSpeech);
|
const [textToSpeech] = useRecoilState<boolean>(store.textToSpeech);
|
||||||
|
|
||||||
const handleCheckedChange = (value: boolean) => {
|
const handleCheckedChange = (value: boolean) => {
|
||||||
setCacheTTS(value);
|
setCacheTTS(value);
|
||||||
|
|
|
||||||
|
|
@ -5,21 +5,21 @@ import store from '~/store';
|
||||||
|
|
||||||
export default function EngineTTSDropdown() {
|
export default function EngineTTSDropdown() {
|
||||||
const localize = useLocalize();
|
const localize = useLocalize();
|
||||||
const [endpointTTS, setEndpointTTS] = useRecoilState<string>(store.endpointTTS);
|
const [engineTTS, setEngineTTS] = useRecoilState<string>(store.engineTTS);
|
||||||
const endpointOptions = [
|
const endpointOptions = [
|
||||||
{ value: 'browser', display: localize('com_nav_browser') },
|
{ value: 'browser', display: localize('com_nav_browser') },
|
||||||
{ value: 'external', display: localize('com_nav_external') },
|
{ value: 'external', display: localize('com_nav_external') },
|
||||||
];
|
];
|
||||||
|
|
||||||
const handleSelect = (value: string) => {
|
const handleSelect = (value: string) => {
|
||||||
setEndpointTTS(value);
|
setEngineTTS(value);
|
||||||
};
|
};
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="flex items-center justify-between">
|
<div className="flex items-center justify-between">
|
||||||
<div>{localize('com_nav_engine')}</div>
|
<div>{localize('com_nav_engine')}</div>
|
||||||
<Dropdown
|
<Dropdown
|
||||||
value={endpointTTS}
|
value={engineTTS}
|
||||||
onChange={handleSelect}
|
onChange={handleSelect}
|
||||||
options={endpointOptions}
|
options={endpointOptions}
|
||||||
width={180}
|
width={180}
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ import { cn, defaultTextProps, optionText } from '~/utils/';
|
||||||
|
|
||||||
export default function DecibelSelector() {
|
export default function DecibelSelector() {
|
||||||
const localize = useLocalize();
|
const localize = useLocalize();
|
||||||
const textToSpeech = useRecoilValue(store.TextToSpeech);
|
const textToSpeech = useRecoilValue(store.textToSpeech);
|
||||||
const [playbackRate, setPlaybackRate] = useRecoilState(store.playbackRate);
|
const [playbackRate, setPlaybackRate] = useRecoilState(store.playbackRate);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ export default function TextToSpeechSwitch({
|
||||||
onCheckedChange?: (value: boolean) => void;
|
onCheckedChange?: (value: boolean) => void;
|
||||||
}) {
|
}) {
|
||||||
const localize = useLocalize();
|
const localize = useLocalize();
|
||||||
const [TextToSpeech, setTextToSpeech] = useRecoilState<boolean>(store.TextToSpeech);
|
const [TextToSpeech, setTextToSpeech] = useRecoilState<boolean>(store.textToSpeech);
|
||||||
|
|
||||||
const handleCheckedChange = (value: boolean) => {
|
const handleCheckedChange = (value: boolean) => {
|
||||||
setTextToSpeech(value);
|
setTextToSpeech(value);
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,38 @@
|
||||||
|
import React from 'react';
|
||||||
|
import '@testing-library/jest-dom/extend-expect';
|
||||||
|
import { render, fireEvent } from 'test/layout-test-utils';
|
||||||
|
import AutomaticPlaybackSwitch from '../AutomaticPlaybackSwitch';
|
||||||
|
import { RecoilRoot } from 'recoil';
|
||||||
|
|
||||||
|
describe('AutomaticPlaybackSwitch', () => {
  /**
   * Mock passed as the `onCheckedChange` prop; it records the value the switch
   * reports when it is toggled. Re-created before each test so calls do not
   * leak between cases.
   */
  let mockSetAutomaticPlayback: jest.Mock<void, [boolean]>;

  beforeEach(() => {
    mockSetAutomaticPlayback = jest.fn();
  });

  it('renders correctly', () => {
    const { getByTestId } = render(
      <RecoilRoot>
        <AutomaticPlaybackSwitch />
      </RecoilRoot>,
    );

    expect(getByTestId('AutomaticPlayback')).toBeInTheDocument();
  });

  it('calls onCheckedChange when the switch is toggled', () => {
    const { getByTestId } = render(
      <RecoilRoot>
        <AutomaticPlaybackSwitch onCheckedChange={mockSetAutomaticPlayback} />
      </RecoilRoot>,
    );
    const switchElement = getByTestId('AutomaticPlayback');
    fireEvent.click(switchElement);

    // A single click on the freshly rendered switch is expected to report `true`.
    expect(mockSetAutomaticPlayback).toHaveBeenCalledWith(true);
  });
});
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
export { default as AutomaticPlayback } from './AutomaticPlayback';
|
export { default as AutomaticPlaybackSwitch } from './AutomaticPlaybackSwitch';
|
||||||
export { default as CacheTTSSwitch } from './CacheTTSSwitch';
|
export { default as CacheTTSSwitch } from './CacheTTSSwitch';
|
||||||
export { default as EngineTTSDropdown } from './EngineTTSDropdown';
|
export { default as EngineTTSDropdown } from './EngineTTSDropdown';
|
||||||
export { default as PlaybackRate } from './PlaybackRate';
|
export { default as PlaybackRate } from './PlaybackRate';
|
||||||
|
|
|
||||||
|
|
@ -423,6 +423,13 @@ export const useVoicesQuery = (): UseQueryResult<t.VoiceResponse> => {
|
||||||
return useQuery([QueryKeys.voices], () => dataService.getVoices());
|
return useQuery([QueryKeys.voices], () => dataService.getVoices());
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Custom config speech */
|
||||||
|
/**
 * Query hook for the custom speech configuration.
 *
 * Registers a query under `QueryKeys.customConfigSpeech` whose fetcher is
 * `dataService.getCustomConfigSpeech()` and returns the resulting query object.
 */
export const useCustomConfigSpeechQuery = (): UseQueryResult<t.getCustomConfigSpeechResponse> =>
  useQuery([QueryKeys.customConfigSpeech], () => dataService.getCustomConfigSpeech());
|
||||||
|
|
||||||
|
/** Prompt */
|
||||||
|
|
||||||
export const usePromptGroupsInfiniteQuery = (
|
export const usePromptGroupsInfiniteQuery = (
|
||||||
params?: t.TPromptGroupsWithFilterRequest,
|
params?: t.TPromptGroupsWithFilterRequest,
|
||||||
config?: UseInfiniteQueryOptions<t.PromptGroupListResponse, unknown>,
|
config?: UseInfiniteQueryOptions<t.PromptGroupListResponse, unknown>,
|
||||||
|
|
|
||||||
|
|
@ -8,3 +8,4 @@ export { default as useRequiresKey } from './useRequiresKey';
|
||||||
export { default as useMultipleKeys } from './useMultipleKeys';
|
export { default as useMultipleKeys } from './useMultipleKeys';
|
||||||
export { default as useSpeechToText } from './useSpeechToText';
|
export { default as useSpeechToText } from './useSpeechToText';
|
||||||
export { default as useTextToSpeech } from './useTextToSpeech';
|
export { default as useTextToSpeech } from './useTextToSpeech';
|
||||||
|
export { default as useGetAudioSettings } from './useGetAudioSettings';
|
||||||
|
|
|
||||||
19
client/src/hooks/Input/useGetAudioSettings.tsx
Normal file
19
client/src/hooks/Input/useGetAudioSettings.tsx
Normal file
|
|
@ -0,0 +1,19 @@
|
||||||
|
import { useRecoilState } from 'recoil';
|
||||||
|
import store from '~/store';
|
||||||
|
|
||||||
|
/** Engine identifiers that the STT/TTS engine atoms are compared against. */
export enum AudioEndpoints {
  browser = 'browser',
  external = 'external',
}

/**
 * Reads the selected speech-to-text and text-to-speech engines from the Recoil
 * store and reports, for each, whether it is set to the external engine.
 *
 * @returns `externalSpeechToText` and `externalTextToSpeech`, each `true` only
 * when the corresponding engine atom equals `AudioEndpoints.external`.
 */
const useGetAudioSettings = () => {
  const [sttEngine] = useRecoilState<string>(store.engineSTT);
  const [ttsEngine] = useRecoilState<string>(store.engineTTS);

  return {
    externalSpeechToText: sttEngine === AudioEndpoints.external,
    externalTextToSpeech: ttsEngine === AudioEndpoints.external,
  };
};
|
||||||
|
|
||||||
|
export default useGetAudioSettings;
|
||||||
|
|
@ -1,17 +1,16 @@
|
||||||
import { useState, useEffect } from 'react';
|
import { useState, useEffect } from 'react';
|
||||||
import useSpeechToTextBrowser from './useSpeechToTextBrowser';
|
import useSpeechToTextBrowser from './useSpeechToTextBrowser';
|
||||||
import useSpeechToTextExternal from './useSpeechToTextExternal';
|
import useSpeechToTextExternal from './useSpeechToTextExternal';
|
||||||
import { useRecoilState } from 'recoil';
|
import useGetAudioSettings from './useGetAudioSettings';
|
||||||
import store from '~/store';
|
|
||||||
|
|
||||||
const useSpeechToText = (handleTranscriptionComplete: (text: string) => void) => {
|
const useSpeechToText = (handleTranscriptionComplete: (text: string) => void) => {
|
||||||
const [endpointSTT] = useRecoilState<string>(store.endpointSTT);
|
const { externalSpeechToText } = useGetAudioSettings();
|
||||||
const useExternalSpeechToText = endpointSTT === 'external';
|
|
||||||
const [animatedText, setAnimatedText] = useState('');
|
const [animatedText, setAnimatedText] = useState('');
|
||||||
|
|
||||||
const {
|
const {
|
||||||
isListening: speechIsListeningBrowser,
|
isListening: speechIsListeningBrowser,
|
||||||
isLoading: speechIsLoadingBrowser,
|
isLoading: speechIsLoadingBrowser,
|
||||||
|
interimTranscript: interimTranscriptBrowser,
|
||||||
text: speechTextBrowser,
|
text: speechTextBrowser,
|
||||||
startRecording: startSpeechRecordingBrowser,
|
startRecording: startSpeechRecordingBrowser,
|
||||||
stopRecording: stopSpeechRecordingBrowser,
|
stopRecording: stopSpeechRecordingBrowser,
|
||||||
|
|
@ -26,21 +25,21 @@ const useSpeechToText = (handleTranscriptionComplete: (text: string) => void) =>
|
||||||
clearText,
|
clearText,
|
||||||
} = useSpeechToTextExternal(handleTranscriptionComplete);
|
} = useSpeechToTextExternal(handleTranscriptionComplete);
|
||||||
|
|
||||||
const isListening = useExternalSpeechToText
|
const isListening = externalSpeechToText ? speechIsListeningExternal : speechIsListeningBrowser;
|
||||||
? speechIsListeningExternal
|
const isLoading = externalSpeechToText ? speechIsLoadingExternal : speechIsLoadingBrowser;
|
||||||
: speechIsListeningBrowser;
|
const speechTextForm = externalSpeechToText ? speechTextExternal : speechTextBrowser;
|
||||||
const isLoading = useExternalSpeechToText ? speechIsLoadingExternal : speechIsLoadingBrowser;
|
const startRecording = externalSpeechToText
|
||||||
const speechTextForm = useExternalSpeechToText ? speechTextExternal : speechTextBrowser;
|
|
||||||
const startRecording = useExternalSpeechToText
|
|
||||||
? startSpeechRecordingExternal
|
? startSpeechRecordingExternal
|
||||||
: startSpeechRecordingBrowser;
|
: startSpeechRecordingBrowser;
|
||||||
const stopRecording = useExternalSpeechToText
|
const stopRecording = externalSpeechToText
|
||||||
? stopSpeechRecordingExternal
|
? stopSpeechRecordingExternal
|
||||||
: stopSpeechRecordingBrowser;
|
: stopSpeechRecordingBrowser;
|
||||||
const speechText =
|
const speechText =
|
||||||
isListening || (speechTextExternal && speechTextExternal.length > 0)
|
isListening || (speechTextExternal && speechTextExternal.length > 0)
|
||||||
? speechTextExternal
|
? speechTextExternal
|
||||||
: speechTextForm || '';
|
: speechTextForm || '';
|
||||||
|
// for a future real-time STT external
|
||||||
|
const interimTranscript = externalSpeechToText ? '' : interimTranscriptBrowser;
|
||||||
|
|
||||||
const animateTextTyping = (text: string) => {
|
const animateTextTyping = (text: string) => {
|
||||||
const totalDuration = 2000;
|
const totalDuration = 2000;
|
||||||
|
|
@ -65,17 +64,18 @@ const useSpeechToText = (handleTranscriptionComplete: (text: string) => void) =>
|
||||||
};
|
};
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (speechText) {
|
if (speechText && externalSpeechToText) {
|
||||||
animateTextTyping(speechText);
|
animateTextTyping(speechText);
|
||||||
}
|
}
|
||||||
}, [speechText]);
|
}, [speechText, externalSpeechToText]);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
isListening,
|
isListening,
|
||||||
isLoading,
|
isLoading,
|
||||||
startRecording,
|
startRecording,
|
||||||
stopRecording,
|
stopRecording,
|
||||||
speechText: animatedText,
|
interimTranscript,
|
||||||
|
speechText: externalSpeechToText ? animatedText : speechText,
|
||||||
clearText,
|
clearText,
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -1,34 +1,57 @@
|
||||||
import { useEffect } from 'react';
|
import { useEffect, useState } from 'react';
|
||||||
import { useRecoilState } from 'recoil';
|
import { useRecoilState } from 'recoil';
|
||||||
import { useToastContext } from '~/Providers';
|
import { useToastContext } from '~/Providers';
|
||||||
import store from '~/store';
|
import store from '~/store';
|
||||||
import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition';
|
import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition';
|
||||||
|
import useGetAudioSettings from './useGetAudioSettings';
|
||||||
|
|
||||||
const useSpeechToTextBrowser = () => {
|
const useSpeechToTextBrowser = () => {
|
||||||
const { showToast } = useToastContext();
|
const { showToast } = useToastContext();
|
||||||
const [endpointSTT] = useRecoilState<string>(store.endpointSTT);
|
const [languageSTT] = useRecoilState<string>(store.languageSTT);
|
||||||
|
const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);
|
||||||
|
const { externalSpeechToText } = useGetAudioSettings();
|
||||||
|
const [isListening, setIsListening] = useState(false);
|
||||||
|
|
||||||
const { transcript, listening, resetTranscript, browserSupportsSpeechRecognition } =
|
const {
|
||||||
useSpeechRecognition();
|
interimTranscript,
|
||||||
|
finalTranscript,
|
||||||
|
listening,
|
||||||
|
browserSupportsSpeechRecognition,
|
||||||
|
isMicrophoneAvailable,
|
||||||
|
} = useSpeechRecognition();
|
||||||
|
|
||||||
const toggleListening = () => {
|
const toggleListening = () => {
|
||||||
if (browserSupportsSpeechRecognition) {
|
if (!browserSupportsSpeechRecognition) {
|
||||||
if (listening) {
|
|
||||||
SpeechRecognition.stopListening();
|
|
||||||
} else {
|
|
||||||
SpeechRecognition.startListening();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
showToast({
|
showToast({
|
||||||
message: 'Browser does not support SpeechRecognition',
|
message: 'Browser does not support SpeechRecognition',
|
||||||
status: 'error',
|
status: 'error',
|
||||||
});
|
});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!isMicrophoneAvailable) {
|
||||||
|
showToast({
|
||||||
|
message: 'Microphone is not available',
|
||||||
|
status: 'error',
|
||||||
|
});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (listening) {
|
||||||
|
setIsListening(false);
|
||||||
|
SpeechRecognition.stopListening();
|
||||||
|
} else {
|
||||||
|
setIsListening(true);
|
||||||
|
SpeechRecognition.startListening({
|
||||||
|
language: languageSTT,
|
||||||
|
continuous: autoTranscribeAudio,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
const handleKeyDown = (e: KeyboardEvent) => {
|
const handleKeyDown = (e: KeyboardEvent) => {
|
||||||
if (e.shiftKey && e.altKey && e.code === 'KeyL' && endpointSTT === 'browser') {
|
if (e.shiftKey && e.altKey && e.code === 'KeyL' && !externalSpeechToText) {
|
||||||
toggleListening();
|
toggleListening();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
@ -37,15 +60,19 @@ const useSpeechToTextBrowser = () => {
|
||||||
return () => window.removeEventListener('keydown', handleKeyDown);
|
return () => window.removeEventListener('keydown', handleKeyDown);
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (!listening) {
|
||||||
|
setIsListening(false);
|
||||||
|
}
|
||||||
|
}, [listening]);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
isListening: listening,
|
isListening,
|
||||||
isLoading: false,
|
isLoading: false,
|
||||||
text: transcript,
|
interimTranscript,
|
||||||
|
text: finalTranscript,
|
||||||
startRecording: toggleListening,
|
startRecording: toggleListening,
|
||||||
stopRecording: () => {
|
stopRecording: toggleListening,
|
||||||
SpeechRecognition.stopListening();
|
|
||||||
resetTranscript();
|
|
||||||
},
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3,11 +3,12 @@ import { useRecoilState } from 'recoil';
|
||||||
import { useSpeechToTextMutation } from '~/data-provider';
|
import { useSpeechToTextMutation } from '~/data-provider';
|
||||||
import { useToastContext } from '~/Providers';
|
import { useToastContext } from '~/Providers';
|
||||||
import store from '~/store';
|
import store from '~/store';
|
||||||
|
import useGetAudioSettings from './useGetAudioSettings';
|
||||||
|
|
||||||
const useSpeechToTextExternal = (onTranscriptionComplete: (text: string) => void) => {
|
const useSpeechToTextExternal = (onTranscriptionComplete: (text: string) => void) => {
|
||||||
const { showToast } = useToastContext();
|
const { showToast } = useToastContext();
|
||||||
const [endpointSTT] = useRecoilState<string>(store.endpointSTT);
|
const { externalSpeechToText } = useGetAudioSettings();
|
||||||
const [speechToText] = useRecoilState<boolean>(store.SpeechToText);
|
const [speechToText] = useRecoilState<boolean>(store.speechToText);
|
||||||
const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);
|
const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);
|
||||||
const [autoSendText] = useRecoilState<boolean>(store.autoSendText);
|
const [autoSendText] = useRecoilState<boolean>(store.autoSendText);
|
||||||
const [text, setText] = useState<string>('');
|
const [text, setText] = useState<string>('');
|
||||||
|
|
@ -196,7 +197,7 @@ const useSpeechToTextExternal = (onTranscriptionComplete: (text: string) => void
|
||||||
};
|
};
|
||||||
|
|
||||||
const handleKeyDown = async (e: KeyboardEvent) => {
|
const handleKeyDown = async (e: KeyboardEvent) => {
|
||||||
if (e.shiftKey && e.altKey && e.code === 'KeyL' && endpointSTT !== 'browser') {
|
if (e.shiftKey && e.altKey && e.code === 'KeyL' && !externalSpeechToText) {
|
||||||
if (!window.MediaRecorder) {
|
if (!window.MediaRecorder) {
|
||||||
showToast({ message: 'MediaRecorder is not supported in this browser', status: 'error' });
|
showToast({ message: 'MediaRecorder is not supported in this browser', status: 'error' });
|
||||||
return;
|
return;
|
||||||
|
|
|
||||||
|
|
@ -4,12 +4,10 @@ import type { TMessage } from 'librechat-data-provider';
|
||||||
import useTextToSpeechExternal from './useTextToSpeechExternal';
|
import useTextToSpeechExternal from './useTextToSpeechExternal';
|
||||||
import useTextToSpeechBrowser from './useTextToSpeechBrowser';
|
import useTextToSpeechBrowser from './useTextToSpeechBrowser';
|
||||||
import { usePauseGlobalAudio } from '../Audio';
|
import { usePauseGlobalAudio } from '../Audio';
|
||||||
import { useRecoilState } from 'recoil';
|
import useGetAudioSettings from './useGetAudioSettings';
|
||||||
import store from '~/store';
|
|
||||||
|
|
||||||
const useTextToSpeech = (message: TMessage, isLast: boolean, index = 0) => {
|
const useTextToSpeech = (message: TMessage, isLast: boolean, index = 0) => {
|
||||||
const [endpointTTS] = useRecoilState<string>(store.endpointTTS);
|
const { externalTextToSpeech } = useGetAudioSettings();
|
||||||
const useExternalTextToSpeech = endpointTTS === 'external';
|
|
||||||
|
|
||||||
const {
|
const {
|
||||||
generateSpeechLocal: generateSpeechLocal,
|
generateSpeechLocal: generateSpeechLocal,
|
||||||
|
|
@ -26,9 +24,9 @@ const useTextToSpeech = (message: TMessage, isLast: boolean, index = 0) => {
|
||||||
} = useTextToSpeechExternal(message.messageId, isLast, index);
|
} = useTextToSpeechExternal(message.messageId, isLast, index);
|
||||||
const { pauseGlobalAudio } = usePauseGlobalAudio(index);
|
const { pauseGlobalAudio } = usePauseGlobalAudio(index);
|
||||||
|
|
||||||
const generateSpeech = useExternalTextToSpeech ? generateSpeechExternal : generateSpeechLocal;
|
const generateSpeech = externalTextToSpeech ? generateSpeechExternal : generateSpeechLocal;
|
||||||
const cancelSpeech = useExternalTextToSpeech ? cancelSpeechExternal : cancelSpeechLocal;
|
const cancelSpeech = externalTextToSpeech ? cancelSpeechExternal : cancelSpeechLocal;
|
||||||
const isSpeaking = useExternalTextToSpeech ? isSpeakingExternal : isSpeakingLocal;
|
const isSpeaking = externalTextToSpeech ? isSpeakingExternal : isSpeakingLocal;
|
||||||
|
|
||||||
const isMouseDownRef = useRef(false);
|
const isMouseDownRef = useRef(false);
|
||||||
const timerRef = useRef<number | undefined>(undefined);
|
const timerRef = useRef<number | undefined>(undefined);
|
||||||
|
|
|
||||||
|
|
@ -539,7 +539,7 @@ export default {
|
||||||
com_nav_modular_chat: 'Enable switching Endpoints mid-conversation',
|
com_nav_modular_chat: 'Enable switching Endpoints mid-conversation',
|
||||||
com_nav_latex_parsing: 'Parsing LaTeX in messages (may affect performance)',
|
com_nav_latex_parsing: 'Parsing LaTeX in messages (may affect performance)',
|
||||||
com_nav_text_to_speech: 'Text to Speech',
|
com_nav_text_to_speech: 'Text to Speech',
|
||||||
com_nav_automatic_playback: 'Autoplay Latest Message (external only)',
|
com_nav_automatic_playback: 'Autoplay Latest Message',
|
||||||
com_nav_speech_to_text: 'Speech to Text',
|
com_nav_speech_to_text: 'Speech to Text',
|
||||||
com_nav_profile_picture: 'Profile Picture',
|
com_nav_profile_picture: 'Profile Picture',
|
||||||
com_nav_change_picture: 'Change picture',
|
com_nav_change_picture: 'Change picture',
|
||||||
|
|
|
||||||
|
|
@ -18,32 +18,45 @@ const staticAtoms = {
|
||||||
showPopover: atom<boolean>({ key: 'showPopover', default: false }),
|
showPopover: atom<boolean>({ key: 'showPopover', default: false }),
|
||||||
};
|
};
|
||||||
|
|
||||||
// Atoms with localStorage
|
|
||||||
const localStorageAtoms = {
|
const localStorageAtoms = {
|
||||||
|
// General settings
|
||||||
autoScroll: atomWithLocalStorage('autoScroll', false),
|
autoScroll: atomWithLocalStorage('autoScroll', false),
|
||||||
showCode: atomWithLocalStorage('showCode', false),
|
|
||||||
hideSidePanel: atomWithLocalStorage('hideSidePanel', false),
|
hideSidePanel: atomWithLocalStorage('hideSidePanel', false),
|
||||||
modularChat: atomWithLocalStorage('modularChat', true),
|
|
||||||
LaTeXParsing: atomWithLocalStorage('LaTeXParsing', true),
|
// Messages settings
|
||||||
UsernameDisplay: atomWithLocalStorage('UsernameDisplay', true),
|
|
||||||
TextToSpeech: atomWithLocalStorage('textToSpeech', true),
|
|
||||||
automaticPlayback: atomWithLocalStorage('automaticPlayback', false),
|
|
||||||
enterToSend: atomWithLocalStorage('enterToSend', true),
|
enterToSend: atomWithLocalStorage('enterToSend', true),
|
||||||
SpeechToText: atomWithLocalStorage('speechToText', true),
|
showCode: atomWithLocalStorage('showCode', false),
|
||||||
conversationMode: atomWithLocalStorage('conversationMode', false),
|
saveDrafts: atomWithLocalStorage('saveDrafts', false),
|
||||||
advancedMode: atomWithLocalStorage('advancedMode', false),
|
|
||||||
autoSendText: atomWithLocalStorage('autoSendText', false),
|
|
||||||
autoTranscribeAudio: atomWithLocalStorage('autoTranscribeAudio', false),
|
|
||||||
decibelValue: atomWithLocalStorage('decibelValue', -45),
|
|
||||||
endpointSTT: atomWithLocalStorage('endpointSTT', 'browser'),
|
|
||||||
endpointTTS: atomWithLocalStorage('endpointTTS', 'browser'),
|
|
||||||
cacheTTS: atomWithLocalStorage('cacheTTS', true),
|
|
||||||
voice: atomWithLocalStorage('voice', ''),
|
|
||||||
forkSetting: atomWithLocalStorage('forkSetting', ''),
|
forkSetting: atomWithLocalStorage('forkSetting', ''),
|
||||||
splitAtTarget: atomWithLocalStorage('splitAtTarget', false),
|
splitAtTarget: atomWithLocalStorage('splitAtTarget', false),
|
||||||
|
|
||||||
rememberForkOption: atomWithLocalStorage('rememberForkOption', true),
|
rememberForkOption: atomWithLocalStorage('rememberForkOption', true),
|
||||||
|
|
||||||
|
// Beta features settings
|
||||||
|
modularChat: atomWithLocalStorage('modularChat', true),
|
||||||
|
LaTeXParsing: atomWithLocalStorage('LaTeXParsing', true),
|
||||||
|
|
||||||
|
// Speech settings
|
||||||
|
conversationMode: atomWithLocalStorage('conversationMode', false),
|
||||||
|
advancedMode: atomWithLocalStorage('advancedMode', false),
|
||||||
|
|
||||||
|
speechToText: atomWithLocalStorage('speechToText', true),
|
||||||
|
engineSTT: atomWithLocalStorage('engineSTT', 'browser'),
|
||||||
|
languageSTT: atomWithLocalStorage('languageSTT', ''),
|
||||||
|
autoTranscribeAudio: atomWithLocalStorage('autoTranscribeAudio', false),
|
||||||
|
decibelValue: atomWithLocalStorage('decibelValue', -45),
|
||||||
|
autoSendText: atomWithLocalStorage('autoSendText', false),
|
||||||
|
|
||||||
|
textToSpeech: atomWithLocalStorage('textToSpeech', true),
|
||||||
|
engineTTS: atomWithLocalStorage('engineTTS', 'browser'),
|
||||||
|
voice: atomWithLocalStorage('voice', ''),
|
||||||
|
languageTTS: atomWithLocalStorage('languageTTS', ''),
|
||||||
|
automaticPlayback: atomWithLocalStorage('automaticPlayback', false),
|
||||||
playbackRate: atomWithLocalStorage<number | null>('playbackRate', null),
|
playbackRate: atomWithLocalStorage<number | null>('playbackRate', null),
|
||||||
saveDrafts: atomWithLocalStorage('saveDrafts', false),
|
cacheTTS: atomWithLocalStorage('cacheTTS', true),
|
||||||
|
|
||||||
|
// Account settings
|
||||||
|
UsernameDisplay: atomWithLocalStorage('UsernameDisplay', true),
|
||||||
};
|
};
|
||||||
|
|
||||||
export default { ...staticAtoms, ...localStorageAtoms };
|
export default { ...staticAtoms, ...localStorageAtoms };
|
||||||
|
|
|
||||||
|
|
@ -128,14 +128,18 @@ export const images = () => `${files()}/images`;
|
||||||
|
|
||||||
export const avatar = () => `${images()}/avatar`;
|
export const avatar = () => `${images()}/avatar`;
|
||||||
|
|
||||||
export const speechToText = () => `${files()}/stt`;
|
export const speech = () => `${files()}/speech`;
|
||||||
|
|
||||||
export const textToSpeech = () => `${files()}/tts`;
|
export const speechToText = () => `${speech()}/stt`;
|
||||||
|
|
||||||
|
export const textToSpeech = () => `${speech()}/tts`;
|
||||||
|
|
||||||
export const textToSpeechManual = () => `${textToSpeech()}/manual`;
|
export const textToSpeechManual = () => `${textToSpeech()}/manual`;
|
||||||
|
|
||||||
export const textToSpeechVoices = () => `${textToSpeech()}/voices`;
|
export const textToSpeechVoices = () => `${textToSpeech()}/voices`;
|
||||||
|
|
||||||
|
export const getCustomConfigSpeech = () => `${speech()}/config/get`;
|
||||||
|
|
||||||
export const getPromptGroup = (_id: string) => `${prompts()}/groups/${_id}`;
|
export const getPromptGroup = (_id: string) => `${prompts()}/groups/${_id}`;
|
||||||
|
|
||||||
export const getPromptGroupsWithFilters = (filter: object) => {
|
export const getPromptGroupsWithFilters = (filter: object) => {
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ import { fileConfigSchema } from './file-config';
|
||||||
import { specsConfigSchema } from './models';
|
import { specsConfigSchema } from './models';
|
||||||
import { FileSources } from './types/files';
|
import { FileSources } from './types/files';
|
||||||
import { TModelsConfig } from './types';
|
import { TModelsConfig } from './types';
|
||||||
|
import { speech } from './api-endpoints';
|
||||||
|
|
||||||
export const defaultSocialLogins = ['google', 'facebook', 'openid', 'github', 'discord'];
|
export const defaultSocialLogins = ['google', 'facebook', 'openid', 'github', 'discord'];
|
||||||
|
|
||||||
|
|
@ -273,6 +274,40 @@ const sttSchema = z.object({
|
||||||
.optional(),
|
.optional(),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const speechTab = z
|
||||||
|
.object({
|
||||||
|
conversationMode: z.boolean().optional(),
|
||||||
|
advancedMode: z.boolean().optional(),
|
||||||
|
speechToText: z
|
||||||
|
.boolean()
|
||||||
|
.optional()
|
||||||
|
.or(
|
||||||
|
z.object({
|
||||||
|
engineSTT: z.string().optional(),
|
||||||
|
languageSTT: z.string().optional(),
|
||||||
|
autoTranscribeAudio: z.boolean().optional(),
|
||||||
|
decibelValue: z.number().optional(),
|
||||||
|
autoSendText: z.boolean().optional(),
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
.optional(),
|
||||||
|
textToSpeech: z
|
||||||
|
.boolean()
|
||||||
|
.optional()
|
||||||
|
.or(
|
||||||
|
z.object({
|
||||||
|
engineTTS: z.string().optional(),
|
||||||
|
voice: z.string().optional(),
|
||||||
|
languageTTS: z.string().optional(),
|
||||||
|
automaticPlayback: z.boolean().optional(),
|
||||||
|
playbackRate: z.number().optional(),
|
||||||
|
cacheTTS: z.boolean().optional(),
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
.optional(),
|
||||||
|
})
|
||||||
|
.optional();
|
||||||
|
|
||||||
export enum RateLimitPrefix {
|
export enum RateLimitPrefix {
|
||||||
FILE_UPLOAD = 'FILE_UPLOAD',
|
FILE_UPLOAD = 'FILE_UPLOAD',
|
||||||
IMPORT = 'IMPORT',
|
IMPORT = 'IMPORT',
|
||||||
|
|
@ -362,8 +397,13 @@ export const configSchema = z.object({
|
||||||
allowedDomains: z.array(z.string()).optional(),
|
allowedDomains: z.array(z.string()).optional(),
|
||||||
})
|
})
|
||||||
.default({ socialLogins: defaultSocialLogins }),
|
.default({ socialLogins: defaultSocialLogins }),
|
||||||
tts: ttsSchema.optional(),
|
speech: z
|
||||||
stt: sttSchema.optional(),
|
.object({
|
||||||
|
tts: ttsSchema.optional(),
|
||||||
|
stt: sttSchema.optional(),
|
||||||
|
speechTab: speechTab.optional(),
|
||||||
|
})
|
||||||
|
.optional(),
|
||||||
rateLimits: rateLimitSchema.optional(),
|
rateLimits: rateLimitSchema.optional(),
|
||||||
fileConfig: fileConfigSchema.optional(),
|
fileConfig: fileConfigSchema.optional(),
|
||||||
modelSpecs: specsConfigSchema.optional(),
|
modelSpecs: specsConfigSchema.optional(),
|
||||||
|
|
|
||||||
|
|
@ -355,6 +355,10 @@ export const getVoices = (): Promise<f.VoiceResponse> => {
|
||||||
return request.get(endpoints.textToSpeechVoices());
|
return request.get(endpoints.textToSpeechVoices());
|
||||||
};
|
};
|
||||||
|
|
||||||
|
export const getCustomConfigSpeech = (): Promise<f.getCustomConfigSpeechResponse[]> => {
|
||||||
|
return request.get(endpoints.getCustomConfigSpeech());
|
||||||
|
};
|
||||||
|
|
||||||
/* actions */
|
/* actions */
|
||||||
|
|
||||||
export const updateAction = (data: m.UpdateActionVariables): Promise<m.UpdateActionResponse> => {
|
export const updateAction = (data: m.UpdateActionVariables): Promise<m.UpdateActionResponse> => {
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ export enum QueryKeys {
|
||||||
assistantDocs = 'assistantDocs',
|
assistantDocs = 'assistantDocs',
|
||||||
fileDownload = 'fileDownload',
|
fileDownload = 'fileDownload',
|
||||||
voices = 'voices',
|
voices = 'voices',
|
||||||
|
customConfigSpeech = 'customConfigSpeech',
|
||||||
prompts = 'prompts',
|
prompts = 'prompts',
|
||||||
prompt = 'prompt',
|
prompt = 'prompt',
|
||||||
promptGroups = 'promptGroups',
|
promptGroups = 'promptGroups',
|
||||||
|
|
|
||||||
|
|
@ -83,6 +83,8 @@ export type SpeechToTextResponse = {
|
||||||
|
|
||||||
export type VoiceResponse = string[];
|
export type VoiceResponse = string[];
|
||||||
|
|
||||||
|
export type getCustomConfigSpeechResponse = { [key: string]: string };
|
||||||
|
|
||||||
export type UploadMutationOptions = {
|
export type UploadMutationOptions = {
|
||||||
onSuccess?: (data: TFileUpload, variables: FormData, context?: unknown) => void;
|
onSuccess?: (data: TFileUpload, variables: FormData, context?: unknown) => void;
|
||||||
onMutate?: (variables: FormData) => void | Promise<unknown>;
|
onMutate?: (variables: FormData) => void | Promise<unknown>;
|
||||||
|
|
@ -113,6 +115,12 @@ export type VoiceOptions = {
|
||||||
onError?: (error: unknown, variables: unknown, context?: unknown) => void;
|
onError?: (error: unknown, variables: unknown, context?: unknown) => void;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
export type getCustomConfigSpeechOptions = {
|
||||||
|
onSuccess?: (data: getCustomConfigSpeechResponse, variables: unknown, context?: unknown) => void;
|
||||||
|
onMutate?: () => void | Promise<unknown>;
|
||||||
|
onError?: (error: unknown, variables: unknown, context?: unknown) => void;
|
||||||
|
};
|
||||||
|
|
||||||
export type DeleteFilesResponse = {
|
export type DeleteFilesResponse = {
|
||||||
message: string;
|
message: string;
|
||||||
result: Record<string, unknown>;
|
result: Record<string, unknown>;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue