diff --git a/api/app/clients/OpenAIClient.js b/api/app/clients/OpenAIClient.js index ced2387bd5..7520cbb897 100644 --- a/api/app/clients/OpenAIClient.js +++ b/api/app/clients/OpenAIClient.js @@ -27,7 +27,6 @@ const { createContextHandlers, } = require('./prompts'); const { encodeAndFormat } = require('~/server/services/Files/images/encode'); -const { updateTokenWebsocket } = require('~/server/services/Files/Audio'); const { isEnabled, sleep } = require('~/server/utils'); const { handleOpenAIErrors } = require('./tools/util'); const spendTokens = require('~/models/spendTokens'); @@ -595,7 +594,6 @@ class OpenAIClient extends BaseClient { payload, (progressMessage) => { if (progressMessage === '[DONE]') { - updateTokenWebsocket('[DONE]'); return; } diff --git a/api/server/routes/files/index.js b/api/server/routes/files/index.js index 2911ecb0b3..6317f4495f 100644 --- a/api/server/routes/files/index.js +++ b/api/server/routes/files/index.js @@ -1,19 +1,11 @@ const express = require('express'); -const { - uaParser, - checkBan, - requireJwtAuth, - createFileLimiters, - createTTSLimiters, - createSTTLimiters, -} = require('~/server/middleware'); +const { uaParser, checkBan, requireJwtAuth, createFileLimiters } = require('~/server/middleware'); const { createMulterInstance } = require('./multer'); const files = require('./files'); const images = require('./images'); const avatar = require('./avatar'); -const stt = require('./stt'); -const tts = require('./tts'); +const speech = require('./speech'); const initialize = async () => { const router = express.Router(); @@ -21,11 +13,8 @@ const initialize = async () => { router.use(checkBan); router.use(uaParser); - /* Important: stt/tts routes must be added before the upload limiters */ - const { sttIpLimiter, sttUserLimiter } = createSTTLimiters(); - const { ttsIpLimiter, ttsUserLimiter } = createTTSLimiters(); - router.use('/stt', sttIpLimiter, sttUserLimiter, stt); - router.use('/tts', ttsIpLimiter, ttsUserLimiter, tts); + /* Important: speech route must be added before the upload limiters */ + router.use('/speech', speech); const upload = await createMulterInstance(); const { fileUploadIpLimiter, fileUploadUserLimiter } = createFileLimiters(); diff --git a/api/server/routes/files/speech/customConfigSpeech.js b/api/server/routes/files/speech/customConfigSpeech.js new file mode 100644 index 0000000000..c3b1e2eb47 --- /dev/null +++ b/api/server/routes/files/speech/customConfigSpeech.js @@ -0,0 +1,10 @@ +const express = require('express'); +const router = express.Router(); + +const { getCustomConfigSpeech } = require('~/server/services/Files/Audio'); + +router.get('/get', async (req, res) => { + await getCustomConfigSpeech(req, res); +}); + +module.exports = router; diff --git a/api/server/routes/files/speech/index.js b/api/server/routes/files/speech/index.js new file mode 100644 index 0000000000..074ed553c9 --- /dev/null +++ b/api/server/routes/files/speech/index.js @@ -0,0 +1,17 @@ +const express = require('express'); +const { createTTSLimiters, createSTTLimiters } = require('~/server/middleware'); + +const stt = require('./stt'); +const tts = require('./tts'); +const customConfigSpeech = require('./customConfigSpeech'); + +const router = express.Router(); + +const { sttIpLimiter, sttUserLimiter } = createSTTLimiters(); +const { ttsIpLimiter, ttsUserLimiter } = createTTSLimiters(); +router.use('/stt', sttIpLimiter, sttUserLimiter, stt); +router.use('/tts', ttsIpLimiter, ttsUserLimiter, tts); + +router.use('/config', customConfigSpeech); + +module.exports = router; diff --git a/api/server/routes/files/stt.js b/api/server/routes/files/speech/stt.js similarity index 100% rename from api/server/routes/files/stt.js rename to api/server/routes/files/speech/stt.js diff --git a/api/server/routes/files/tts.js b/api/server/routes/files/speech/tts.js similarity index 100% rename from api/server/routes/files/tts.js rename to api/server/routes/files/speech/tts.js diff --git a/api/server/services/Config/loadCustomConfig.js b/api/server/services/Config/loadCustomConfig.js index 1b5b287066..2127ec239e 100644 --- a/api/server/services/Config/loadCustomConfig.js +++ b/api/server/services/Config/loadCustomConfig.js @@ -76,8 +76,28 @@ Please specify a correct \`imageOutputType\` value (case-sensitive). ); } if (!result.success) { - i === 0 && logger.error(`Invalid custom config file at ${configPath}`, result.error); - i === 0 && i++; + let errorMessage = `Invalid custom config file at ${configPath}: +${JSON.stringify(result.error, null, 2)}`; + + if (i === 0) { + logger.error(errorMessage); + const speechError = result.error.errors.find( + (err) => + err.code === 'unrecognized_keys' && + (err.message?.includes('stt') || err.message?.includes('tts')), + ); + + if (speechError) { + logger.warn(` +The Speech-to-text and Text-to-speech configuration format has recently changed. +If you're getting this error, please refer to the latest documentation: + +https://www.librechat.ai/docs/configuration/stt_tts`); + } + + i++; + } + return null; } else { logger.info('Custom config file loaded:'); diff --git a/api/server/services/Files/Audio/getCustomConfigSpeech.js b/api/server/services/Files/Audio/getCustomConfigSpeech.js new file mode 100644 index 0000000000..e9d185af2e --- /dev/null +++ b/api/server/services/Files/Audio/getCustomConfigSpeech.js @@ -0,0 +1,50 @@ +const getCustomConfig = require('~/server/services/Config/getCustomConfig'); + +/** + * This function retrieves the speechTab settings from the custom configuration + * It first fetches the custom configuration + * Then, it checks if the custom configuration and the speechTab schema exist + * If they do, it sends the speechTab settings as a JSON response + * If they don't, it throws an error + * + * @param {Object} req - The request object + * @param {Object} res - The response object + * @returns {Promise} + * @throws {Error} - If the custom configuration or the speechTab schema is missing, an error is thrown + */ +async function getCustomConfigSpeech(req, res) { + try { + const customConfig = await getCustomConfig(); + + if (!customConfig || !customConfig.speech?.speechTab) { + throw new Error('Configuration or speechTab schema is missing'); + } + + const ttsSchema = customConfig.speech?.speechTab; + let settings = {}; + + if (ttsSchema.advancedMode !== undefined) { + settings.advancedMode = ttsSchema.advancedMode; + } + if (ttsSchema.speechToText) { + for (const key in ttsSchema.speechToText) { + if (ttsSchema.speechToText[key] !== undefined) { + settings[key] = ttsSchema.speechToText[key]; + } + } + } + if (ttsSchema.textToSpeech) { + for (const key in ttsSchema.textToSpeech) { + if (ttsSchema.textToSpeech[key] !== undefined) { + settings[key] = ttsSchema.textToSpeech[key]; + } + } + } + + res.json(settings); + } catch (error) { + res.status(200).send(); + } +} + +module.exports = getCustomConfigSpeech; diff --git a/api/server/services/Files/Audio/getVoices.js b/api/server/services/Files/Audio/getVoices.js index b87cd363b2..56341cd2b0 100644 --- a/api/server/services/Files/Audio/getVoices.js +++ b/api/server/services/Files/Audio/getVoices.js @@ -1,4 +1,3 @@ -const { logger } = require('~/config'); const getCustomConfig = require('~/server/services/Config/getCustomConfig'); const { getProvider } = require('./textToSpeech'); @@ -16,11 +15,11 @@ async function getVoices(req, res) { try { const customConfig = await getCustomConfig(); - if (!customConfig || !customConfig?.tts) { + if (!customConfig || !customConfig?.speech?.tts) { throw new Error('Configuration or TTS schema is missing'); } - const ttsSchema = customConfig?.tts; + const ttsSchema = customConfig?.speech?.tts; const provider = getProvider(ttsSchema); let voices; @@ -40,8 +39,7 @@ async function getVoices(req, res) { res.json(voices); } catch (error) { - logger.error(`Failed to get voices: ${error.message}`); - res.status(500).json({ error: 'Failed to get voices' }); + res.status(500).json({ error: `Failed to get voices: ${error.message}` }); } } diff --git a/api/server/services/Files/Audio/index.js b/api/server/services/Files/Audio/index.js index a201ea556c..75882f2397 100644 --- a/api/server/services/Files/Audio/index.js +++ b/api/server/services/Files/Audio/index.js @@ -1,11 +1,11 @@ const getVoices = require('./getVoices'); +const getCustomConfigSpeech = require('./getCustomConfigSpeech'); const textToSpeech = require('./textToSpeech'); const speechToText = require('./speechToText'); -const { updateTokenWebsocket } = require('./webSocket'); module.exports = { getVoices, + getCustomConfigSpeech, speechToText, ...textToSpeech, - updateTokenWebsocket, }; diff --git a/api/server/services/Files/Audio/speechToText.js b/api/server/services/Files/Audio/speechToText.js index 96e70b76fe..7e0d2a2145 100644 --- a/api/server/services/Files/Audio/speechToText.js +++ b/api/server/services/Files/Audio/speechToText.js @@ -25,7 +25,7 @@ async function handleResponse(response) { } function getProvider(sttSchema) { - if (sttSchema.openai) { + if (sttSchema?.openai) { return 'openai'; } @@ -176,7 +176,7 @@ async function speechToText(req, res) { const audioReadStream = Readable.from(audioBuffer); audioReadStream.path = 'audio.wav'; - const provider = getProvider(customConfig.stt); + const provider = getProvider(customConfig.speech.stt); let [url, data, headers] = []; diff --git a/api/server/services/Files/Audio/textToSpeech.js b/api/server/services/Files/Audio/textToSpeech.js index 7778faabeb..49a0d4e2e7 100644 --- a/api/server/services/Files/Audio/textToSpeech.js +++ b/api/server/services/Files/Audio/textToSpeech.js @@ -191,8 +191,8 @@ function localAIProvider(ttsSchema, input, voice) { * @returns {Promise<[string, TProviderSchema]>} */ async function getProviderSchema(customConfig) { - const provider = getProvider(customConfig.tts); - return [provider, customConfig.tts[provider]]; + const provider = getProvider(customConfig.speech.tts); + return [provider, customConfig.speech.tts[provider]]; } /** diff --git a/api/server/services/Files/Audio/webSocket.js b/api/server/services/Files/Audio/webSocket.js deleted file mode 100644 index f2d96c7941..0000000000 --- a/api/server/services/Files/Audio/webSocket.js +++ /dev/null @@ -1,31 +0,0 @@ -let token = ''; - -function updateTokenWebsocket(newToken) { - console.log('Token:', newToken); - token = newToken; -} - -function sendTextToWebsocket(ws, onDataReceived) { - if (token === '[DONE]') { - ws.send(' '); - return; - } - - if (ws.readyState === WebSocket.OPEN) { - ws.send(token); - - ws.onmessage = function (event) { - console.log('Received:', event.data); - if (onDataReceived) { - onDataReceived(event.data); // Pass the received data to the callback function - } - }; - } else { - console.error('WebSocket is not open. Ready state is: ' + ws.readyState); - } -} - -module.exports = { - updateTokenWebsocket, - sendTextToWebsocket, -}; diff --git a/client/src/components/Chat/Input/AudioRecorder.tsx b/client/src/components/Chat/Input/AudioRecorder.tsx index 48d89c2c3f..dd088ea3c8 100644 --- a/client/src/components/Chat/Input/AudioRecorder.tsx +++ b/client/src/components/Chat/Input/AudioRecorder.tsx @@ -31,15 +31,26 @@ export default function AudioRecorder({ } }; - const { isListening, isLoading, startRecording, stopRecording, speechText, clearText } = - useSpeechToText(handleTranscriptionComplete); + const { + isListening, + isLoading, + startRecording, + stopRecording, + interimTranscript, + speechText, + clearText, + } = useSpeechToText(handleTranscriptionComplete); useEffect(() => { - if (textAreaRef.current) { + if (isListening && textAreaRef.current) { + methods.setValue('text', interimTranscript, { + shouldValidate: true, + }); + } else if (textAreaRef.current) { textAreaRef.current.value = speechText; methods.setValue('text', speechText, { shouldValidate: true }); } - }, [speechText, methods, textAreaRef]); + }, [interimTranscript, speechText, methods, textAreaRef]); const handleStartRecording = async () => { await startRecording(); diff --git a/client/src/components/Chat/Input/ChatForm.tsx b/client/src/components/Chat/Input/ChatForm.tsx index 63ac15b01b..2ad4580eea 100644 --- a/client/src/components/Chat/Input/ChatForm.tsx +++ b/client/src/components/Chat/Input/ChatForm.tsx @@ -38,8 +38,8 @@ const ChatForm = ({ index = 0 }) => { const submitButtonRef = useRef(null); const textAreaRef = useRef(null); - const SpeechToText = useRecoilValue(store.SpeechToText); - const TextToSpeech = useRecoilValue(store.TextToSpeech); + const SpeechToText = useRecoilValue(store.speechToText); + const TextToSpeech = useRecoilValue(store.textToSpeech); const automaticPlayback = useRecoilValue(store.automaticPlayback); const [showStopButton, setShowStopButton] = useRecoilState(store.showStopButtonByIndex(index)); diff --git a/client/src/components/Chat/Input/Files/Table/DataTable.tsx b/client/src/components/Chat/Input/Files/Table/DataTable.tsx index a61a41ed0e..c80cb33191 100644 --- a/client/src/components/Chat/Input/Files/Table/DataTable.tsx +++ b/client/src/components/Chat/Input/Files/Table/DataTable.tsx @@ -96,7 +96,7 @@ export default function DataTable({ columns, data }: DataTablePro deleteFiles({ files: filesToDelete as TFile[] }); setRowSelection({}); }} - className="dark:hover:bg-gray-850/25 ml-1 gap-2 sm:ml-0" + className="ml-1 gap-2 dark:hover:bg-gray-850/25 sm:ml-0" disabled={!table.getFilteredSelectedRowModel().rows.length || isDeleting} > {isDeleting ? ( diff --git a/client/src/components/Chat/Messages/HoverButtons.tsx b/client/src/components/Chat/Messages/HoverButtons.tsx index 3c3ad97890..163d5e9765 100644 --- a/client/src/components/Chat/Messages/HoverButtons.tsx +++ b/client/src/components/Chat/Messages/HoverButtons.tsx @@ -39,7 +39,7 @@ export default function HoverButtons({ const { endpoint: _endpoint, endpointType } = conversation ?? {}; const endpoint = endpointType ?? _endpoint; const [isCopied, setIsCopied] = useState(false); - const [TextToSpeech] = useRecoilState(store.TextToSpeech); + const [TextToSpeech] = useRecoilState(store.textToSpeech); const { hideEditButton, diff --git a/client/src/components/Files/FileList/DataTableFile.tsx b/client/src/components/Files/FileList/DataTableFile.tsx index 50cb855ae9..92e454016d 100644 --- a/client/src/components/Files/FileList/DataTableFile.tsx +++ b/client/src/components/Files/FileList/DataTableFile.tsx @@ -106,7 +106,7 @@ export default function DataTableFile({ deleteFiles({ files: filesToDelete as TFile[] }); setRowSelection({}); }} - className="dark:hover:bg-gray-850/25 ml-1 gap-2 sm:ml-0" + className="ml-1 gap-2 dark:hover:bg-gray-850/25 sm:ml-0" disabled={!table.getFilteredSelectedRowModel().rows.length || isDeleting} > {isDeleting ? ( diff --git a/client/src/components/Files/FileList/FileTableColumns.tsx b/client/src/components/Files/FileList/FileTableColumns.tsx index 6421cd8ec6..8e670aa805 100644 --- a/client/src/components/Files/FileList/FileTableColumns.tsx +++ b/client/src/components/Files/FileList/FileTableColumns.tsx @@ -75,18 +75,21 @@ export const fileTableColumns: ColumnDef[] = [ return ( <> {attachedVectorStores.map((vectorStore, index) => { - if (index === 4) - {return ( - - + if (index === 4) { + return ( + +   - {attachedVectorStores.length - index} more - - );} - if (index > 4) {return null;} + {attachedVectorStores.length - index} more + + ); + } + if (index > 4) { + return null; + } return ( {vectorStore.name} diff --git a/client/src/components/Messages/ScrollToBottom.tsx b/client/src/components/Messages/ScrollToBottom.tsx index 01434f5ecb..b5eef83735 100644 --- a/client/src/components/Messages/ScrollToBottom.tsx +++ b/client/src/components/Messages/ScrollToBottom.tsx @@ -8,7 +8,7 @@ export default function ScrollToBottom({ scrollHandler }: Props) { return (