diff --git a/api/server/services/Files/Audio/STTService.js b/api/server/services/Files/Audio/STTService.js index 9255ba30c..95eea62ab 100644 --- a/api/server/services/Files/Audio/STTService.js +++ b/api/server/services/Files/Audio/STTService.js @@ -109,9 +109,11 @@ class STTService { * @throws {Error} If no STT schema is set, multiple providers are set, or no provider is set. */ async getProviderSchema(req) { - const appConfig = await getAppConfig({ - role: req?.user?.role, - }); + const appConfig = + req.config ?? + (await getAppConfig({ + role: req?.user?.role, + })); const sttSchema = appConfig?.speech?.stt; if (!sttSchema) { throw new Error( diff --git a/api/server/services/Files/Audio/TTSService.js b/api/server/services/Files/Audio/TTSService.js index 83c91b2f1..2f36c4b9c 100644 --- a/api/server/services/Files/Audio/TTSService.js +++ b/api/server/services/Files/Audio/TTSService.js @@ -35,11 +35,12 @@ class TTSService { /** * Retrieves the configured TTS provider. + * @param {AppConfig | null | undefined} [appConfig] - The app configuration object. * @returns {string} The name of the configured provider. * @throws {Error} If no provider is set or multiple providers are set. */ - getProvider() { - const ttsSchema = this.customConfig.speech.tts; + getProvider(appConfig) { + const ttsSchema = appConfig?.speech?.tts; if (!ttsSchema) { throw new Error( 'No TTS schema is set. Did you configure TTS in the custom config (librechat.yaml)?', @@ -276,8 +277,8 @@ class TTSService { /** * Processes a text-to-speech request. * @async - * @param {Object} req - The request object. - * @param {Object} res - The response object. + * @param {ServerRequest} req - The request object. + * @param {ServerResponse} res - The response object. * @returns {Promise} */ async processTextToSpeech(req, res) { @@ -287,12 +288,14 @@ class TTSService { return res.status(400).send('Missing text in request body'); } - const appConfig = await getAppConfig({ - role: req.user?.role, - }); + const appConfig = + req.config ?? + (await getAppConfig({ + role: req.user?.role, + })); try { res.setHeader('Content-Type', 'audio/mpeg'); - const provider = this.getProvider(); + const provider = this.getProvider(appConfig); const ttsSchema = appConfig?.speech?.tts?.[provider]; const voice = await this.getVoice(ttsSchema, requestVoice); @@ -344,14 +347,19 @@ class TTSService { /** * Streams audio data from the TTS provider. * @async - * @param {Object} req - The request object. - * @param {Object} res - The response object. + * @param {ServerRequest} req - The request object. + * @param {ServerResponse} res - The response object. * @returns {Promise} */ async streamAudio(req, res) { res.setHeader('Content-Type', 'audio/mpeg'); - const provider = this.getProvider(); - const ttsSchema = this.customConfig.speech.tts[provider]; + const appConfig = + req.config ?? + (await getAppConfig({ + role: req.user?.role, + })); + const provider = this.getProvider(appConfig); + const ttsSchema = appConfig?.speech?.tts?.[provider]; const voice = await this.getVoice(ttsSchema, req.body.voice); let shouldContinue = true; @@ -436,8 +444,8 @@ async function createTTSService() { /** * Wrapper function for text-to-speech processing. * @async - * @param {Object} req - The request object. - * @param {Object} res - The response object. + * @param {ServerRequest} req - The request object. + * @param {ServerResponse} res - The response object. * @returns {Promise} */ async function textToSpeech(req, res) { @@ -460,11 +468,12 @@ async function streamAudio(req, res) { /** * Wrapper function to get the configured TTS provider. * @async + * @param {AppConfig | null | undefined} appConfig - The app configuration object. * @returns {Promise} A promise that resolves to the name of the configured provider. */ -async function getProvider() { +async function getProvider(appConfig) { const ttsService = await createTTSService(); - return ttsService.getProvider(); + return ttsService.getProvider(appConfig); } module.exports = { diff --git a/client/src/components/Chat/Input/AudioRecorder.tsx b/client/src/components/Chat/Input/AudioRecorder.tsx index be06e7d09..e4e716d26 100644 --- a/client/src/components/Chat/Input/AudioRecorder.tsx +++ b/client/src/components/Chat/Input/AudioRecorder.tsx @@ -1,10 +1,11 @@ import { useCallback, useRef } from 'react'; import { useToastContext, TooltipAnchor, ListeningIcon, Spinner } from '@librechat/client'; -import { useLocalize, useSpeechToText } from '~/hooks'; +import { useLocalize, useSpeechToText, useGetAudioSettings } from '~/hooks'; import { useChatFormContext } from '~/Providers'; import { globalAudioId } from '~/common'; import { cn } from '~/utils'; +const isExternalSTT = (speechToTextEndpoint: string) => speechToTextEndpoint === 'external'; export default function AudioRecorder({ disabled, ask, @@ -21,6 +22,8 @@ export default function AudioRecorder({ const { setValue, reset, getValues } = methods; const localize = useLocalize(); const { showToast } = useToastContext(); + const { speechToTextEndpoint } = useGetAudioSettings(); + const existingTextRef = useRef(''); const onTranscriptionComplete = useCallback( @@ -38,23 +41,34 @@ export default function AudioRecorder({ console.log('Unmuting global audio'); globalAudio.muted = false; } - ask({ text }); + /** For external STT, append existing text to the transcription */ + const finalText = + isExternalSTT(speechToTextEndpoint) && existingTextRef.current + ? `${existingTextRef.current} ${text}` + : text; + ask({ text: finalText }); reset({ text: '' }); existingTextRef.current = ''; } }, - [ask, reset, showToast, localize, isSubmitting], + [ask, reset, showToast, localize, isSubmitting, speechToTextEndpoint], ); const setText = useCallback( (text: string) => { - /** The transcript is cumulative, so we only need to prepend the existing text once */ - const newText = existingTextRef.current ? `${existingTextRef.current} ${text}` : text; + let newText = text; + if (isExternalSTT(speechToTextEndpoint)) { + /** For external STT, the text comes as a complete transcription, so append to existing */ + newText = existingTextRef.current ? `${existingTextRef.current} ${text}` : text; + } else { + /** For browser STT, the transcript is cumulative, so we only need to prepend the existing text once */ + newText = existingTextRef.current ? `${existingTextRef.current} ${text}` : text; + } setValue('text', newText, { shouldValidate: true, }); }, - [setValue], + [setValue, speechToTextEndpoint], ); const { isListening, isLoading, startRecording, stopRecording } = useSpeechToText( @@ -73,7 +87,10 @@ export default function AudioRecorder({ const handleStopRecording = async () => { stopRecording(); - existingTextRef.current = ''; + /** For browser STT, clear the reference since text was already being updated */ + if (!isExternalSTT(speechToTextEndpoint)) { + existingTextRef.current = ''; + } }; const renderIcon = () => { diff --git a/client/src/components/Prompts/AdminSettings.tsx b/client/src/components/Prompts/AdminSettings.tsx index bccd4d5a2..6f1580800 100644 --- a/client/src/components/Prompts/AdminSettings.tsx +++ b/client/src/components/Prompts/AdminSettings.tsx @@ -153,7 +153,7 @@ const AdminSettings = () => { {localize('com_ui_admin')} - + {`${localize('com_ui_admin_settings')} - ${localize('com_ui_prompts')}`} diff --git a/client/src/components/Sharing/PeoplePickerAdminSettings.tsx b/client/src/components/Sharing/PeoplePickerAdminSettings.tsx index 8f1500877..96979c8f1 100644 --- a/client/src/components/Sharing/PeoplePickerAdminSettings.tsx +++ b/client/src/components/Sharing/PeoplePickerAdminSettings.tsx @@ -163,7 +163,7 @@ const PeoplePickerAdminSettings = () => { {localize('com_ui_admin_settings')} - + {`${localize('com_ui_admin_settings')} - ${localize( 'com_ui_people_picker', )}`} diff --git a/client/src/components/SidePanel/Agents/AdminSettings.tsx b/client/src/components/SidePanel/Agents/AdminSettings.tsx index 71fce3bed..bbb18b37c 100644 --- a/client/src/components/SidePanel/Agents/AdminSettings.tsx +++ b/client/src/components/SidePanel/Agents/AdminSettings.tsx @@ -157,7 +157,7 @@ const AdminSettings = () => { {localize('com_ui_admin_settings')} - + {`${localize('com_ui_admin_settings')} - ${localize( 'com_ui_agents', )}`} diff --git a/client/src/components/SidePanel/Memories/AdminSettings.tsx b/client/src/components/SidePanel/Memories/AdminSettings.tsx index 57e13621c..82b2fa840 100644 --- a/client/src/components/SidePanel/Memories/AdminSettings.tsx +++ b/client/src/components/SidePanel/Memories/AdminSettings.tsx @@ -146,7 +146,7 @@ const AdminSettings = () => { {localize('com_ui_admin_settings')} - + {`${localize('com_ui_admin_settings')} - ${localize( 'com_ui_memories', )}`}