diff --git a/api/server/services/AppService.js b/api/server/services/AppService.js index 2c67f7de4d..0390faf515 100644 --- a/api/server/services/AppService.js +++ b/api/server/services/AppService.js @@ -87,12 +87,14 @@ const AppService = async () => { const registration = config.registration ?? configDefaults.registration; const interfaceConfig = await loadDefaultInterface(config, configDefaults); const turnstileConfig = loadTurnstileConfig(config, configDefaults); + const speech = config.speech; const defaultConfig = { ocr, paths, config, memory, + speech, balance, mcpConfig, webSearch, diff --git a/api/server/services/Files/Audio/STTService.js b/api/server/services/Files/Audio/STTService.js index 49a800336b..2b6fa1a390 100644 --- a/api/server/services/Files/Audio/STTService.js +++ b/api/server/services/Files/Audio/STTService.js @@ -2,10 +2,10 @@ const axios = require('axios'); const fs = require('fs').promises; const FormData = require('form-data'); const { Readable } = require('stream'); +const { logger } = require('@librechat/data-schemas'); const { genAzureEndpoint } = require('@librechat/api'); const { extractEnvVariable, STTProviders } = require('librechat-data-provider'); -const { getCustomConfig } = require('~/server/services/Config'); -const { logger } = require('~/config'); +const { getAppConfig } = require('~/server/services/Config'); /** * Maps MIME types to their corresponding file extensions for audio files. @@ -84,12 +84,7 @@ function getFileExtensionFromMime(mimeType) { * @class */ class STTService { - /** - * Creates an instance of STTService. - * @param {Object} customConfig - The custom configuration object. - */ - constructor(customConfig) { - this.customConfig = customConfig; + constructor() { this.providerStrategies = { [STTProviders.OPENAI]: this.openAIProvider, [STTProviders.AZURE_OPENAI]: this.azureOpenAIProvider, @@ -104,21 +99,20 @@ class STTService { * @throws {Error} If the custom config is not found. */ static async getInstance() { - const customConfig = await getCustomConfig(); - if (!customConfig) { - throw new Error('Custom config not found'); - } - return new STTService(customConfig); + return new STTService(); } /** * Retrieves the configured STT provider and its schema. + * @param {ServerRequest} req - The request object. * @returns {Promise<[string, Object]>} A promise that resolves to an array containing the provider name and its schema. * @throws {Error} If no STT schema is set, multiple providers are set, or no provider is set. */ - async getProviderSchema() { - const sttSchema = this.customConfig.speech.stt; - + async getProviderSchema(req) { + const appConfig = await getAppConfig({ + role: req?.user?.role, + }); + const sttSchema = appConfig?.speech?.stt; if (!sttSchema) { throw new Error( 'No STT schema is set. Did you configure STT in the custom config (librechat.yaml)?', @@ -274,7 +268,7 @@ class STTService { * @param {Object} res - The response object. * @returns {Promise} */ - async processTextToSpeech(req, res) { + async processSpeechToText(req, res) { if (!req.file) { return res.status(400).json({ message: 'No audio file provided in the FormData' }); } @@ -287,7 +281,7 @@ class STTService { }; try { - const [provider, sttSchema] = await this.getProviderSchema(); + const [provider, sttSchema] = await this.getProviderSchema(req); const text = await this.sttRequest(provider, sttSchema, { audioBuffer, audioFile }); res.json({ text }); } catch (error) { @@ -297,7 +291,7 @@ class STTService { try { await fs.unlink(req.file.path); logger.debug('[/speech/stt] Temp. audio upload file deleted'); - } catch (error) { + } catch { logger.debug('[/speech/stt] Temp. audio upload file already deleted'); } } @@ -322,7 +316,7 @@ async function createSTTService() { */ async function speechToText(req, res) { const sttService = await createSTTService(); - await sttService.processTextToSpeech(req, res); + await sttService.processSpeechToText(req, res); } module.exports = { speechToText }; diff --git a/api/server/services/Files/Audio/TTSService.js b/api/server/services/Files/Audio/TTSService.js index 34d8202156..83916141f5 100644 --- a/api/server/services/Files/Audio/TTSService.js +++ b/api/server/services/Files/Audio/TTSService.js @@ -1,9 +1,9 @@ const axios = require('axios'); +const { logger } = require('@librechat/data-schemas'); const { genAzureEndpoint } = require('@librechat/api'); const { extractEnvVariable, TTSProviders } = require('librechat-data-provider'); const { getRandomVoiceId, createChunkProcessor, splitTextIntoChunks } = require('./streamAudio'); -const { getCustomConfig } = require('~/server/services/Config'); -const { logger } = require('~/config'); +const { getAppConfig } = require('~/server/services/Config'); /** * Service class for handling Text-to-Speech (TTS) operations. @@ -32,11 +32,7 @@ class TTSService { * @throws {Error} If the custom config is not found. */ static async getInstance() { - const customConfig = await getCustomConfig(); - if (!customConfig) { - throw new Error('Custom config not found'); - } - return new TTSService(customConfig); + return new TTSService(); } /** @@ -293,10 +289,13 @@ class TTSService { return res.status(400).send('Missing text in request body'); } + const appConfig = await getAppConfig({ + role: req.user?.role, + }); try { res.setHeader('Content-Type', 'audio/mpeg'); const provider = this.getProvider(); - const ttsSchema = this.customConfig.speech.tts[provider]; + const ttsSchema = appConfig?.speech?.tts?.[provider]; const voice = await this.getVoice(ttsSchema, requestVoice); if (input.length < 4096) { diff --git a/api/server/services/Files/Audio/getCustomConfigSpeech.js b/api/server/services/Files/Audio/getCustomConfigSpeech.js index 36f97bc491..cdf44b810e 100644 --- a/api/server/services/Files/Audio/getCustomConfigSpeech.js +++ b/api/server/services/Files/Audio/getCustomConfigSpeech.js @@ -1,5 +1,5 @@ -const { getCustomConfig } = require('~/server/services/Config'); -const { logger } = require('~/config'); +const { logger } = require('@librechat/data-schemas'); +const { getAppConfig } = require('~/server/services/Config'); /** * This function retrieves the speechTab settings from the custom configuration @@ -15,26 +15,26 @@ const { logger } = require('~/config'); */ async function getCustomConfigSpeech(req, res) { try { - const customConfig = await getCustomConfig(); + const appConfig = await getAppConfig(); - if (!customConfig) { + if (!appConfig) { return res.status(200).send({ message: 'not_found', }); } - const sttExternal = !!customConfig.speech?.stt; - const ttsExternal = !!customConfig.speech?.tts; + const sttExternal = !!appConfig.speech?.stt; + const ttsExternal = !!appConfig.speech?.tts; let settings = { sttExternal, ttsExternal, }; - if (!customConfig.speech?.speechTab) { + if (!appConfig.speech?.speechTab) { return res.status(200).send(settings); } - const speechTab = customConfig.speech.speechTab; + const speechTab = appConfig.speech.speechTab; if (speechTab.advancedMode !== undefined) { settings.advancedMode = speechTab.advancedMode; diff --git a/api/server/services/Files/Audio/getVoices.js b/api/server/services/Files/Audio/getVoices.js index 24612d85e2..0725b987d2 100644 --- a/api/server/services/Files/Audio/getVoices.js +++ b/api/server/services/Files/Audio/getVoices.js @@ -1,5 +1,5 @@ const { TTSProviders } = require('librechat-data-provider'); -const { getCustomConfig } = require('~/server/services/Config'); +const { getAppConfig } = require('~/server/services/Config'); const { getProvider } = require('./TTSService'); /** @@ -14,13 +14,13 @@ const { getProvider } = require('./TTSService'); */ async function getVoices(req, res) { try { - const customConfig = await getCustomConfig(); + const appConfig = await getAppConfig(); - if (!customConfig || !customConfig?.speech?.tts) { + if (!appConfig || !appConfig?.speech?.tts) { throw new Error('Configuration or TTS schema is missing'); } - const ttsSchema = customConfig?.speech?.tts; + const ttsSchema = appConfig?.speech?.tts; const provider = await getProvider(ttsSchema); let voices; diff --git a/packages/api/src/types/config.ts b/packages/api/src/types/config.ts index e25caba7b2..6487203b0f 100644 --- a/packages/api/src/types/config.ts +++ b/packages/api/src/types/config.ts @@ -45,6 +45,8 @@ export interface AppConfig { turnstileConfig?: TCustomConfig['turnstile']; /** Balance configuration */ balance?: TCustomConfig['balance']; + /** Speech configuration */ + speech?: TCustomConfig['speech']; /** MCP server configuration */ mcpConfig?: TCustomConfig['mcpServers'] | null; /** File configuration */