From 5eabd2493c8cd16cc368be569d5781e3234c0069 Mon Sep 17 00:00:00 2001 From: Marco Beretta <81851188+berry-13@users.noreply.github.com> Date: Sat, 23 Nov 2024 12:20:17 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=8C=8A=20feat:=20update=20Deepgram=20SDK?= =?UTF-8?q?=20integration=20for=20STT=20and=20remove=20unused=20TTS=20prov?= =?UTF-8?q?ider?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/server/services/Files/Audio/STTService.js | 12 ++++- api/server/services/Files/Audio/TTSService.js | 46 ------------------- 2 files changed, 10 insertions(+), 48 deletions(-) diff --git a/api/server/services/Files/Audio/STTService.js b/api/server/services/Files/Audio/STTService.js index bb32ef1b93..23c53a14b6 100644 --- a/api/server/services/Files/Audio/STTService.js +++ b/api/server/services/Files/Audio/STTService.js @@ -158,7 +158,15 @@ class STTService { return [url, formData, { ...headers, ...formData.getHeaders() }]; } - async deepgramSDKProvider(sttSchema, audioReadStream, audioFile) { + /** + * Transcribes audio using the Deepgram SDK. + * @async + * @param {Object} sttSchema - The STT schema for Deepgram. + * @param {Stream} audioReadStream - The audio data to be transcribed. + * @returns {Promise} A promise that resolves to the transcribed text. + * @throws {Error} If the transcription fails. + */ + async deepgramSDKProvider(sttSchema, audioReadStream) { const apiKey = extractEnvVariable(sttSchema.apiKey) || ''; const deepgram = createClient(apiKey); @@ -194,7 +202,7 @@ class STTService { [configOptions].forEach(this.removeUndefined); const { result, error } = await deepgram.listen.prerecorded.transcribeFile( - Buffer.isBuffer(audioFile) ? audioFile : audioReadStream, + audioReadStream, configOptions, ); diff --git a/api/server/services/Files/Audio/TTSService.js b/api/server/services/Files/Audio/TTSService.js index 8558a7eb9c..5590c8ac7d 100644 --- a/api/server/services/Files/Audio/TTSService.js +++ b/api/server/services/Files/Audio/TTSService.js @@ -248,52 +248,6 @@ class TTSService { return [url, data, headers]; } - deepgramProvider(ttsSchema, input, voice) { - const baseUrl = ttsSchema?.url || 'https://api.deepgram.com/v1/speak'; - const params = { - model: ttsSchema.model, - voice: voice, - language: ttsSchema.language, - }; - - const queryParams = Object.entries(params) - .filter(([, value]) => value) - .map(([key, value]) => `${key}=${value}`) - .join('&'); - - const url = queryParams ? `${baseUrl}?${queryParams}` : baseUrl; - - if ( - ttsSchema?.voices && - ttsSchema.voices.length > 0 && - !ttsSchema.voices.includes(voice) && - !ttsSchema.voices.includes('ALL') - ) { - throw new Error(`Voice ${voice} is not available.`); - } - - const data = { - input, - model: ttsSchema?.voices && ttsSchema.voices.length > 0 ? voice : undefined, - language: ttsSchema?.language, - media_settings: { - bit_rate: ttsSchema?.media_settings?.bit_rate, - sample_rate: ttsSchema?.media_settings?.sample_rate, - }, - }; - - const headers = { - 'Content-Type': 'application/json', - Authorization: `Bearer ${extractEnvVariable(ttsSchema?.apiKey)}`, - }; - - if (extractEnvVariable(ttsSchema.apiKey) === '') { - delete headers.Authorization; - } - - return [url, data, headers]; - } - /** * Sends a TTS request to the specified provider. * @async