⏯️ fix(tts): Resolve Voice Selection and Manual Playback Issues (#2845)
* fix: voice setting for autoplayback TTS
* fix(useTextToSpeechExternal): resolve stateful playback issues and consolidate state logic
* refactor: initialize tts voice and provider schema once per request
* fix(tts): edge case, longer text inputs. TODO: use continuous stream for longer text inputs
* fix(tts): pause global audio on conversation change
* refactor: keyvMongo ban cache to allow db updates for unbanning, to prevent server restart
* chore: eslint fix
* refactor: make ban cache exclusively keyvMongo
parent 8e66683577
commit 514a502b9c

10 changed files with 332 additions and 178 deletions
@@ -2,14 +2,12 @@ const Keyv = require('keyv');
 const uap = require('ua-parser-js');
 const { ViolationTypes } = require('librechat-data-provider');
 const { isEnabled, removePorts } = require('../utils');
-const keyvRedis = require('~/cache/keyvRedis');
+const keyvMongo = require('~/cache/keyvMongo');
 const denyRequest = require('./denyRequest');
 const { getLogStores } = require('~/cache');
 const User = require('~/models/User');
 
-const banCache = isEnabled(process.env.USE_REDIS)
-  ? new Keyv({ store: keyvRedis })
-  : new Keyv({ namespace: ViolationTypes.BAN, ttl: 0 });
+const banCache = new Keyv({ store: keyvMongo, namespace: ViolationTypes.BAN, ttl: 0 });
 const message = 'Your account has been temporarily banned due to violations of our service.';
 
 /**
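
Note: the ban cache is now always backed by keyvMongo (previously Redis or in-memory), so unbanning a user by editing the database takes effect without a server restart. A minimal sketch of the pattern, assuming the standalone keyv and @keyv/mongo packages (LibreChat wraps its own keyvMongo store; the connection string is illustrative):

const Keyv = require('keyv');
const KeyvMongo = require('@keyv/mongo');

// A persistent store: writes and deletes in Mongo are visible on the next read.
const store = new KeyvMongo('mongodb://127.0.0.1:27017/librechat');
const banCache = new Keyv({ store, namespace: 'ban', ttl: 0 });

async function demo() {
  await banCache.set('user:123', { expiresAt: Date.now() + 60_000 });
  console.log(await banCache.get('user:123')); // ban record survives restarts
  await banCache.delete('user:123'); // "unban" by removing the record in the DB
}
demo().catch(console.error);
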
@@ -90,7 +90,7 @@ function findLastSeparatorIndex(text, separators = SEPARATORS) {
 }
 
 const MAX_NOT_FOUND_COUNT = 6;
-const MAX_NO_CHANGE_COUNT = 12;
+const MAX_NO_CHANGE_COUNT = 10;
 
 /**
  * @param {string} messageId
@@ -152,6 +152,64 @@ function createChunkProcessor(messageId) {
   return processChunks;
 }
 
+/**
+ * @param {string} text
+ * @param {number} [chunkSize=4000]
+ * @returns {{ text: string, isFinished: boolean }[]}
+ */
+function splitTextIntoChunks(text, chunkSize = 4000) {
+  if (!text) {
+    throw new Error('Text is required');
+  }
+
+  const chunks = [];
+  let startIndex = 0;
+  const textLength = text.length;
+
+  while (startIndex < textLength) {
+    let endIndex = Math.min(startIndex + chunkSize, textLength);
+    let chunkText = text.slice(startIndex, endIndex);
+
+    if (endIndex < textLength) {
+      let lastSeparatorIndex = -1;
+      for (const separator of SEPARATORS) {
+        const index = chunkText.lastIndexOf(separator);
+        if (index !== -1) {
+          lastSeparatorIndex = Math.max(lastSeparatorIndex, index);
+        }
+      }
+
+      if (lastSeparatorIndex !== -1) {
+        endIndex = startIndex + lastSeparatorIndex + 1;
+        chunkText = text.slice(startIndex, endIndex);
+      } else {
+        const nextSeparatorIndex = text.slice(endIndex).search(/\S/);
+        if (nextSeparatorIndex !== -1) {
+          endIndex += nextSeparatorIndex;
+          chunkText = text.slice(startIndex, endIndex);
+        }
+      }
+    }
+
+    chunkText = chunkText.trim();
+    if (chunkText) {
+      chunks.push({
+        text: chunkText,
+        isFinished: endIndex >= textLength,
+      });
+    } else if (chunks.length > 0) {
+      chunks[chunks.length - 1].isFinished = true;
+    }
+
+    startIndex = endIndex;
+    while (startIndex < textLength && text[startIndex].trim() === '') {
+      startIndex++;
+    }
+  }
+
+  return chunks;
+}
+
 /**
  * Input stream text to speech
  * @param {Express.Response} res
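
Note: splitTextIntoChunks walks the text in chunkSize windows, backtracks to the last sentence separator so chunks end on natural boundaries, and marks the final chunk via isFinished. A quick usage sketch (behavior matches the tests added below):

const { splitTextIntoChunks } = require('./streamAudio');

const chunks = splitTextIntoChunks('This is a test. This is only a test!', 20);
// [
//   { text: 'This is a test.', isFinished: false },
//   { text: 'This is only a test!', isFinished: true },
// ]
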
@@ -307,6 +365,7 @@ module.exports = {
   inputStreamTextToSpeech,
   findLastSeparatorIndex,
   createChunkProcessor,
+  splitTextIntoChunks,
   llmMessageSource,
   getRandomVoiceId,
 };
@@ -1,5 +1,5 @@
+const { createChunkProcessor, splitTextIntoChunks } = require('./streamAudio');
 const { Message } = require('~/models/Message');
-const { createChunkProcessor } = require('./streamAudio');
 
 jest.mock('~/models/Message', () => ({
   Message: {
@@ -86,3 +86,52 @@ describe('processChunks', () => {
     expect(Message.findOne().lean).toHaveBeenCalledTimes(2);
   });
 });
+
+describe('splitTextIntoChunks', () => {
+  test('splits text into chunks of specified size with default separators', () => {
+    const text = 'This is a test. This is only a test! Make sure it works properly? Okay.';
+    const chunkSize = 20;
+    const expectedChunks = [
+      { text: 'This is a test.', isFinished: false },
+      { text: 'This is only a test!', isFinished: false },
+      { text: 'Make sure it works p', isFinished: false },
+      { text: 'roperly? Okay.', isFinished: true },
+    ];
+
+    const result = splitTextIntoChunks(text, chunkSize);
+    expect(result).toEqual(expectedChunks);
+  });
+
+  test('splits text into chunks with default size', () => {
+    const text = 'A'.repeat(8000) + '. The end.';
+    const expectedChunks = [
+      { text: 'A'.repeat(4000), isFinished: false },
+      { text: 'A'.repeat(4000), isFinished: false },
+      { text: '. The end.', isFinished: true },
+    ];
+
+    const result = splitTextIntoChunks(text);
+    expect(result).toEqual(expectedChunks);
+  });
+
+  test('returns a single chunk if text length is less than chunk size', () => {
+    const text = 'Short text.';
+    const expectedChunks = [{ text: 'Short text.', isFinished: true }];
+
+    const result = splitTextIntoChunks(text, 4000);
+    expect(result).toEqual(expectedChunks);
+  });
+
+  test('handles text with no separators correctly', () => {
+    const text = 'ThisTextHasNoSeparatorsAndIsVeryLong'.repeat(100);
+    const chunkSize = 4000;
+    const expectedChunks = [{ text: text, isFinished: true }];
+
+    const result = splitTextIntoChunks(text, chunkSize);
+    expect(result).toEqual(expectedChunks);
+  });
+
+  test('throws an error when text is empty', () => {
+    expect(() => splitTextIntoChunks('')).toThrow('Text is required');
+  });
+});
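
Note: the mid-word split in the first test ('Make sure it works p' / 'roperly? Okay.') is expected: when a window contains none of the sentence separators, the splitter falls back to a hard cut at chunkSize.
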
@@ -1,6 +1,6 @@
 const axios = require('axios');
 const getCustomConfig = require('~/server/services/Config/getCustomConfig');
-const { getRandomVoiceId, createChunkProcessor } = require('./streamAudio');
+const { getRandomVoiceId, createChunkProcessor, splitTextIntoChunks } = require('./streamAudio');
 const { extractEnvVariable } = require('librechat-data-provider');
 const { logger } = require('~/config');
 
@@ -54,7 +54,7 @@ function removeUndefined(obj) {
  * This function prepares the necessary data and headers for making a request to the OpenAI TTS
  * It uses the provided TTS schema, input text, and voice to create the request
  *
- * @param {Object} ttsSchema - The TTS schema containing the OpenAI configuration
+ * @param {TCustomConfig['tts']['openai']} ttsSchema - The TTS schema containing the OpenAI configuration
  * @param {string} input - The text to be converted to speech
  * @param {string} voice - The voice to be used for the speech
  *
@@ -62,27 +62,27 @@ function removeUndefined(obj) {
  * If an error occurs, it throws an error with a message indicating that the selected voice is not available
  */
 function openAIProvider(ttsSchema, input, voice) {
-  const url = ttsSchema.openai?.url || 'https://api.openai.com/v1/audio/speech';
+  const url = ttsSchema?.url || 'https://api.openai.com/v1/audio/speech';
 
   if (
-    ttsSchema.openai?.voices &&
-    ttsSchema.openai.voices.length > 0 &&
-    !ttsSchema.openai.voices.includes(voice) &&
-    !ttsSchema.openai.voices.includes('ALL')
+    ttsSchema?.voices &&
+    ttsSchema.voices.length > 0 &&
+    !ttsSchema.voices.includes(voice) &&
+    !ttsSchema.voices.includes('ALL')
   ) {
     throw new Error(`Voice ${voice} is not available.`);
   }
 
   let data = {
     input,
-    model: ttsSchema.openai?.model,
-    voice: ttsSchema.openai?.voices && ttsSchema.openai.voices.length > 0 ? voice : undefined,
-    backend: ttsSchema.openai?.backend,
+    model: ttsSchema?.model,
+    voice: ttsSchema?.voices && ttsSchema.voices.length > 0 ? voice : undefined,
+    backend: ttsSchema?.backend,
   };
 
   let headers = {
     'Content-Type': 'application/json',
-    Authorization: 'Bearer ' + extractEnvVariable(ttsSchema.openai?.apiKey),
+    Authorization: 'Bearer ' + extractEnvVariable(ttsSchema?.apiKey),
   };
 
   [data, headers].forEach(removeUndefined);
@@ -95,7 +95,7 @@ function openAIProvider(ttsSchema, input, voice) {
  * This function prepares the necessary data and headers for making a request to the Eleven Labs TTS
  * It uses the provided TTS schema, input text, and voice to create the request
  *
- * @param {Object} ttsSchema - The TTS schema containing the Eleven Labs configuration
+ * @param {TCustomConfig['tts']['elevenLabs']} ttsSchema - The TTS schema containing the Eleven Labs configuration
  * @param {string} input - The text to be converted to speech
  * @param {string} voice - The voice to be used for the speech
  * @param {boolean} stream - Whether to stream the audio or not
@@ -105,34 +105,31 @@ function openAIProvider(ttsSchema, input, voice) {
  */
 function elevenLabsProvider(ttsSchema, input, voice, stream) {
   let url =
-    ttsSchema.elevenlabs?.url ||
+    ttsSchema?.url ||
     `https://api.elevenlabs.io/v1/text-to-speech/{voice_id}${stream ? '/stream' : ''}`;
 
-  if (
-    !ttsSchema.elevenlabs?.voices.includes(voice) &&
-    !ttsSchema.elevenlabs?.voices.includes('ALL')
-  ) {
+  if (!ttsSchema?.voices.includes(voice) && !ttsSchema?.voices.includes('ALL')) {
     throw new Error(`Voice ${voice} is not available.`);
   }
 
   url = url.replace('{voice_id}', voice);
 
   let data = {
-    model_id: ttsSchema.elevenlabs?.model,
+    model_id: ttsSchema?.model,
     text: input,
     // voice_id: voice,
     voice_settings: {
-      similarity_boost: ttsSchema.elevenlabs?.voice_settings?.similarity_boost,
-      stability: ttsSchema.elevenlabs?.voice_settings?.stability,
-      style: ttsSchema.elevenlabs?.voice_settings?.style,
-      use_speaker_boost: ttsSchema.elevenlabs?.voice_settings?.use_speaker_boost || undefined,
+      similarity_boost: ttsSchema?.voice_settings?.similarity_boost,
+      stability: ttsSchema?.voice_settings?.stability,
+      style: ttsSchema?.voice_settings?.style,
+      use_speaker_boost: ttsSchema?.voice_settings?.use_speaker_boost || undefined,
     },
-    pronunciation_dictionary_locators: ttsSchema.elevenlabs?.pronunciation_dictionary_locators,
+    pronunciation_dictionary_locators: ttsSchema?.pronunciation_dictionary_locators,
   };
 
   let headers = {
     'Content-Type': 'application/json',
-    'xi-api-key': extractEnvVariable(ttsSchema.elevenlabs?.apiKey),
+    'xi-api-key': extractEnvVariable(ttsSchema?.apiKey),
     Accept: 'audio/mpeg',
   };
 
@@ -146,7 +143,7 @@ function elevenLabsProvider(ttsSchema, input, voice, stream) {
  * This function prepares the necessary data and headers for making a request to the LocalAI TTS
  * It uses the provided TTS schema, input text, and voice to create the request
  *
- * @param {Object} ttsSchema - The TTS schema containing the LocalAI configuration
+ * @param {TCustomConfig['tts']['localai']} ttsSchema - The TTS schema containing the LocalAI configuration
  * @param {string} input - The text to be converted to speech
  * @param {string} voice - The voice to be used for the speech
  *
@@ -154,102 +151,78 @@ function elevenLabsProvider(ttsSchema, input, voice, stream) {
  * @throws {Error} Throws an error if the selected voice is not available
  */
 function localAIProvider(ttsSchema, input, voice) {
-  let url = ttsSchema.localai?.url;
+  let url = ttsSchema?.url;
 
   if (
-    ttsSchema.localai?.voices &&
-    ttsSchema.localai.voices.length > 0 &&
-    !ttsSchema.localai.voices.includes(voice) &&
-    !ttsSchema.localai.voices.includes('ALL')
+    ttsSchema?.voices &&
+    ttsSchema.voices.length > 0 &&
+    !ttsSchema.voices.includes(voice) &&
+    !ttsSchema.voices.includes('ALL')
   ) {
     throw new Error(`Voice ${voice} is not available.`);
   }
 
   let data = {
     input,
-    model: ttsSchema.localai?.voices && ttsSchema.localai.voices.length > 0 ? voice : undefined,
-    backend: ttsSchema.localai?.backend,
+    model: ttsSchema?.voices && ttsSchema.voices.length > 0 ? voice : undefined,
+    backend: ttsSchema?.backend,
   };
 
   let headers = {
     'Content-Type': 'application/json',
-    Authorization: 'Bearer ' + extractEnvVariable(ttsSchema.localai?.apiKey),
+    Authorization: 'Bearer ' + extractEnvVariable(ttsSchema?.apiKey),
   };
 
   [data, headers].forEach(removeUndefined);
 
-  if (extractEnvVariable(ttsSchema.localai.apiKey) === '') {
+  if (extractEnvVariable(ttsSchema.apiKey) === '') {
     delete headers.Authorization;
   }
 
   return [url, data, headers];
 }
 
-/* not used */
-/*
-async function streamAudioFromWebSocket(req, res) {
-  const { voice } = req.body;
-  const customConfig = await getCustomConfig();
-
-  if (!customConfig) {
-    return res.status(500).send('Custom config not found');
-  }
-
-  const ttsSchema = customConfig.tts;
-  const provider = getProvider(ttsSchema);
-
-  if (provider !== 'elevenlabs') {
-    return res.status(400).send('WebSocket streaming is only supported for Eleven Labs');
-  }
-
-  const url =
-    ttsSchema.elevenlabs.websocketUrl ||
-    'wss://api.elevenlabs.io/v1/text-to-speech/{voice_id}/stream-input?model_id={model}'
-      .replace('{voice_id}', voice)
-      .replace('{model}', ttsSchema.elevenlabs.model);
-  const ws = new WebSocket(url);
-
-  ws.onopen = () => {
-    logger.debug('WebSocket connection opened');
-    sendTextToWebsocket(ws, (data) => {
-      res.write(data); // Stream data directly to the response
-    });
-  };
-
-  ws.onclose = () => {
-    logger.debug('WebSocket connection closed');
-    res.end(); // End the response when the WebSocket is closed
-  };
-
-  ws.onerror = (error) => {
-    logger.error('WebSocket error:', error);
-    res.status(500).send('WebSocket error');
-  };
-}
-*/
+/**
+ *
+ * Returns provider and its schema for use with TTS requests
+ * @param {TCustomConfig} customConfig
+ * @param {string} _voice
+ * @returns {Promise<[string, TProviderSchema]>}
+ */
+async function getProviderSchema(customConfig) {
+  const provider = getProvider(customConfig.tts);
+  return [provider, customConfig.tts[provider]];
+}
 
 /**
  *
- * @param {TCustomConfig} customConfig
- * @param {string} voice
- * @returns {Promise<ArrayBuffer>}
+ * Returns a tuple of the TTS schema as well as the voice for the TTS request
+ * @param {TProviderSchema} providerSchema
+ * @param {string} requestVoice
+ * @returns {Promise<string>}
  */
-async function ttsRequest(
-  customConfig,
-  { input, voice: _v, stream = true } = { input: '', stream: true },
-) {
-  const ttsSchema = customConfig.tts;
-  const provider = getProvider(ttsSchema);
-  const voices = ttsSchema[provider].voices.filter(
-    (voice) => voice && voice.toUpperCase() !== 'ALL',
-  );
-  let voice = _v;
+async function getVoice(providerSchema, requestVoice) {
+  const voices = providerSchema.voices.filter((voice) => voice && voice.toUpperCase() !== 'ALL');
+  let voice = requestVoice;
   if (!voice || !voices.includes(voice) || (voice.toUpperCase() === 'ALL' && voices.length > 1)) {
     voice = getRandomVoiceId(voices);
   }
 
-  let [url, data, headers] = [];
-
+  return voice;
+}
+
+/**
+ *
+ * @param {string} provider
+ * @param {TProviderSchema} ttsSchema
+ * @param {object} params
+ * @param {string} params.voice
+ * @param {string} params.input
+ * @param {boolean} [params.stream]
+ * @returns {Promise<ArrayBuffer>}
+ */
+async function ttsRequest(provider, ttsSchema, { input, voice, stream = true } = { stream: true }) {
+  let [url, data, headers] = [];
   switch (provider) {
     case 'openai':
       [url, data, headers] = openAIProvider(ttsSchema, input, voice);
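
Note: provider and schema are now resolved once per request via getProviderSchema/getVoice and threaded into ttsRequest, instead of being re-derived from the full custom config on every call. A sketch of the resulting call flow inside a request handler (illustrative wrapper; the names come from the diff above):

async function handleTTS(req, res, input) {
  const customConfig = await getCustomConfig();
  const [provider, ttsSchema] = await getProviderSchema(customConfig);
  const voice = await getVoice(ttsSchema, req.body.voice); // falls back to a random allowed voice
  const response = await ttsRequest(provider, ttsSchema, { input, voice, stream: true });
  response.data.pipe(res);
}
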
@@ -283,7 +256,7 @@ async function ttsRequest(
  * @throws {Error} Throws an error if the provider is invalid
  */
 async function textToSpeech(req, res) {
-  const { input, voice } = req.body;
+  const { input } = req.body;
 
   if (!input) {
     return res.status(400).send('Missing text in request body');
@@ -296,8 +269,47 @@ async function textToSpeech(req, res) {
 
   try {
     res.setHeader('Content-Type', 'audio/mpeg');
-    const response = await ttsRequest(customConfig, { input, voice });
-    response.data.pipe(res);
+    const [provider, ttsSchema] = await getProviderSchema(customConfig);
+    const voice = await getVoice(ttsSchema, req.body.voice);
+    if (input.length < 4096) {
+      const response = await ttsRequest(provider, ttsSchema, { input, voice });
+      response.data.pipe(res);
+      return;
+    }
+
+    const textChunks = splitTextIntoChunks(input, 1000);
+
+    for (const chunk of textChunks) {
+      try {
+        const response = await ttsRequest(provider, ttsSchema, {
+          voice,
+          input: chunk.text,
+          stream: true,
+        });
+
+        logger.debug(`[textToSpeech] user: ${req?.user?.id} | writing audio stream`);
+        await new Promise((resolve) => {
+          response.data.pipe(res, { end: chunk.isFinished });
+          response.data.on('end', () => {
+            resolve();
+          });
+        });
+
+        if (chunk.isFinished) {
+          break;
+        }
+      } catch (innerError) {
+        logger.error('Error processing update:', chunk, innerError);
+        if (!res.headersSent) {
+          res.status(500).end();
+        }
+        return;
+      }
+    }
+
+    if (!res.headersSent) {
+      res.end();
+    }
   } catch (error) {
     logger.error('An error occurred while creating the audio stream:', error);
     res.status(500).send('An error occurred');
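
Note: for long inputs, each chunk's audio stream is piped into the same response, and { end: chunk.isFinished } keeps the response open until the last chunk so the client receives one continuous audio body. The underlying Node pattern in isolation (a sketch):

// Pipe several readable streams into one writable, ending it only after the last.
async function pipeSequentially(streams, res) {
  for (let i = 0; i < streams.length; i++) {
    const isLast = i === streams.length - 1;
    await new Promise((resolve) => {
      streams[i].pipe(res, { end: isLast });
      streams[i].on('end', resolve);
    });
  }
}
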
@@ -311,8 +323,17 @@ async function streamAudio(req, res) {
     return res.status(500).send('Custom config not found');
   }
 
+  const [provider, ttsSchema] = await getProviderSchema(customConfig);
+  const voice = await getVoice(ttsSchema, req.body.voice);
+
   try {
     let shouldContinue = true;
+
+    req.on('close', () => {
+      logger.warn('[streamAudio] Audio Stream Request closed by client');
+      shouldContinue = false;
+    });
+
     const processChunks = createChunkProcessor(req.body.messageId);
 
     while (shouldContinue) {
@@ -337,7 +358,8 @@ async function streamAudio(req, res) {
 
       for (const update of updates) {
         try {
-          const response = await ttsRequest(customConfig, {
+          const response = await ttsRequest(provider, ttsSchema, {
+            voice,
             input: update.text,
             stream: true,
           });
@@ -348,7 +370,7 @@ async function streamAudio(req, res) {
 
           logger.debug(`[streamAudio] user: ${req?.user?.id} | writing audio stream`);
           await new Promise((resolve) => {
-            response.data.pipe(res, { end: false });
+            response.data.pipe(res, { end: update.isFinished });
             response.data.on('end', () => {
               resolve();
             });
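
Note: same fix as in textToSpeech above: the response is now closed when the final update arrives (end: update.isFinished) instead of never (end: false).
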
@@ -1,6 +1,5 @@
 const {
   Capabilities,
-  EModelEndpoint,
   assistantEndpointSchema,
   defaultAssistantsVersion,
 } = require('librechat-data-provider');
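
Note: EModelEndpoint is no longer referenced in this module; dropping the unused import is presumably the 'chore: eslint fix' from the commit message.
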
@@ -349,6 +349,12 @@
  * @memberof typedefs
  */
 
+/**
+ * @exports TProviderSchema
+ * @typedef {import('librechat-data-provider').TProviderSchema} TProviderSchema
+ * @memberof typedefs
+ */
+
 /**
  * @exports TEndpoint
  * @typedef {import('librechat-data-provider').TEndpoint} TEndpoint
@@ -1,10 +1,10 @@
 import { useParams } from 'react-router-dom';
+import { useEffect, useCallback } from 'react';
 import { QueryKeys } from 'librechat-data-provider';
 import { useQueryClient } from '@tanstack/react-query';
-import { useEffect, useCallback } from 'react';
 import { useRecoilState, useRecoilValue, useSetRecoilState } from 'recoil';
 import type { TMessage } from 'librechat-data-provider';
-import { useCustomAudioRef, MediaSourceAppender } from '~/hooks/Audio';
+import { useCustomAudioRef, MediaSourceAppender, usePauseGlobalAudio } from '~/hooks/Audio';
 import { useAuthContext } from '~/hooks';
 import { globalAudioId } from '~/common';
 import store from '~/store';
@@ -24,6 +24,7 @@ export default function StreamAudio({ index = 0 }) {
   const cacheTTS = useRecoilValue(store.cacheTTS);
   const playbackRate = useRecoilValue(store.playbackRate);
 
+  const voice = useRecoilValue(store.voice);
   const activeRunId = useRecoilValue(store.activeRunFamily(index));
   const automaticPlayback = useRecoilValue(store.automaticPlayback);
   const isSubmitting = useRecoilValue(store.isSubmittingFamily(index));
@@ -34,6 +35,7 @@ export default function StreamAudio({ index = 0 }) {
   const [globalAudioURL, setGlobalAudioURL] = useRecoilState(store.globalAudioURLFamily(index));
 
   const { audioRef } = useCustomAudioRef({ setIsPlaying });
+  const { pauseGlobalAudio } = usePauseGlobalAudio();
 
   const { conversationId: paramId } = useParams();
   const queryParam = paramId === 'new' ? paramId : latestMessage?.conversationId ?? paramId ?? '';
@@ -90,7 +92,7 @@ export default function StreamAudio({ index = 0 }) {
       const response = await fetch('/api/files/tts', {
         method: 'POST',
         headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${token}` },
-        body: JSON.stringify({ messageId: latestMessage?.messageId, runId: activeRunId }),
+        body: JSON.stringify({ messageId: latestMessage?.messageId, runId: activeRunId, voice }),
       });
 
       if (!response.ok) {
@@ -166,6 +168,7 @@ export default function StreamAudio({ index = 0 }) {
     audioRunId,
     cacheTTS,
     audioRef,
+    voice,
     token,
   ]);
 
@@ -180,6 +183,12 @@ export default function StreamAudio({ index = 0 }) {
     }
   }, [audioRef, globalAudioURL, playbackRate]);
 
+  useEffect(() => {
+    pauseGlobalAudio();
+    // We only want the effect to run when the paramId changes
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [paramId]);
+
   return (
     <audio
       ref={audioRef}
@@ -1,6 +1,7 @@
 import { useRecoilValue } from 'recoil';
-import { useCallback, useEffect, useState, useMemo } from 'react';
+import { useCallback, useEffect, useState, useMemo, useRef } from 'react';
 import { useTextToSpeechMutation } from '~/data-provider';
+import useLocalize from '~/hooks/useLocalize';
 import { useToastContext } from '~/Providers';
 import store from '~/store';
 
@@ -12,16 +13,16 @@ const createFormData = (text: string, voice: string) => {
 };
 
 function useTextToSpeechExternal(isLast: boolean, index = 0) {
+  const localize = useLocalize();
   const { showToast } = useToastContext();
   const voice = useRecoilValue(store.voice);
   const cacheTTS = useRecoilValue(store.cacheTTS);
   const playbackRate = useRecoilValue(store.playbackRate);
 
-  const [text, setText] = useState<string | null>(null);
+  const audioRef = useRef<HTMLAudioElement | null>(null);
+
   const [downloadFile, setDownloadFile] = useState(false);
   const [isLocalSpeaking, setIsSpeaking] = useState(false);
-  const [blobUrl, setBlobUrl] = useState<string | null>(null);
-  const [audio, setAudio] = useState<HTMLAudioElement | null>(null);
 
   /* Global Audio Variables */
   const globalIsFetching = useRecoilValue(store.globalAudioFetchingFamily(index));
@@ -29,10 +30,13 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
 
   const playAudio = (blobUrl: string) => {
     const newAudio = new Audio(blobUrl);
-    if (playbackRate && playbackRate !== 1) {
-      newAudio.playbackRate = playbackRate;
-    }
+    const initializeAudio = () => {
+      if (playbackRate && playbackRate !== 1) {
+        newAudio.playbackRate = playbackRate;
+      }
+    };
 
+    initializeAudio();
     const playPromise = () => newAudio.play().then(() => setIsSpeaking(true));
 
     playPromise().catch((error: Error) => {
@@ -40,10 +44,12 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
         error?.message &&
         error.message.includes('The play() request was interrupted by a call to pause()')
       ) {
+        console.log('Play request was interrupted by a call to pause()');
+        initializeAudio();
         return playPromise().catch(console.error);
       }
       console.error(error);
-      showToast({ message: `Error playing audio: ${error.message}`, status: 'error' });
+      showToast({ message: localize('com_nav_audio_play_error', error.message), status: 'error' });
     });
 
     newAudio.onended = () => {
@@ -52,8 +58,7 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
       setIsSpeaking(false);
     };
 
-    setAudio(newAudio);
-    setBlobUrl(blobUrl);
+    audioRef.current = newAudio;
   };
 
   const downloadAudio = (blobUrl: string) => {
@@ -65,35 +70,32 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
   };
 
   const { mutate: processAudio, isLoading: isProcessing } = useTextToSpeechMutation({
-    onSuccess: async (data: ArrayBuffer) => {
+    onMutate: (variables) => {
+      const inputText = (variables.get('input') ?? '') as string;
+      if (inputText.length >= 4096) {
+        showToast({
+          message: localize('com_nav_long_audio_warning'),
+          status: 'warning',
+        });
+      }
+    },
+    onSuccess: async (data: ArrayBuffer, variables) => {
       try {
-        const mediaSource = new MediaSource();
-        const audio = new Audio();
-        audio.src = URL.createObjectURL(mediaSource);
-        audio.autoplay = true;
+        const inputText = (variables.get('input') ?? '') as string;
+        const audioBlob = new Blob([data], { type: 'audio/mpeg' });
 
-        mediaSource.onsourceopen = () => {
-          const sourceBuffer = mediaSource.addSourceBuffer('audio/mpeg');
-          sourceBuffer.appendBuffer(data);
-        };
-
-        audio.onended = () => {
-          URL.revokeObjectURL(audio.src);
-          setIsSpeaking(false);
-        };
-
-        setAudio(audio);
-
-        if (cacheTTS) {
+        if (cacheTTS && inputText) {
           const cache = await caches.open('tts-responses');
-          const request = new Request(text!);
-          const response = new Response(new Blob([data], { type: 'audio/mpeg' }));
+          const request = new Request(inputText!);
+          const response = new Response(audioBlob);
           cache.put(request, response);
         }
 
+        const blobUrl = URL.createObjectURL(audioBlob);
         if (downloadFile) {
-          downloadAudio(audio.src);
+          downloadAudio(blobUrl);
         }
+        playAudio(blobUrl);
       } catch (error) {
         showToast({
           message: `Error processing audio: ${(error as Error).message}`,
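
Note: TTS responses are cached with the browser Cache API, keyed by the raw input text (the string is treated as a relative Request URL). Reading the input from the mutation's variables (a FormData) also removes the old text state. The caching pattern on its own (a sketch):

async function cacheAudio(inputText, arrayBuffer) {
  const audioBlob = new Blob([arrayBuffer], { type: 'audio/mpeg' });
  const cache = await caches.open('tts-responses');
  await cache.put(new Request(inputText), new Response(audioBlob));
}

async function readCachedAudio(inputText) {
  const hit = await caches.match(inputText); // Response | undefined
  return hit ? hit.blob() : null;
}
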
@@ -102,13 +104,15 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
       }
     },
     onError: (error: unknown) => {
-      showToast({ message: `Error: ${(error as Error).message}`, status: 'error' });
+      showToast({
+        message: localize('com_nav_audio_process_error', (error as Error).message),
+        status: 'error',
+      });
     },
   });
 
   const generateSpeechExternal = async (text: string, download: boolean) => {
-    setText(text);
-    const cachedResponse = await getCachedResponse(text);
+    const cachedResponse = await caches.match(text);
 
     if (cachedResponse && cacheTTS) {
       handleCachedResponse(cachedResponse, download);
@@ -119,8 +123,6 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
     }
   };
 
-  const getCachedResponse = async (text: string) => await caches.match(text);
-
   const handleCachedResponse = async (cachedResponse: Response, download: boolean) => {
     const audioBlob = await cachedResponse.blob();
     const blobUrl = URL.createObjectURL(audioBlob);
@@ -132,12 +134,13 @@ function useTextToSpeechExternal(isLast: boolean, index = 0) {
   };
 
   const cancelSpeech = useCallback(() => {
-    if (audio) {
-      audio.pause();
-      blobUrl && URL.revokeObjectURL(blobUrl);
+    if (audioRef.current) {
+      audioRef.current.pause();
+      audioRef.current.src && URL.revokeObjectURL(audioRef.current.src);
+      audioRef.current = null;
       setIsSpeaking(false);
     }
-  }, [audio, blobUrl]);
+  }, []);
 
   useEffect(() => cancelSpeech, [cancelSpeech]);
 
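
Note: holding the Audio element in a ref instead of state removes audio and blobUrl from cancelSpeech's dependency list, so the callback (and the unmount cleanup that uses it) keeps a stable identity across renders. The shape of the pattern (a sketch):

const audioRef = useRef(null);

const cancelSpeech = useCallback(() => {
  if (audioRef.current) {
    audioRef.current.pause();
    audioRef.current = null;
  }
}, []); // stable: mutating a ref never invalidates the callback

useEffect(() => cancelSpeech, [cancelSpeech]); // cleanup runs on unmount
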
@@ -550,6 +550,9 @@ export default {
   com_nav_auto_transcribe_audio: 'Auto transcribe audio',
   com_nav_db_sensitivity: 'Decibel sensitivity',
   com_nav_playback_rate: 'Audio Playback Rate',
+  com_nav_audio_play_error: 'Error playing audio: {0}',
+  com_nav_audio_process_error: 'Error processing audio: {0}',
+  com_nav_long_audio_warning: 'Longer texts will take longer to process.',
   com_nav_engine: 'Engine',
   com_nav_browser: 'Browser',
   com_nav_external: 'External',
@@ -223,41 +223,41 @@ export const azureEndpointSchema = z
 export type TAzureConfig = Omit<z.infer<typeof azureEndpointSchema>, 'groups'> &
   TAzureConfigValidationResult;
 
+const ttsOpenaiSchema = z.object({
+  url: z.string().optional(),
+  apiKey: z.string(),
+  model: z.string(),
+  voices: z.array(z.string()),
+});
+
+const ttsElevenLabsSchema = z.object({
+  url: z.string().optional(),
+  websocketUrl: z.string().optional(),
+  apiKey: z.string(),
+  model: z.string(),
+  voices: z.array(z.string()),
+  voice_settings: z
+    .object({
+      similarity_boost: z.number().optional(),
+      stability: z.number().optional(),
+      style: z.number().optional(),
+      use_speaker_boost: z.boolean().optional(),
+    })
+    .optional(),
+  pronunciation_dictionary_locators: z.array(z.string()).optional(),
+});
+
+const ttsLocalaiSchema = z.object({
+  url: z.string(),
+  apiKey: z.string().optional(),
+  voices: z.array(z.string()),
+  backend: z.string(),
+});
+
 const ttsSchema = z.object({
-  openai: z
-    .object({
-      url: z.string().optional(),
-      apiKey: z.string(),
-      model: z.string(),
-      voices: z.array(z.string()),
-    })
-    .optional(),
-  elevenLabs: z
-    .object({
-      url: z.string().optional(),
-      websocketUrl: z.string().optional(),
-      apiKey: z.string(),
-      model: z.string(),
-      voices: z.array(z.string()),
-      voice_settings: z
-        .object({
-          similarity_boost: z.number().optional(),
-          stability: z.number().optional(),
-          style: z.number().optional(),
-          use_speaker_boost: z.boolean().optional(),
-        })
-        .optional(),
-      pronunciation_dictionary_locators: z.array(z.string()).optional(),
-    })
-    .optional(),
-  localai: z
-    .object({
-      url: z.string(),
-      apiKey: z.string().optional(),
-      voices: z.array(z.string()),
-      backend: z.string(),
-    })
-    .optional(),
+  openai: ttsOpenaiSchema.optional(),
+  elevenLabs: ttsElevenLabsSchema.optional(),
+  localai: ttsLocalaiSchema.optional(),
 });
 
 const sttSchema = z.object({
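
Note: hoisting the per-provider objects into named schemas (ttsOpenaiSchema, ttsElevenLabsSchema, ttsLocalaiSchema) is what enables the exported TProviderSchema union in the next hunk, and each provider block can now be validated on its own, e.g. (a sketch; zod works the same from plain JS):

const { z } = require('zod');

const ttsOpenaiSchema = z.object({
  url: z.string().optional(),
  apiKey: z.string(),
  model: z.string(),
  voices: z.array(z.string()),
});

const parsed = ttsOpenaiSchema.safeParse({
  apiKey: '${TTS_API_KEY}',
  model: 'tts-1',
  voices: ['alloy', 'echo'],
});
console.log(parsed.success); // true; parsed.data is the validated provider block
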
@@ -359,6 +359,12 @@ export const getConfigDefaults = () => getSchemaDefaults(configSchema);
 
 export type TCustomConfig = z.infer<typeof configSchema>;
 
+export type TProviderSchema =
+  | z.infer<typeof ttsOpenaiSchema>
+  | z.infer<typeof ttsElevenLabsSchema>
+  | z.infer<typeof ttsLocalaiSchema>
+  | undefined;
+
 export enum KnownEndpoints {
   anyscale = 'anyscale',
   apipie = 'apipie',