⏯️ fix(tts): Resolve Voice Selection and Manual Playback Issues (#2845)

* fix: voice setting for autoplayback TTS

* fix(useTextToSpeechExternal): resolve stateful playback issues and consolidate state logic

* refactor: initialize tts voice and provider schema once per request

* fix(tts): handle edge case with longer text inputs. TODO: use a continuous stream for longer text inputs

* fix(tts): pause global audio on conversation change

* refactor: use keyvMongo for the ban cache so that unbanning via db updates takes effect without a server restart

* chore: eslint fix

* refactor: make ban cache exclusively keyvMongo
This commit is contained in:
Danny Avila 2024-05-23 16:27:36 -04:00 committed by GitHub
parent 8e66683577
commit 514a502b9c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 332 additions and 178 deletions

View file

@ -223,41 +223,41 @@ export const azureEndpointSchema = z
/**
 * Azure configuration type: the shape inferred from `azureEndpointSchema`
 * with its `groups` property removed, intersected with
 * `TAzureConfigValidationResult`.
 */
export type TAzureConfig = Omit<z.infer<typeof azureEndpointSchema>, 'groups'> &
  TAzureConfigValidationResult;
/**
 * Schema for the `openai` entry of the TTS configuration.
 *
 * `apiKey`, `model` and `voices` are required; `url` may be omitted.
 */
const ttsOpenaiSchema = z.object({
  url: z.string().optional(),
  apiKey: z.string(),
  model: z.string(),
  // List of voice identifiers, validated only as strings.
  voices: z.array(z.string()),
});
/**
 * Schema for the `elevenLabs` entry of the TTS configuration.
 *
 * `apiKey`, `model` and `voices` are required. `url`, `websocketUrl`,
 * `voice_settings` and `pronunciation_dictionary_locators` may be omitted.
 */
const ttsElevenLabsSchema = z.object({
  url: z.string().optional(),
  websocketUrl: z.string().optional(),
  apiKey: z.string(),
  model: z.string(),
  voices: z.array(z.string()),
  // The whole block is optional, and so is every field inside it.
  voice_settings: z
    .object({
      similarity_boost: z.number().optional(),
      stability: z.number().optional(),
      style: z.number().optional(),
      use_speaker_boost: z.boolean().optional(),
    })
    .optional(),
  pronunciation_dictionary_locators: z.array(z.string()).optional(),
});
/**
 * Schema for the `localai` entry of the TTS configuration.
 *
 * Unlike the other provider schemas in this file, `url` is required here
 * and `apiKey` is optional. `voices` and `backend` are required.
 */
const ttsLocalaiSchema = z.object({
  url: z.string(),
  apiKey: z.string().optional(),
  voices: z.array(z.string()),
  backend: z.string(),
});
/**
 * Schema for the text-to-speech section of the configuration.
 *
 * Each provider entry is optional and is validated by its dedicated schema
 * (`ttsOpenaiSchema`, `ttsElevenLabsSchema`, `ttsLocalaiSchema`). The
 * provider shapes are defined once in those schemas rather than repeated
 * inline, so each key appears exactly once in this object literal.
 */
const ttsSchema = z.object({
  openai: ttsOpenaiSchema.optional(),
  elevenLabs: ttsElevenLabsSchema.optional(),
  localai: ttsLocalaiSchema.optional(),
});
const sttSchema = z.object({
@ -359,6 +359,12 @@ export const getConfigDefaults = () => getSchemaDefaults(configSchema);
/** Static type inferred from `configSchema`. */
export type TCustomConfig = z.infer<typeof configSchema>;
/**
 * Configuration shape of a single TTS provider: the type inferred from any
 * one of the three provider schemas, or `undefined`.
 */
export type TProviderSchema =
  | z.infer<typeof ttsOpenaiSchema>
  | z.infer<typeof ttsElevenLabsSchema>
  | z.infer<typeof ttsLocalaiSchema>
  | undefined;
export enum KnownEndpoints {
anyscale = 'anyscale',
apipie = 'apipie',