WIP: Implement Realtime Ephemeral Token functionality and update related components

This commit is contained in:
Marco Beretta 2024-12-19 14:58:15 +01:00 committed by Danny Avila
parent 12d7028a18
commit 6b90817ae0
No known key found for this signature in database
GPG key ID: BF31EEB2C5CA0956
12 changed files with 213 additions and 13 deletions

View file

@ -3,7 +3,7 @@ const router = express.Router();
const { getCustomConfigSpeech } = require('~/server/services/Files/Audio');
// Serves the custom speech configuration at the router root. The previous
// `/get` sub-path registration is dropped so only one route opener remains.
router.get('/', async (req, res) => {
  await getCustomConfigSpeech(req, res);
});

View file

@ -4,6 +4,7 @@ const { createTTSLimiters, createSTTLimiters } = require('~/server/middleware');
const stt = require('./stt');
const tts = require('./tts');
const customConfigSpeech = require('./customConfigSpeech');
const realtime = require('./realtime');
const router = express.Router();
@ -14,4 +15,6 @@ router.use('/tts', ttsIpLimiter, ttsUserLimiter, tts);
router.use('/config', customConfigSpeech);
router.use('/realtime', realtime);
module.exports = router;

View file

@ -0,0 +1,10 @@
const express = require('express');
const router = express.Router();
const { getRealtimeConfig } = require('~/server/services/Files/Audio');
/**
 * GET / — delegates to `getRealtimeConfig`, which writes the response itself.
 *
 * A rejection from the async handler is forwarded to `next` so the Express
 * error pipeline can answer the request instead of leaving it pending.
 */
router.get('/', async (req, res, next) => {
  try {
    await getRealtimeConfig(req, res);
  } catch (error) {
    next(error);
  }
});

module.exports = router;

View file

@ -0,0 +1,102 @@
const { extractEnvVariable, RealtimeVoiceProviders } = require('librechat-data-provider');
const { getCustomConfig } = require('~/server/services/Config');
const { logger } = require('~/config');
/**
 * Resolves the single realtime voice provider configured under
 * `speech.realtime` and produces the connection details a client needs
 * (provider id, session token payload, realtime URL).
 */
class RealtimeService {
  /**
   * @param {object} customConfig - Loaded custom config; `speech.realtime` is read from it.
   */
  constructor(customConfig) {
    this.customConfig = customConfig;
    // Provider name -> strategy. Bound so a strategy can be invoked detached
    // from the instance (see `getRealtimeConfig`).
    this.providerStrategies = {
      [RealtimeVoiceProviders.OPENAI]: this.openaiProvider.bind(this),
    };
  }

  /**
   * Loads the custom config and wraps it in a service instance.
   *
   * @returns {Promise<RealtimeService>}
   * @throws {Error} When no custom config is available.
   */
  static async getInstance() {
    const customConfig = await getCustomConfig();
    if (!customConfig) {
      throw new Error('Custom config not found');
    }
    return new RealtimeService(customConfig);
  }

  /**
   * Finds the one provider entry under `speech.realtime` that has settings.
   *
   * @returns {Promise<[string, object]>} `[providerName, providerSchema]`.
   * @throws {Error} When the realtime section is missing, or when zero or
   *   more than one provider has settings.
   */
  async getProviderSchema() {
    // `speech` itself may be absent; treat that the same as a missing realtime section.
    const realtimeSchema = this.customConfig?.speech?.realtime;
    if (!realtimeSchema) {
      throw new Error('No Realtime schema is set in config');
    }

    // A provider key with a null/undefined value counts as "not configured"
    // instead of crashing inside Object.keys.
    const providers = Object.entries(realtimeSchema).filter(
      ([, value]) => value != null && Object.keys(value).length > 0,
    );

    if (providers.length !== 1) {
      throw new Error(providers.length > 1 ? 'Multiple providers set' : 'No provider set');
    }

    return providers[0];
  }

  /**
   * Creates an OpenAI realtime session for the requested voice.
   *
   * @param {object} schema - OpenAI provider settings (`apiKey`, optional `url`).
   * @param {string} voice - Requested voice; must be one of the allowed voices.
   * @returns {Promise<{ provider: string, token: unknown, url: string }>}
   * @throws {Error} On a missing/invalid voice, a missing API key, or a
   *   non-successful response from the sessions endpoint.
   */
  async openaiProvider(schema, voice) {
    const defaultRealtimeUrl = 'https://api.openai.com/v1/realtime';
    const allowedVoices = ['alloy', 'ash', 'ballad', 'coral', 'echo', 'sage', 'shimmer', 'verse'];

    if (!voice) {
      throw new Error('Voice not specified');
    }

    if (!allowedVoices.includes(voice)) {
      throw new Error(`Invalid voice: ${voice}`);
    }

    // Only resolve the key when one is configured, so a missing key yields the
    // explicit error below rather than whatever the helper does with undefined.
    const apiKey = schema.apiKey ? extractEnvVariable(schema.apiKey) : '';
    if (!apiKey) {
      throw new Error('OpenAI API key not configured');
    }

    const response = await fetch('https://api.openai.com/v1/realtime/sessions', {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: 'gpt-4o-realtime-preview-2024-12-17',
        modalities: ['audio', 'text'],
        voice: voice,
      }),
    });

    // fetch resolves on HTTP error statuses too; surface them explicitly.
    if (!response.ok) {
      throw new Error(
        `OpenAI realtime session request failed: ${response.status} ${response.statusText}`,
      );
    }

    // json() returns a promise; it must be awaited before being embedded below.
    // NOTE(review): the whole parsed session body is forwarded as `token`, while
    // the client-side response type declares `token: string` — confirm which
    // field of the session body the client is meant to receive.
    const token = await response.json();

    return {
      provider: RealtimeVoiceProviders.OPENAI,
      token: token,
      url: schema.url || defaultRealtimeUrl,
    };
  }

  /**
   * Express-style handler: resolves the provider, runs its strategy with the
   * `voice` query parameter and writes the result as JSON. Any failure is
   * logged and answered with HTTP 500 and `{ error: message }`.
   *
   * @param {object} req - Request; `req.query.voice` is read.
   * @param {object} res - Response; `json` / `status` are used.
   * @returns {Promise<void>}
   */
  async getRealtimeConfig(req, res) {
    try {
      const [provider, schema] = await this.getProviderSchema();
      const strategy = this.providerStrategies[provider];

      if (!strategy) {
        throw new Error(`Unsupported provider: ${provider}`);
      }

      const voice = req.query.voice;

      // Strategies are async: await so the resolved object (not a Promise) is
      // serialized and so rejections land in the catch below.
      const config = await strategy(schema, voice);

      res.json(config);
    } catch (error) {
      logger.error('[RealtimeService] Config generation failed:', error);
      res.status(500).json({ error: error.message });
    }
  }
}
/**
 * Route-level entry point: builds a `RealtimeService` from the current custom
 * config and lets it answer the request.
 *
 * Creating the service can throw (for example when no custom config is
 * found); that failure is logged and answered with HTTP 500 here so the
 * request never ends without a response.
 *
 * @param {object} req - Incoming request.
 * @param {object} res - Outgoing response.
 * @returns {Promise<void>}
 */
async function getRealtimeConfig(req, res) {
  try {
    const service = await RealtimeService.getInstance();
    await service.getRealtimeConfig(req, res);
  } catch (error) {
    logger.error('[getRealtimeConfig] Failed to initialize realtime service:', error);
    // The service may already have started a response before failing.
    if (!res.headersSent) {
      res.status(500).json({ error: error.message });
    }
  }
}

module.exports = getRealtimeConfig;

View file

@ -1,4 +1,5 @@
const getCustomConfigSpeech = require('./getCustomConfigSpeech');
const getRealtimeConfig = require('./getRealtimeConfig');
const TTSService = require('./TTSService');
const STTService = require('./STTService');
const getVoices = require('./getVoices');
@ -6,6 +7,7 @@ const getVoices = require('./getVoices');
// Public surface of the Audio services: the individual handlers plus every
// member spread in from the STT and TTS service modules.
module.exports = {
getVoices,
getCustomConfigSpeech,
getRealtimeConfig,
...STTService,
...TTSService,
};

View file

@ -1,7 +1,10 @@
import React, { forwardRef } from 'react';
import { useWatch } from 'react-hook-form';
import type { TRealtimeEphemeralTokenResponse } from 'librechat-data-provider';
import type { Control } from 'react-hook-form';
import { useRealtimeEphemeralTokenMutation } from '~/data-provider';
import { TooltipAnchor, SendIcon, CallIcon } from '~/components';
import { useToastContext } from '~/Providers/ToastContext';
import { useLocalize } from '~/hooks';
import { cn } from '~/utils';
@ -17,6 +20,7 @@ const ActionButton = forwardRef(
icon: React.ReactNode;
tooltip: string;
testId: string;
onClick?: () => void;
},
ref: React.ForwardedRef<HTMLButtonElement>,
) => {
@ -36,6 +40,7 @@ const ActionButton = forwardRef(
)}
data-testid={props.testId}
type="submit"
onClick={props.onClick}
>
<span className="" data-state="closed">
{props.icon}
@ -49,19 +54,43 @@ const ActionButton = forwardRef(
/**
 * Submit button of the chat form.
 *
 * While the watched `text` field has non-whitespace content it renders the
 * "send" variant. Otherwise it renders the "call" variant, whose click
 * requests a realtime ephemeral token.
 */
const SendButton = forwardRef((props: ButtonProps, ref: React.ForwardedRef<HTMLButtonElement>) => {
  const localize = useLocalize();
  const { showToast } = useToastContext();
  const { text = '' } = useWatch({ control: props.control });
  const hasText = text.trim() !== '';

  const { mutate: startCall, isLoading: isProcessing } = useRealtimeEphemeralTokenMutation({
    onSuccess: (_data: TRealtimeEphemeralTokenResponse) => {
      // TODO: WIP placeholder — not localized and the token is not used yet.
      showToast({
        message: 'IT WORKS!!',
        status: 'success',
      });
    },
    onError: (error: unknown) => {
      // Narrow instead of casting: a non-Error rejection has no `.message`.
      const reason = error instanceof Error ? error.message : String(error);
      showToast({
        message: localize('com_nav_audio_process_error', reason),
        status: 'error',
      });
    },
  });

  const handleClick = () => {
    // Only start a call from the empty-input state, and never while a token
    // request is already in flight (avoids parallel requests on repeated clicks).
    if (hasText || isProcessing) {
      return;
    }
    startCall({ voice: 'verse' });
  };

  const buttonProps = hasText
    ? {
      icon: <SendIcon size={24} />,
      tooltip: localize('com_nav_send_message'),
      testId: 'send-button',
    }
    : {
      icon: <CallIcon size={24} />,
      tooltip: localize('com_nav_call'),
      testId: 'call-button',
      onClick: handleClick,
    };

  return <ActionButton ref={ref} disabled={props.disabled} {...buttonProps} />;
});

View file

@ -726,6 +726,21 @@ export const useTextToSpeechMutation = (
});
};
/**
 * Mutation hook that requests a realtime ephemeral token through the data
 * service. Caller-supplied `options` are spread last, so they take precedence
 * over the defaults set here.
 */
export const useRealtimeEphemeralTokenMutation = (
  options?: t.MutationOptions<t.TRealtimeEphemeralTokenResponse, t.TRealtimeEphemeralTokenRequest>,
): UseMutationResult<
  t.TRealtimeEphemeralTokenResponse,
  unknown,
  t.TRealtimeEphemeralTokenRequest,
  unknown
> => {
  const requestToken = (payload: t.TRealtimeEphemeralTokenRequest) =>
    dataService.getRealtimeEphemeralToken(payload);

  return useMutation([MutationKeys.realtimeEphemeralToken], {
    mutationFn: requestToken,
    ...options,
  });
};
/**
* ASSISTANTS
*/

View file

@ -171,7 +171,9 @@ export const textToSpeechManual = () => `${textToSpeech()}/manual`;
export const textToSpeechVoices = () => `${textToSpeech()}/voices`;
/** URL of the custom speech configuration endpoint (the former `/config/get` path is gone). */
export const getCustomConfigSpeech = () => `${speech()}/config`;

/** URL of the endpoint that issues realtime ephemeral tokens. */
export const getRealtimeEphemeralToken = () => `${speech()}/realtime`;
export const getPromptGroup = (_id: string) => `${prompts()}/groups/${_id}`;

View file

@ -379,6 +379,18 @@ const speechTab = z
})
.optional();
/** Settings accepted for the OpenAI realtime provider; every field is optional. */
const realtimeOpenAISchema = z.object({
  url: z.string().optional(),
  apiKey: z.string().optional(),
  voices: z.array(z.string()).optional(),
});

/** Optional `realtime` section of the speech config, keyed by provider name. */
const realtime = z.object({ openai: realtimeOpenAISchema.optional() }).optional();
export enum RateLimitPrefix {
FILE_UPLOAD = 'FILE_UPLOAD',
IMPORT = 'IMPORT',
@ -534,6 +546,7 @@ export const configSchema = z.object({
tts: ttsSchema.optional(),
stt: sttSchema.optional(),
speechTab: speechTab.optional(),
realtime: realtime.optional(),
})
.optional(),
rateLimits: rateLimitSchema.optional(),
@ -1135,6 +1148,13 @@ export enum TTSProviders {
LOCALAI = 'localai',
}
/**
 * Providers available for realtime voice. Each value matches the provider key
 * used under the `realtime` section of the speech config.
 */
export enum RealtimeVoiceProviders {
/**
* Provider for OpenAI Realtime Voice API
*/
OPENAI = 'openai',
}
/** Enum for app-wide constants */
export enum Constants {
/** Key for the app's version. */

View file

@ -576,6 +576,12 @@ export const getCustomConfigSpeech = (): Promise<t.TCustomConfigSpeechResponse>
return request.get(endpoints.getCustomConfigSpeech());
};
/**
 * Requests a realtime ephemeral token with a GET call, sending `data` as the
 * request params.
 */
export const getRealtimeEphemeralToken = (
  data: t.TRealtimeEphemeralTokenRequest,
): Promise<t.TRealtimeEphemeralTokenResponse> => {
  const url = endpoints.getRealtimeEphemeralToken();
  const config = { params: data };
  return request.get(url, config);
};
/* conversations */
export function duplicateConversation(

View file

@ -67,4 +67,5 @@ export enum MutationKeys {
deleteAgentAction = 'deleteAgentAction',
deleteUser = 'deleteUser',
updateRole = 'updateRole',
realtimeEphemeralToken = 'realtimeEphemeralToken',
}

View file

@ -10,6 +10,7 @@ import type {
TConversationTag,
TBanner,
} from './schemas';
import { string } from 'zod';
export type TOpenAIMessage = OpenAI.Chat.ChatCompletionMessageParam;
export * from './schemas';
@ -472,3 +473,12 @@ export type TAcceptTermsResponse = {
};
export type TBannerResponse = TBanner | null;
/** Request payload for the realtime ephemeral token endpoint. */
export type TRealtimeEphemeralTokenRequest = {
/** Name of the voice to use; passed to the server as request params. */
voice: string;
};
/** Response returned by the realtime ephemeral token endpoint. */
export type TRealtimeEphemeralTokenResponse = {
// NOTE(review): declared as a string, but the server places the parsed body of
// the provider's session response in this field — confirm the intended shape.
token: string;
/** Realtime URL for the provider; the server falls back to a default when none is configured. */
url: string;
};