mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-20 18:30:15 +01:00
✨ WIP: Implement Realtime Ephemeral Token functionality and update related components
This commit is contained in:
parent
12d7028a18
commit
6b90817ae0
12 changed files with 213 additions and 13 deletions
|
|
@ -3,7 +3,7 @@ const router = express.Router();
|
|||
|
||||
const { getCustomConfigSpeech } = require('~/server/services/Files/Audio');
|
||||
|
||||
router.get('/get', async (req, res) => {
|
||||
router.get('/', async (req, res) => {
|
||||
await getCustomConfigSpeech(req, res);
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ const { createTTSLimiters, createSTTLimiters } = require('~/server/middleware');
|
|||
const stt = require('./stt');
|
||||
const tts = require('./tts');
|
||||
const customConfigSpeech = require('./customConfigSpeech');
|
||||
const realtime = require('./realtime');
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
|
|
@ -14,4 +15,6 @@ router.use('/tts', ttsIpLimiter, ttsUserLimiter, tts);
|
|||
|
||||
router.use('/config', customConfigSpeech);
|
||||
|
||||
router.use('/realtime', realtime);
|
||||
|
||||
module.exports = router;
|
||||
|
|
|
|||
10
api/server/routes/files/speech/realtime.js
Normal file
10
api/server/routes/files/speech/realtime.js
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
const express = require('express');
|
||||
const router = express.Router();
|
||||
|
||||
const { getRealtimeConfig } = require('~/server/services/Files/Audio');
|
||||
|
||||
/**
 * GET / — hands the request straight to the realtime config service, which
 * writes the JSON response itself.
 *
 * @param {import('express').Request} req
 * @param {import('express').Response} res
 * @returns {Promise<void>}
 */
async function handleRealtimeConfigRequest(req, res) {
  await getRealtimeConfig(req, res);
}

router.get('/', handleRealtimeConfigRequest);
|
||||
|
||||
module.exports = router;
|
||||
102
api/server/services/Files/Audio/getRealtimeConfig.js
Normal file
102
api/server/services/Files/Audio/getRealtimeConfig.js
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
const { extractEnvVariable, RealtimeVoiceProviders } = require('librechat-data-provider');
|
||||
const { getCustomConfig } = require('~/server/services/Config');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
/**
 * Produces the payload a client needs to start a realtime voice session:
 * the provider name, an ephemeral token and the realtime URL.
 *
 * The provider is chosen from the `speech.realtime` section of the custom
 * config; exactly one provider must be configured there.
 */
class RealtimeService {
  /**
   * @param {object} customConfig - Loaded custom config; `speech.realtime` is read from it.
   */
  constructor(customConfig) {
    this.customConfig = customConfig;
    // Provider key (as it appears in the config) -> method that builds its session payload.
    this.providerStrategies = {
      [RealtimeVoiceProviders.OPENAI]: this.openaiProvider.bind(this),
    };
  }

  /**
   * Loads the custom config and wraps it in a service instance.
   *
   * @returns {Promise<RealtimeService>}
   * @throws {Error} When no custom config is available.
   */
  static async getInstance() {
    const customConfig = await getCustomConfig();
    if (!customConfig) {
      throw new Error('Custom config not found');
    }
    return new RealtimeService(customConfig);
  }

  /**
   * Finds the single configured realtime provider.
   *
   * @returns {Promise<[string, object]>} `[providerKey, providerSchema]`.
   * @throws {Error} When the realtime section is missing, or when zero or more
   *   than one provider has a non-empty configuration.
   */
  async getProviderSchema() {
    // `speech` itself is optional in the config; guard it so a missing section
    // yields the descriptive error below instead of a TypeError.
    const speech = this.customConfig.speech;
    const realtimeSchema = speech ? speech.realtime : undefined;
    if (!realtimeSchema) {
      throw new Error('No Realtime schema is set in config');
    }

    // A provider counts as configured only if its entry has at least one key.
    const providers = Object.entries(realtimeSchema).filter(
      ([, value]) => value != null && Object.keys(value).length > 0,
    );

    if (providers.length !== 1) {
      throw new Error(providers.length > 1 ? 'Multiple providers set' : 'No provider set');
    }

    return providers[0];
  }

  /**
   * Requests an ephemeral realtime session token from OpenAI.
   *
   * @param {object} schema - The `openai` entry of the realtime config (`url`, `apiKey`, `voices`).
   * @param {string} voice - Voice requested by the client.
   * @returns {Promise<{ provider: string, token: string, url: string }>}
   * @throws {Error} When the voice is missing or not allowed, the API key is not
   *   configured, the session request fails, or the response carries no client secret.
   */
  async openaiProvider(schema, voice) {
    const defaultRealtimeUrl = 'https://api.openai.com/v1/realtime';
    const defaultVoices = ['alloy', 'ash', 'ballad', 'coral', 'echo', 'sage', 'shimmer', 'verse'];
    // Honour the `voices` list from the config when one is given; otherwise
    // fall back to the built-in list.
    const allowedVoices =
      Array.isArray(schema.voices) && schema.voices.length > 0 ? schema.voices : defaultVoices;

    if (!voice) {
      throw new Error('Voice not specified');
    }

    if (!allowedVoices.includes(voice)) {
      throw new Error(`Invalid voice: ${voice}`);
    }

    const apiKey = extractEnvVariable(schema.apiKey);
    if (!apiKey) {
      throw new Error('OpenAI API key not configured');
    }

    // NOTE(review): the session is always created against api.openai.com even
    // when `schema.url` is set — confirm whether a custom URL should apply here too.
    const response = await fetch('https://api.openai.com/v1/realtime/sessions', {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: 'gpt-4o-realtime-preview-2024-12-17',
        modalities: ['audio', 'text'],
        voice: voice,
      }),
    });

    if (!response.ok) {
      throw new Error(`OpenAI realtime session request failed with status ${response.status}`);
    }

    // `response.json()` returns a Promise; it must be awaited before use.
    const session = await response.json();
    // The ephemeral token is the session's client secret value; return only
    // that string rather than the whole session object.
    const token = session && session.client_secret ? session.client_secret.value : undefined;
    if (!token) {
      throw new Error('OpenAI realtime session response did not include a client secret');
    }

    return {
      provider: RealtimeVoiceProviders.OPENAI,
      token: token,
      url: schema.url || defaultRealtimeUrl,
    };
  }

  /**
   * Express-style handler: resolves the configured provider, builds its session
   * payload for `req.query.voice` and sends it as JSON. Any failure is logged
   * and answered with HTTP 500 and `{ error: message }`.
   *
   * @param {import('express').Request} req
   * @param {import('express').Response} res
   * @returns {Promise<void>}
   */
  async getRealtimeConfig(req, res) {
    try {
      const [provider, schema] = await this.getProviderSchema();
      const strategy = this.providerStrategies[provider];

      if (!strategy) {
        throw new Error(`Unsupported provider: ${provider}`);
      }

      const voice = req.query.voice;

      // Strategies are async: await so the resolved payload (not a Promise) is
      // serialised, and so a rejection is handled by the catch below.
      const config = await strategy(schema, voice);
      res.json(config);
    } catch (error) {
      logger.error('[RealtimeService] Config generation failed:', error);
      res.status(500).json({ error: error.message });
    }
  }
}
|
||||
|
||||
/**
 * Request handler entry point: creates a RealtimeService from the current
 * custom config and lets it answer the request.
 *
 * `RealtimeService.getInstance()` throws when no custom config is available.
 * That failure is caught here so the client receives a 500 response instead of
 * the promise rejecting with nothing sent.
 *
 * @param {import('express').Request} req
 * @param {import('express').Response} res
 * @returns {Promise<void>}
 */
async function getRealtimeConfig(req, res) {
  try {
    const service = await RealtimeService.getInstance();
    await service.getRealtimeConfig(req, res);
  } catch (error) {
    logger.error('[getRealtimeConfig] Failed to initialize realtime service:', error);
    // The service may already have answered; never write a second response.
    if (!res.headersSent) {
      res.status(500).json({ error: error.message });
    }
  }
}
|
||||
|
||||
module.exports = getRealtimeConfig;
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
const getCustomConfigSpeech = require('./getCustomConfigSpeech');
|
||||
const getRealtimeConfig = require('./getRealtimeConfig');
|
||||
const TTSService = require('./TTSService');
|
||||
const STTService = require('./STTService');
|
||||
const getVoices = require('./getVoices');
|
||||
|
|
@ -6,6 +7,7 @@ const getVoices = require('./getVoices');
|
|||
module.exports = {
|
||||
getVoices,
|
||||
getCustomConfigSpeech,
|
||||
getRealtimeConfig,
|
||||
...STTService,
|
||||
...TTSService,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1,7 +1,10 @@
|
|||
import React, { forwardRef } from 'react';
|
||||
import { useWatch } from 'react-hook-form';
|
||||
import type { TRealtimeEphemeralTokenResponse } from 'librechat-data-provider';
|
||||
import type { Control } from 'react-hook-form';
|
||||
import { useRealtimeEphemeralTokenMutation } from '~/data-provider';
|
||||
import { TooltipAnchor, SendIcon, CallIcon } from '~/components';
|
||||
import { useToastContext } from '~/Providers/ToastContext';
|
||||
import { useLocalize } from '~/hooks';
|
||||
import { cn } from '~/utils';
|
||||
|
||||
|
|
@ -17,6 +20,7 @@ const ActionButton = forwardRef(
|
|||
icon: React.ReactNode;
|
||||
tooltip: string;
|
||||
testId: string;
|
||||
onClick?: () => void;
|
||||
},
|
||||
ref: React.ForwardedRef<HTMLButtonElement>,
|
||||
) => {
|
||||
|
|
@ -36,6 +40,7 @@ const ActionButton = forwardRef(
|
|||
)}
|
||||
data-testid={props.testId}
|
||||
type="submit"
|
||||
onClick={props.onClick}
|
||||
>
|
||||
<span className="" data-state="closed">
|
||||
{props.icon}
|
||||
|
|
@ -49,9 +54,32 @@ const ActionButton = forwardRef(
|
|||
|
||||
const SendButton = forwardRef((props: ButtonProps, ref: React.ForwardedRef<HTMLButtonElement>) => {
|
||||
const localize = useLocalize();
|
||||
const { text } = useWatch({ control: props.control });
|
||||
const { showToast } = useToastContext();
|
||||
const { text = '' } = useWatch({ control: props.control });
|
||||
|
||||
const buttonProps = text
|
||||
const { mutate: startCall, isLoading: isProcessing } = useRealtimeEphemeralTokenMutation({
|
||||
onSuccess: async (data: TRealtimeEphemeralTokenResponse) => {
|
||||
showToast({
|
||||
message: 'IT WORKS!!',
|
||||
status: 'success',
|
||||
});
|
||||
},
|
||||
onError: (error: unknown) => {
|
||||
showToast({
|
||||
message: localize('com_nav_audio_process_error', (error as Error).message),
|
||||
status: 'error',
|
||||
});
|
||||
},
|
||||
});
|
||||
|
||||
const handleClick = () => {
|
||||
if (text.trim() === '') {
|
||||
startCall({ voice: 'verse' });
|
||||
}
|
||||
};
|
||||
|
||||
const buttonProps =
|
||||
text.trim() !== ''
|
||||
? {
|
||||
icon: <SendIcon size={24} />,
|
||||
tooltip: localize('com_nav_send_message'),
|
||||
|
|
@ -61,6 +89,7 @@ const SendButton = forwardRef((props: ButtonProps, ref: React.ForwardedRef<HTMLB
|
|||
icon: <CallIcon size={24} />,
|
||||
tooltip: localize('com_nav_call'),
|
||||
testId: 'call-button',
|
||||
onClick: handleClick,
|
||||
};
|
||||
|
||||
return <ActionButton ref={ref} disabled={props.disabled} {...buttonProps} />;
|
||||
|
|
|
|||
|
|
@ -726,6 +726,21 @@ export const useTextToSpeechMutation = (
|
|||
});
|
||||
};
|
||||
|
||||
/**
 * Mutation hook that requests a realtime ephemeral token through the data service.
 *
 * @param options - Optional mutation options. They are spread after the default
 *   `mutationFn`, so a caller-supplied `mutationFn` takes precedence.
 * @returns The mutation result for the ephemeral-token request.
 */
export const useRealtimeEphemeralTokenMutation = (
  options?: t.MutationOptions<t.TRealtimeEphemeralTokenResponse, t.TRealtimeEphemeralTokenRequest>,
): UseMutationResult<
  t.TRealtimeEphemeralTokenResponse,
  unknown,
  t.TRealtimeEphemeralTokenRequest,
  unknown
> => {
  const requestToken = (payload: t.TRealtimeEphemeralTokenRequest) =>
    dataService.getRealtimeEphemeralToken(payload);

  return useMutation([MutationKeys.realtimeEphemeralToken], {
    mutationFn: requestToken,
    ...options,
  });
};
|
||||
|
||||
/**
|
||||
* ASSISTANTS
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -171,7 +171,9 @@ export const textToSpeechManual = () => `${textToSpeech()}/manual`;
|
|||
|
||||
export const textToSpeechVoices = () => `${textToSpeech()}/voices`;
|
||||
|
||||
export const getCustomConfigSpeech = () => `${speech()}/config/get`;
|
||||
export const getCustomConfigSpeech = () => `${speech()}/config`;
|
||||
|
||||
/** Path of the realtime ephemeral-token endpoint, built on the speech base path. */
export const getRealtimeEphemeralToken = () => speech() + '/realtime';
|
||||
|
||||
export const getPromptGroup = (_id: string) => `${prompts()}/groups/${_id}`;
|
||||
|
||||
|
|
|
|||
|
|
@ -379,6 +379,18 @@ const speechTab = z
|
|||
})
|
||||
.optional();
|
||||
|
||||
/** Settings accepted for the OpenAI realtime provider; every field may be omitted. */
const realtimeOpenAI = z.object({
  url: z.string().optional(),
  apiKey: z.string().optional(),
  voices: z.array(z.string()).optional(),
});

/** Optional realtime section of the config, with one optional entry per provider. */
const realtime = z.object({ openai: realtimeOpenAI.optional() }).optional();
|
||||
|
||||
export enum RateLimitPrefix {
|
||||
FILE_UPLOAD = 'FILE_UPLOAD',
|
||||
IMPORT = 'IMPORT',
|
||||
|
|
@ -534,6 +546,7 @@ export const configSchema = z.object({
|
|||
tts: ttsSchema.optional(),
|
||||
stt: sttSchema.optional(),
|
||||
speechTab: speechTab.optional(),
|
||||
realtime: realtime.optional(),
|
||||
})
|
||||
.optional(),
|
||||
rateLimits: rateLimitSchema.optional(),
|
||||
|
|
@ -1135,6 +1148,13 @@ export enum TTSProviders {
|
|||
LOCALAI = 'localai',
|
||||
}
|
||||
|
||||
/** Identifiers of the providers that can back the realtime voice feature. */
export enum RealtimeVoiceProviders {
  /** OpenAI Realtime Voice API. */
  OPENAI = 'openai',
}
|
||||
|
||||
/** Enum for app-wide constants */
|
||||
export enum Constants {
|
||||
/** Key for the app's version. */
|
||||
|
|
|
|||
|
|
@ -576,6 +576,12 @@ export const getCustomConfigSpeech = (): Promise<t.TCustomConfigSpeechResponse>
|
|||
return request.get(endpoints.getCustomConfigSpeech());
|
||||
};
|
||||
|
||||
/**
 * Fetches a realtime ephemeral token with a GET request.
 *
 * @param data - Request fields; they are passed as `params` on the GET request.
 * @returns The response of the realtime ephemeral-token endpoint.
 */
export const getRealtimeEphemeralToken = (
  data: t.TRealtimeEphemeralTokenRequest,
): Promise<t.TRealtimeEphemeralTokenResponse> => {
  const url = endpoints.getRealtimeEphemeralToken();
  const config = { params: data };
  return request.get(url, config);
};
|
||||
|
||||
/* conversations */
|
||||
|
||||
export function duplicateConversation(
|
||||
|
|
|
|||
|
|
@ -67,4 +67,5 @@ export enum MutationKeys {
|
|||
deleteAgentAction = 'deleteAgentAction',
|
||||
deleteUser = 'deleteUser',
|
||||
updateRole = 'updateRole',
|
||||
realtimeEphemeralToken = 'realtimeEphemeralToken',
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import type {
|
|||
TConversationTag,
|
||||
TBanner,
|
||||
} from './schemas';
|
||||
import { string } from 'zod';
|
||||
export type TOpenAIMessage = OpenAI.Chat.ChatCompletionMessageParam;
|
||||
|
||||
export * from './schemas';
|
||||
|
|
@ -472,3 +473,12 @@ export type TAcceptTermsResponse = {
|
|||
};
|
||||
|
||||
export type TBannerResponse = TBanner | null;
|
||||
|
||||
/** Fields sent when requesting a realtime ephemeral token. */
export type TRealtimeEphemeralTokenRequest = { voice: string };
|
||||
|
||||
/**
 * Payload returned by the realtime ephemeral-token endpoint.
 *
 * `provider` is optional here so existing consumers that only read `token`
 * and `url` keep type-checking; the server payload includes it alongside the
 * other two fields.
 */
export type TRealtimeEphemeralTokenResponse = {
  /** Identifier of the realtime provider that issued the token (e.g. 'openai'). */
  provider?: string;
  /** Ephemeral token for the realtime session. */
  token: string;
  /** Realtime URL to use with the token. */
  url: string;
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue