mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-20 18:30:15 +01:00
✨ WIP: Implement Realtime Ephemeral Token functionality and update related components
This commit is contained in:
parent
12d7028a18
commit
6b90817ae0
12 changed files with 213 additions and 13 deletions
|
|
@ -3,7 +3,7 @@ const router = express.Router();
|
||||||
|
|
||||||
const { getCustomConfigSpeech } = require('~/server/services/Files/Audio');
|
const { getCustomConfigSpeech } = require('~/server/services/Files/Audio');
|
||||||
|
|
||||||
router.get('/get', async (req, res) => {
|
router.get('/', async (req, res) => {
|
||||||
await getCustomConfigSpeech(req, res);
|
await getCustomConfigSpeech(req, res);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ const { createTTSLimiters, createSTTLimiters } = require('~/server/middleware');
|
||||||
const stt = require('./stt');
|
const stt = require('./stt');
|
||||||
const tts = require('./tts');
|
const tts = require('./tts');
|
||||||
const customConfigSpeech = require('./customConfigSpeech');
|
const customConfigSpeech = require('./customConfigSpeech');
|
||||||
|
const realtime = require('./realtime');
|
||||||
|
|
||||||
const router = express.Router();
|
const router = express.Router();
|
||||||
|
|
||||||
|
|
@ -14,4 +15,6 @@ router.use('/tts', ttsIpLimiter, ttsUserLimiter, tts);
|
||||||
|
|
||||||
router.use('/config', customConfigSpeech);
|
router.use('/config', customConfigSpeech);
|
||||||
|
|
||||||
|
router.use('/realtime', realtime);
|
||||||
|
|
||||||
module.exports = router;
|
module.exports = router;
|
||||||
|
|
|
||||||
10
api/server/routes/files/speech/realtime.js
Normal file
10
api/server/routes/files/speech/realtime.js
Normal file
|
|
@ -0,0 +1,10 @@
|
||||||
|
const express = require('express');
|
||||||
|
const router = express.Router();
|
||||||
|
|
||||||
|
const { getRealtimeConfig } = require('~/server/services/Files/Audio');
|
||||||
|
|
||||||
|
/**
 * GET / — hands the request to `getRealtimeConfig`.
 *
 * The call is wrapped so that a rejection is passed to Express' error pipeline through `next`
 * instead of becoming an unhandled promise rejection that leaves the request unanswered.
 */
router.get('/', async (req, res, next) => {
  try {
    await getRealtimeConfig(req, res);
  } catch (error) {
    next(error);
  }
});
|
||||||
|
|
||||||
|
module.exports = router;
|
||||||
102
api/server/services/Files/Audio/getRealtimeConfig.js
Normal file
102
api/server/services/Files/Audio/getRealtimeConfig.js
Normal file
|
|
@ -0,0 +1,102 @@
|
||||||
|
const { extractEnvVariable, RealtimeVoiceProviders } = require('librechat-data-provider');
|
||||||
|
const { getCustomConfig } = require('~/server/services/Config');
|
||||||
|
const { logger } = require('~/config');
|
||||||
|
|
||||||
|
class RealtimeService {
|
||||||
|
constructor(customConfig) {
|
||||||
|
this.customConfig = customConfig;
|
||||||
|
this.providerStrategies = {
|
||||||
|
[RealtimeVoiceProviders.OPENAI]: this.openaiProvider.bind(this),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
static async getInstance() {
|
||||||
|
const customConfig = await getCustomConfig();
|
||||||
|
if (!customConfig) {
|
||||||
|
throw new Error('Custom config not found');
|
||||||
|
}
|
||||||
|
return new RealtimeService(customConfig);
|
||||||
|
}
|
||||||
|
|
||||||
|
async getProviderSchema() {
|
||||||
|
const realtimeSchema = this.customConfig.speech.realtime;
|
||||||
|
if (!realtimeSchema) {
|
||||||
|
throw new Error('No Realtime schema is set in config');
|
||||||
|
}
|
||||||
|
|
||||||
|
const providers = Object.entries(realtimeSchema).filter(
|
||||||
|
([, value]) => Object.keys(value).length > 0,
|
||||||
|
);
|
||||||
|
|
||||||
|
if (providers.length !== 1) {
|
||||||
|
throw new Error(providers.length > 1 ? 'Multiple providers set' : 'No provider set');
|
||||||
|
}
|
||||||
|
|
||||||
|
return providers[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
async openaiProvider(schema, voice) {
|
||||||
|
const defaultRealtimeUrl = 'https://api.openai.com/v1/realtime';
|
||||||
|
const allowedVoices = ['alloy', 'ash', 'ballad', 'coral', 'echo', 'sage', 'shimmer', 'verse'];
|
||||||
|
|
||||||
|
if (!voice) {
|
||||||
|
throw new Error('Voice not specified');
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!allowedVoices.includes(voice)) {
|
||||||
|
throw new Error(`Invalid voice: ${voice}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const apiKey = extractEnvVariable(schema.apiKey);
|
||||||
|
if (!apiKey) {
|
||||||
|
throw new Error('OpenAI API key not configured');
|
||||||
|
}
|
||||||
|
|
||||||
|
const response = await fetch('https://api.openai.com/v1/realtime/sessions', {
|
||||||
|
method: 'POST',
|
||||||
|
headers: {
|
||||||
|
Authorization: `Bearer ${apiKey}`,
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
},
|
||||||
|
body: JSON.stringify({
|
||||||
|
model: 'gpt-4o-realtime-preview-2024-12-17',
|
||||||
|
modalities: ['audio', 'text'],
|
||||||
|
voice: voice,
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
|
||||||
|
const token = response.json();
|
||||||
|
|
||||||
|
return {
|
||||||
|
provider: RealtimeVoiceProviders.OPENAI,
|
||||||
|
token: token,
|
||||||
|
url: schema.url || defaultRealtimeUrl,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
async getRealtimeConfig(req, res) {
|
||||||
|
try {
|
||||||
|
const [provider, schema] = await this.getProviderSchema();
|
||||||
|
const strategy = this.providerStrategies[provider];
|
||||||
|
|
||||||
|
if (!strategy) {
|
||||||
|
throw new Error(`Unsupported provider: ${provider}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const voice = req.query.voice;
|
||||||
|
|
||||||
|
const config = strategy(schema, voice);
|
||||||
|
res.json(config);
|
||||||
|
} catch (error) {
|
||||||
|
logger.error('[RealtimeService] Config generation failed:', error);
|
||||||
|
res.status(500).json({ error: error.message });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function getRealtimeConfig(req, res) {
|
||||||
|
const service = await RealtimeService.getInstance();
|
||||||
|
await service.getRealtimeConfig(req, res);
|
||||||
|
}
|
||||||
|
|
||||||
|
module.exports = getRealtimeConfig;
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
const getCustomConfigSpeech = require('./getCustomConfigSpeech');
|
const getCustomConfigSpeech = require('./getCustomConfigSpeech');
|
||||||
|
const getRealtimeConfig = require('./getRealtimeConfig');
|
||||||
const TTSService = require('./TTSService');
|
const TTSService = require('./TTSService');
|
||||||
const STTService = require('./STTService');
|
const STTService = require('./STTService');
|
||||||
const getVoices = require('./getVoices');
|
const getVoices = require('./getVoices');
|
||||||
|
|
@ -6,6 +7,7 @@ const getVoices = require('./getVoices');
|
||||||
module.exports = {
|
module.exports = {
|
||||||
getVoices,
|
getVoices,
|
||||||
getCustomConfigSpeech,
|
getCustomConfigSpeech,
|
||||||
|
getRealtimeConfig,
|
||||||
...STTService,
|
...STTService,
|
||||||
...TTSService,
|
...TTSService,
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,10 @@
|
||||||
import React, { forwardRef } from 'react';
|
import React, { forwardRef } from 'react';
|
||||||
import { useWatch } from 'react-hook-form';
|
import { useWatch } from 'react-hook-form';
|
||||||
|
import type { TRealtimeEphemeralTokenResponse } from 'librechat-data-provider';
|
||||||
import type { Control } from 'react-hook-form';
|
import type { Control } from 'react-hook-form';
|
||||||
|
import { useRealtimeEphemeralTokenMutation } from '~/data-provider';
|
||||||
import { TooltipAnchor, SendIcon, CallIcon } from '~/components';
|
import { TooltipAnchor, SendIcon, CallIcon } from '~/components';
|
||||||
|
import { useToastContext } from '~/Providers/ToastContext';
|
||||||
import { useLocalize } from '~/hooks';
|
import { useLocalize } from '~/hooks';
|
||||||
import { cn } from '~/utils';
|
import { cn } from '~/utils';
|
||||||
|
|
||||||
|
|
@ -17,6 +20,7 @@ const ActionButton = forwardRef(
|
||||||
icon: React.ReactNode;
|
icon: React.ReactNode;
|
||||||
tooltip: string;
|
tooltip: string;
|
||||||
testId: string;
|
testId: string;
|
||||||
|
onClick?: () => void;
|
||||||
},
|
},
|
||||||
ref: React.ForwardedRef<HTMLButtonElement>,
|
ref: React.ForwardedRef<HTMLButtonElement>,
|
||||||
) => {
|
) => {
|
||||||
|
|
@ -36,6 +40,7 @@ const ActionButton = forwardRef(
|
||||||
)}
|
)}
|
||||||
data-testid={props.testId}
|
data-testid={props.testId}
|
||||||
type="submit"
|
type="submit"
|
||||||
|
onClick={props.onClick}
|
||||||
>
|
>
|
||||||
<span className="" data-state="closed">
|
<span className="" data-state="closed">
|
||||||
{props.icon}
|
{props.icon}
|
||||||
|
|
@ -49,9 +54,32 @@ const ActionButton = forwardRef(
|
||||||
|
|
||||||
const SendButton = forwardRef((props: ButtonProps, ref: React.ForwardedRef<HTMLButtonElement>) => {
|
const SendButton = forwardRef((props: ButtonProps, ref: React.ForwardedRef<HTMLButtonElement>) => {
|
||||||
const localize = useLocalize();
|
const localize = useLocalize();
|
||||||
const { text } = useWatch({ control: props.control });
|
const { showToast } = useToastContext();
|
||||||
|
const { text = '' } = useWatch({ control: props.control });
|
||||||
|
|
||||||
const buttonProps = text
|
const { mutate: startCall, isLoading: isProcessing } = useRealtimeEphemeralTokenMutation({
|
||||||
|
onSuccess: async (data: TRealtimeEphemeralTokenResponse) => {
|
||||||
|
showToast({
|
||||||
|
message: 'IT WORKS!!',
|
||||||
|
status: 'success',
|
||||||
|
});
|
||||||
|
},
|
||||||
|
onError: (error: unknown) => {
|
||||||
|
showToast({
|
||||||
|
message: localize('com_nav_audio_process_error', (error as Error).message),
|
||||||
|
status: 'error',
|
||||||
|
});
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const handleClick = () => {
|
||||||
|
if (text.trim() === '') {
|
||||||
|
startCall({ voice: 'verse' });
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const buttonProps =
|
||||||
|
text.trim() !== ''
|
||||||
? {
|
? {
|
||||||
icon: <SendIcon size={24} />,
|
icon: <SendIcon size={24} />,
|
||||||
tooltip: localize('com_nav_send_message'),
|
tooltip: localize('com_nav_send_message'),
|
||||||
|
|
@ -61,6 +89,7 @@ const SendButton = forwardRef((props: ButtonProps, ref: React.ForwardedRef<HTMLB
|
||||||
icon: <CallIcon size={24} />,
|
icon: <CallIcon size={24} />,
|
||||||
tooltip: localize('com_nav_call'),
|
tooltip: localize('com_nav_call'),
|
||||||
testId: 'call-button',
|
testId: 'call-button',
|
||||||
|
onClick: handleClick,
|
||||||
};
|
};
|
||||||
|
|
||||||
return <ActionButton ref={ref} disabled={props.disabled} {...buttonProps} />;
|
return <ActionButton ref={ref} disabled={props.disabled} {...buttonProps} />;
|
||||||
|
|
|
||||||
|
|
@ -726,6 +726,21 @@ export const useTextToSpeechMutation = (
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Mutation hook that requests a realtime ephemeral token through
 * `dataService.getRealtimeEphemeralToken`.
 *
 * Caller-supplied `options` are spread after the default `mutationFn`, so they take precedence.
 */
export const useRealtimeEphemeralTokenMutation = (
  options?: t.MutationOptions<t.TRealtimeEphemeralTokenResponse, t.TRealtimeEphemeralTokenRequest>,
): UseMutationResult<
  t.TRealtimeEphemeralTokenResponse,
  unknown,
  t.TRealtimeEphemeralTokenRequest,
  unknown
> =>
  useMutation([MutationKeys.realtimeEphemeralToken], {
    mutationFn: (payload: t.TRealtimeEphemeralTokenRequest) =>
      dataService.getRealtimeEphemeralToken(payload),
    ...options,
  });
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ASSISTANTS
|
* ASSISTANTS
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -171,7 +171,9 @@ export const textToSpeechManual = () => `${textToSpeech()}/manual`;
|
||||||
|
|
||||||
export const textToSpeechVoices = () => `${textToSpeech()}/voices`;
|
export const textToSpeechVoices = () => `${textToSpeech()}/voices`;
|
||||||
|
|
||||||
export const getCustomConfigSpeech = () => `${speech()}/config/get`;
|
export const getCustomConfigSpeech = () => `${speech()}/config`;
|
||||||
|
|
||||||
|
/** Builds the URL of the realtime endpoint: the `speech()` base followed by `/realtime`. */
export const getRealtimeEphemeralToken = () => {
  const speechBase = speech();
  return `${speechBase}/realtime`;
};
|
||||||
|
|
||||||
export const getPromptGroup = (_id: string) => `${prompts()}/groups/${_id}`;
|
export const getPromptGroup = (_id: string) => `${prompts()}/groups/${_id}`;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -379,6 +379,18 @@ const speechTab = z
|
||||||
})
|
})
|
||||||
.optional();
|
.optional();
|
||||||
|
|
||||||
|
/** Settings accepted for the OpenAI realtime provider; every field is optional. */
const realtimeOpenAISchema = z.object({
  url: z.string().optional(),
  apiKey: z.string().optional(),
  voices: z.array(z.string()).optional(),
});

/** Optional `realtime` config section, keyed by provider name (only `openai` is declared). */
const realtime = z
  .object({
    openai: realtimeOpenAISchema.optional(),
  })
  .optional();
|
||||||
|
|
||||||
export enum RateLimitPrefix {
|
export enum RateLimitPrefix {
|
||||||
FILE_UPLOAD = 'FILE_UPLOAD',
|
FILE_UPLOAD = 'FILE_UPLOAD',
|
||||||
IMPORT = 'IMPORT',
|
IMPORT = 'IMPORT',
|
||||||
|
|
@ -534,6 +546,7 @@ export const configSchema = z.object({
|
||||||
tts: ttsSchema.optional(),
|
tts: ttsSchema.optional(),
|
||||||
stt: sttSchema.optional(),
|
stt: sttSchema.optional(),
|
||||||
speechTab: speechTab.optional(),
|
speechTab: speechTab.optional(),
|
||||||
|
realtime: realtime.optional(),
|
||||||
})
|
})
|
||||||
.optional(),
|
.optional(),
|
||||||
rateLimits: rateLimitSchema.optional(),
|
rateLimits: rateLimitSchema.optional(),
|
||||||
|
|
@ -1135,6 +1148,13 @@ export enum TTSProviders {
|
||||||
LOCALAI = 'localai',
|
LOCALAI = 'localai',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Providers that can back a realtime voice session. */
export enum RealtimeVoiceProviders {
  /** OpenAI Realtime Voice API. */
  OPENAI = 'openai',
}
|
||||||
|
|
||||||
/** Enum for app-wide constants */
|
/** Enum for app-wide constants */
|
||||||
export enum Constants {
|
export enum Constants {
|
||||||
/** Key for the app's version. */
|
/** Key for the app's version. */
|
||||||
|
|
|
||||||
|
|
@ -576,6 +576,12 @@ export const getCustomConfigSpeech = (): Promise<t.TCustomConfigSpeechResponse>
|
||||||
return request.get(endpoints.getCustomConfigSpeech());
|
return request.get(endpoints.getCustomConfigSpeech());
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Issues a GET to the realtime ephemeral-token endpoint, sending `data` as the request's
 * `params`.
 */
export const getRealtimeEphemeralToken = (
  data: t.TRealtimeEphemeralTokenRequest,
): Promise<t.TRealtimeEphemeralTokenResponse> => {
  const endpoint = endpoints.getRealtimeEphemeralToken();
  const config = { params: data };
  return request.get(endpoint, config);
};
|
||||||
|
|
||||||
/* conversations */
|
/* conversations */
|
||||||
|
|
||||||
export function duplicateConversation(
|
export function duplicateConversation(
|
||||||
|
|
|
||||||
|
|
@ -67,4 +67,5 @@ export enum MutationKeys {
|
||||||
deleteAgentAction = 'deleteAgentAction',
|
deleteAgentAction = 'deleteAgentAction',
|
||||||
deleteUser = 'deleteUser',
|
deleteUser = 'deleteUser',
|
||||||
updateRole = 'updateRole',
|
updateRole = 'updateRole',
|
||||||
|
realtimeEphemeralToken = 'realtimeEphemeralToken',
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ import type {
|
||||||
TConversationTag,
|
TConversationTag,
|
||||||
TBanner,
|
TBanner,
|
||||||
} from './schemas';
|
} from './schemas';
|
||||||
|
import { string } from 'zod';
|
||||||
export type TOpenAIMessage = OpenAI.Chat.ChatCompletionMessageParam;
|
export type TOpenAIMessage = OpenAI.Chat.ChatCompletionMessageParam;
|
||||||
|
|
||||||
export * from './schemas';
|
export * from './schemas';
|
||||||
|
|
@ -472,3 +473,12 @@ export type TAcceptTermsResponse = {
|
||||||
};
|
};
|
||||||
|
|
||||||
export type TBannerResponse = TBanner | null;
|
export type TBannerResponse = TBanner | null;
|
||||||
|
|
||||||
|
/** Parameters sent when requesting a realtime ephemeral token. */
export type TRealtimeEphemeralTokenRequest = {
  /** Name of the voice to use for the session. */
  voice: string;
};
|
||||||
|
|
||||||
|
/**
 * Payload returned by the realtime ephemeral-token request.
 *
 * NOTE(review): the server-side OpenAI strategy also returns a `provider` field and fills
 * `token` from the session response body — confirm that these field types match what the
 * server actually sends.
 */
export type TRealtimeEphemeralTokenResponse = {
  /** Token for the realtime session. */
  token: string;
  /** Realtime endpoint URL. */
  url: string;
};
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue