feat: enhance call functionality with VAD integration and mute handling

This commit is contained in:
Marco Beretta 2025-01-04 01:55:47 +01:00
parent 601cd4bf66
commit b72280bbcc
No known key found for this signature in database
GPG key ID: D918033D8E74CC11
7 changed files with 243 additions and 87 deletions

View file

@ -53,6 +53,7 @@
"@radix-ui/react-tabs": "^1.0.3",
"@radix-ui/react-toast": "^1.1.5",
"@react-spring/web": "^9.7.5",
"@ricky0123/vad-react": "^0.0.28",
"@tanstack/react-query": "^4.28.0",
"@tanstack/react-table": "^8.11.7",
"class-variance-authority": "^0.6.0",

View file

@ -72,7 +72,7 @@ export interface RTCMessage {
export type MessagePayload =
| RTCSessionDescriptionInit
| RTCIceCandidateInit
| Record<string, never>;
| { speaking: boolean };
export enum CallState {
IDLE = 'idle',

View file

@ -26,11 +26,12 @@ export const Call: React.FC = () => {
localStream,
remoteStream,
connectionQuality,
isMuted,
toggleMute,
} = useCall();
const [open, setOpen] = useRecoilState(store.callDialogOpen(0));
const [eventLog, setEventLog] = React.useState<string[]>([]);
const [isMuted, setIsMuted] = React.useState(false);
const [isAudioEnabled, setIsAudioEnabled] = React.useState(true);
const remoteAudioRef = useRef<HTMLAudioElement>(null);
@ -84,8 +85,8 @@ export const Call: React.FC = () => {
hangUp();
};
const toggleMute = () => {
setIsMuted((prev) => !prev);
// Toggle the microphone through the call hook and record the action in the
// event log. `isMuted` is the value captured by this render, i.e. the state
// BEFORE the toggle takes effect, so a currently muted microphone is logged
// as 'unmuted' and a currently live one as 'muted'.
const handleToggleMute = () => {
toggleMute();
logEvent(`Microphone ${isMuted ? 'unmuted' : 'muted'}`);
};
@ -176,7 +177,7 @@ export const Call: React.FC = () => {
{isActive && (
<>
<Button
onClick={toggleMute}
onClick={handleToggleMute}
className={`rounded-full p-3 ${
isMuted ? 'bg-red-100 text-red-700' : 'bg-gray-100 text-gray-700'
}`}
@ -218,25 +219,19 @@ export const Call: React.FC = () => {
</div>
{/* Event Log */}
<div className="mt-4 w-full rounded-md bg-gray-100 p-4 shadow-sm">
<h3 className="mb-2 text-lg font-medium">Event Log</h3>
<div className="h-32 overflow-y-auto rounded-md bg-white p-2 shadow-inner">
<ul className="space-y-1 text-xs text-gray-600">
{eventLog.map((log, index) => (
<li key={index} className="font-mono">
{log}
</li>
))}
</ul>
</div>
<h3 className="mb-2 text-lg font-medium">Event Log</h3>
<div className="h-64 overflow-y-auto rounded-md bg-surface-secondary p-2 shadow-inner">
<ul className="space-y-1 text-xs text-text-secondary">
{eventLog.map((log, index) => (
<li key={index} className="font-mono">
{log}
</li>
))}
</ul>
</div>
{/* Hidden Audio Element */}
<audio
ref={remoteAudioRef}
autoPlay
playsInline
>
<audio ref={remoteAudioRef} autoPlay>
<track kind="captions" />
</audio>
</div>

View file

@ -1,5 +1,5 @@
import { useState, useRef, useCallback, useEffect } from 'react';
import { WebRTCService, ConnectionState } from '../services/WebRTC/WebRTCService';
import { WebRTCService, ConnectionState, useVADSetup } from '../services/WebRTC/WebRTCService';
import useWebSocket, { WebSocketEvents } from './useWebSocket';
interface CallError {
@ -22,6 +22,8 @@ interface CallStatus {
localStream: MediaStream | null;
remoteStream: MediaStream | null;
connectionQuality: 'good' | 'poor' | 'unknown';
isUserSpeaking: boolean;
remoteAISpeaking: boolean;
}
const INITIAL_STATUS: CallStatus = {
@ -31,6 +33,8 @@ const INITIAL_STATUS: CallStatus = {
localStream: null,
remoteStream: null,
connectionQuality: 'unknown',
isUserSpeaking: false,
remoteAISpeaking: false,
};
const useCall = () => {
@ -38,33 +42,19 @@ const useCall = () => {
const [status, setStatus] = useState<CallStatus>(INITIAL_STATUS);
const webrtcServiceRef = useRef<WebRTCService | null>(null);
const statsIntervalRef = useRef<NodeJS.Timeout>();
const [isMuted, setIsMuted] = useState(false);
const vad = useVADSetup(webrtcServiceRef.current);
const updateStatus = useCallback((updates: Partial<CallStatus>) => {
setStatus((prev) => ({ ...prev, ...updates }));
}, []);
useEffect(() => {
return () => {
if (statsIntervalRef.current) {
clearInterval(statsIntervalRef.current);
}
if (webrtcServiceRef.current) {
webrtcServiceRef.current.close();
}
};
}, []);
updateStatus({ isUserSpeaking: vad.userSpeaking });
}, [vad.userSpeaking, updateStatus]);
const handleRemoteStream = (stream: MediaStream | null) => {
console.log('[WebRTC] Remote stream received:', {
stream: stream,
active: stream?.active,
tracks: stream?.getTracks().map((t) => ({
kind: t.kind,
enabled: t.enabled,
muted: t.muted,
})),
});
if (!stream) {
console.error('[WebRTC] Received null remote stream');
updateStatus({
@ -122,10 +112,8 @@ const useCall = () => {
break;
case ConnectionState.CLOSED:
updateStatus({
...INITIAL_STATUS,
callState: CallState.ENDED,
isConnecting: false,
localStream: null,
remoteStream: null,
});
break;
}
@ -188,17 +176,15 @@ const useCall = () => {
error: null,
});
// TODO: Remove debug or make it configurable
webrtcServiceRef.current = new WebRTCService((message) => sendMessage(message), {
webrtcServiceRef.current = new WebRTCService(sendMessage, {
debug: true,
});
webrtcServiceRef.current.on('connectionStateChange', (state: ConnectionState) => {
console.log('WebRTC connection state changed:', state);
handleConnectionStateChange(state);
});
webrtcServiceRef.current.on('connectionStateChange', handleConnectionStateChange);
webrtcServiceRef.current.on('remoteStream', handleRemoteStream);
webrtcServiceRef.current.on('vadStatusChange', (speaking: boolean) => {
updateStatus({ isUserSpeaking: speaking });
});
webrtcServiceRef.current.on('error', (error: string) => {
console.error('WebRTC error:', error);
@ -253,22 +239,42 @@ const useCall = () => {
useEffect(() => {
const cleanupFns = [
addEventListener(WebSocketEvents.WEBRTC_ANSWER, (answer: RTCSessionDescriptionInit) => {
console.log('Received WebRTC answer:', answer);
webrtcServiceRef.current?.handleAnswer(answer);
}),
addEventListener(WebSocketEvents.ICE_CANDIDATE, (candidate: RTCIceCandidateInit) => {
console.log('Received ICE candidate:', candidate);
webrtcServiceRef.current?.addIceCandidate(candidate);
}),
];
return () => cleanupFns.forEach((fn) => fn());
}, [addEventListener]);
}, [addEventListener, updateStatus]);
/**
 * Flips the mute state of the active call.
 *
 * Does nothing when no WebRTC service exists. Otherwise the inverted value of
 * the current `isMuted` state is pushed to the service and then stored locally.
 */
const toggleMute = useCallback(() => {
  const service = webrtcServiceRef.current;
  if (!service) {
    return;
  }

  const nextMuted = !isMuted;
  service.setMuted(nextMuted);
  setIsMuted(nextMuted);
}, [isMuted]);
// Mirror the service's 'muteStateChange' events into the local `isMuted`
// state, and detach the listener on cleanup.
// NOTE(review): the dependency list is empty, so this effect runs only once
// after mount. `webrtcServiceRef` is created with `useRef(null)` and is only
// assigned later when a WebRTCService is constructed, so the ref is presumably
// still null here and no listener is ever registered — confirm, and consider
// subscribing at the point where the service is created.
useEffect(() => {
if (webrtcServiceRef.current) {
// Keep a stable reference so the same function can be passed to `off`.
const handleMuteChange = (muted: boolean) => setIsMuted(muted);
webrtcServiceRef.current.on('muteStateChange', handleMuteChange);
return () => {
// The ref may have been cleared by the time cleanup runs, hence `?.`.
webrtcServiceRef.current?.off('muteStateChange', handleMuteChange);
};
}
}, []);
return {
...status,
isMuted,
toggleMute,
startCall,
hangUp,
vadLoading: vad.loading,
vadError: vad.errored,
};
};

View file

@ -1,4 +1,6 @@
import { useEffect } from 'react';
import { EventEmitter } from 'events';
import { useMicVAD } from '@ricky0123/vad-react';
import type { MessagePayload } from '~/common';
export enum ConnectionState {
@ -24,6 +26,51 @@ interface WebRTCConfig {
debug?: boolean;
}
/**
 * Wires microphone voice-activity detection (VAD) to a WebRTC service.
 *
 * Speech start, speech end and misfire events from the detector are forwarded
 * to `webrtcService.handleVADStatusChange`, but only while a service exists
 * and reports that it is not muted. The detector is paused when the service
 * emits `muteStateChange` with `true` and started again when it emits `false`.
 *
 * @param webrtcService - Service to notify, or `null` when none exists yet.
 * @returns The object returned by `useMicVAD`.
 */
export function useVADSetup(webrtcService: WebRTCService | null) {
  // Forward a speaking flag to the service unless it is absent or muted.
  const reportSpeaking = (speaking: boolean) => {
    if (!webrtcService || webrtcService.isMuted()) {
      return;
    }
    webrtcService.handleVADStatusChange(speaking);
  };

  const vad = useMicVAD({
    startOnLoad: true,
    onSpeechStart: () => {
      reportSpeaking(true);
    },
    onSpeechEnd: () => {
      reportSpeaking(false);
    },
    // A misfire is reported the same way as the end of speech.
    onVADMisfire: () => {
      reportSpeaking(false);
    },
  });

  // Keep the detector's running state in step with the service's mute state.
  useEffect(() => {
    if (!webrtcService) {
      return undefined;
    }

    const onMuteStateChange = (muted: boolean) => {
      if (muted) {
        vad.pause();
        return;
      }
      vad.start();
    };

    webrtcService.on('muteStateChange', onMuteStateChange);
    return () => {
      webrtcService.off('muteStateChange', onMuteStateChange);
    };
  }, [webrtcService, vad]);

  return vad;
}
export class WebRTCService extends EventEmitter {
private peerConnection: RTCPeerConnection | null = null;
private localStream: MediaStream | null = null;
@ -34,6 +81,8 @@ export class WebRTCService extends EventEmitter {
private connectionState: ConnectionState = ConnectionState.IDLE;
private mediaState: MediaState = MediaState.INACTIVE;
private isUserSpeaking = false;
private readonly DEFAULT_CONFIG: Required<WebRTCConfig> = {
iceServers: [
{
@ -72,6 +121,76 @@ export class WebRTCService extends EventEmitter {
this.log('Media state changed to:', state);
}
/**
 * Records a change in the local user's speaking state and announces it.
 *
 * Repeated reports of the same value are ignored. On an actual change the new
 * value is stored, a `vad-status` message carrying `{ speaking }` is passed to
 * `sendMessage`, and a `vadStatusChange` event is emitted with the new value.
 *
 * @param isSpeaking - Whether the user is currently speaking.
 */
public handleVADStatusChange(isSpeaking: boolean) {
  if (this.isUserSpeaking === isSpeaking) {
    return;
  }

  this.isUserSpeaking = isSpeaking;

  const payload = { speaking: isSpeaking };
  this.sendMessage({ type: 'vad-status', payload });

  this.emit('vadStatusChange', isSpeaking);
}
/**
 * Mutes or unmutes the local microphone.
 *
 * Muting stops every local audio track outright (rather than disabling it)
 * and reports "not speaking" through `handleVADStatusChange`. Unmuting
 * requests a fresh audio track via `refreshAudioTrack`, because a stopped
 * track cannot be restarted.
 *
 * The refresh is triggered at most once per call and only when at least one
 * local audio track has actually ended. Previously it ran once per audio
 * track, and also when nothing had been stopped, which acquired redundant
 * microphone streams.
 *
 * Does nothing when there is no local stream. Otherwise a `muteStateChange`
 * event is always emitted with the requested value; when unmuting, the event
 * fires before the asynchronous track refresh has completed.
 *
 * @param muted - `true` to mute, `false` to unmute.
 */
public setMuted(muted: boolean) {
  if (!this.localStream) {
    return;
  }

  const audioTracks = this.localStream.getAudioTracks();

  if (muted) {
    audioTracks.forEach((track) => track.stop());
    // Ensure listeners know we're not speaking while muted.
    this.handleVADStatusChange(false);
  } else if (audioTracks.some((track) => track.readyState === 'ended')) {
    // refreshAudioTrack reports its own failures through handleError, so the
    // promise is intentionally not awaited here.
    void this.refreshAudioTrack();
  }

  this.emit('muteStateChange', muted);
}
/**
 * Reports whether the local microphone is currently muted.
 *
 * `setMuted(true)` mutes by stopping the audio track, which moves the track's
 * `readyState` to `'ended'` but leaves `enabled` untouched. A track is
 * therefore considered muted when it is either disabled or ended; checking
 * `enabled` alone would report "not muted" after a mute.
 *
 * @returns `true` when the first local audio track is disabled or ended;
 *   `false` when it is live and enabled, or when there is no local stream or
 *   no audio track.
 */
public isMuted(): boolean {
  const audioTrack = this.localStream?.getAudioTracks()[0];
  if (!audioTrack) {
    return false;
  }
  return !audioTrack.enabled || audioTrack.readyState === 'ended';
}
/**
 * Acquires a fresh microphone track and swaps it in for the current one.
 *
 * The new track replaces the first audio track of the local stream and is
 * handed to the peer connection's audio sender via `replaceTrack`. Any
 * failure (permission denied, no track returned, `replaceTrack` rejecting)
 * is routed to `handleError`.
 *
 * Compared with a plain swap this also:
 * - stops the newly acquired track if the local stream or peer connection
 *   disappeared while waiting for `getUserMedia`, so the microphone is not
 *   left open;
 * - stops the old track before removing it, so it cannot keep capturing;
 * - prefers the sender that was carrying the old track, since other audio
 *   senders may exist on the connection;
 * - awaits `replaceTrack` so a rejection is caught here.
 */
private async refreshAudioTrack(): Promise<void> {
  try {
    const newStream = await navigator.mediaDevices.getUserMedia({
      audio: {
        echoCancellation: true,
        noiseSuppression: true,
        autoGainControl: true,
      },
    });

    const newTrack = newStream.getAudioTracks()[0];
    if (!newTrack) {
      throw new Error('No audio track available');
    }

    if (!this.localStream || !this.peerConnection) {
      // Nothing to attach the track to any more; release the microphone.
      newTrack.stop();
      return;
    }

    const oldTrack = this.localStream.getAudioTracks()[0];
    if (oldTrack) {
      // stop() is a no-op on an already-ended track.
      oldTrack.stop();
      this.localStream.removeTrack(oldTrack);
    }
    this.localStream.addTrack(newTrack);

    // Update the sender with the new track. Match the old track by identity
    // first; fall back to the first audio sender.
    const senders = this.peerConnection.getSenders();
    const audioSender =
      (oldTrack ? senders.find((sender) => sender.track === oldTrack) : undefined) ??
      senders.find((sender) => sender.track?.kind === 'audio');

    if (audioSender) {
      await audioSender.replaceTrack(newTrack);
    }
  } catch (error) {
    this.handleError(error);
  }
}
async initialize() {
try {
this.setConnectionState(ConnectionState.CONNECTING);
@ -101,9 +220,7 @@ export class WebRTCService extends EventEmitter {
});
this.startConnectionTimeout();
await this.createAndSendOffer();
this.setMediaState(MediaState.ACTIVE);
} catch (error) {
this.log('Initialization error:', error);
@ -131,15 +248,12 @@ export class WebRTCService extends EventEmitter {
});
if (track.kind === 'audio') {
// Create remote stream if needed
if (!this.remoteStream) {
this.remoteStream = new MediaStream();
}
// Add incoming track to remote stream
this.remoteStream.addTrack(track);
// Echo back the track
if (this.peerConnection) {
this.peerConnection.addTrack(track, this.remoteStream);
}
@ -163,7 +277,7 @@ export class WebRTCService extends EventEmitter {
switch (state) {
case 'connected':
this.clearConnectionTimeout(); // Clear timeout when connected
this.clearConnectionTimeout();
this.setConnectionState(ConnectionState.CONNECTED);
break;
case 'disconnected':
@ -232,7 +346,6 @@ export class WebRTCService extends EventEmitter {
private startConnectionTimeout() {
this.clearConnectionTimeout();
this.connectionTimeoutId = setTimeout(() => {
// Only timeout if we're not in a connected or connecting state
if (
this.connectionState !== ConnectionState.CONNECTED &&
this.connectionState !== ConnectionState.CONNECTING
@ -276,13 +389,11 @@ export class WebRTCService extends EventEmitter {
const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
this.log('Error:', errorMessage);
// Don't set failed state if we're already connected
if (this.connectionState !== ConnectionState.CONNECTED) {
this.setConnectionState(ConnectionState.FAILED);
this.emit('error', errorMessage);
}
// Only close if we're not connected
if (this.connectionState !== ConnectionState.CONNECTED) {
this.close();
}