mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-19 01:40:15 +01:00
🗣️ feat: add support for gpt-4o-transcribe models (#6483)
This commit is contained in:
parent
842b68fc32
commit
20f353630e
2 changed files with 124 additions and 5 deletions
|
|
@ -21,6 +21,7 @@ const useSpeechToTextExternal = (
|
|||
const [isListening, setIsListening] = useState(false);
|
||||
const [audioChunks, setAudioChunks] = useState<Blob[]>([]);
|
||||
const [isRequestBeingMade, setIsRequestBeingMade] = useState(false);
|
||||
const [audioMimeType, setAudioMimeType] = useState<string>('audio/webm');
|
||||
|
||||
const [minDecibels] = useRecoilState(store.decibelValue);
|
||||
const [autoSendText] = useRecoilState(store.autoSendText);
|
||||
|
|
@ -48,6 +49,44 @@ const useSpeechToTextExternal = (
|
|||
},
|
||||
});
|
||||
|
||||
const getBestSupportedMimeType = () => {
|
||||
const types = [
|
||||
'audio/webm',
|
||||
'audio/webm;codecs=opus',
|
||||
'audio/mp4',
|
||||
'audio/ogg;codecs=opus',
|
||||
'audio/ogg',
|
||||
'audio/wav',
|
||||
];
|
||||
|
||||
for (const type of types) {
|
||||
if (MediaRecorder.isTypeSupported(type)) {
|
||||
return type;
|
||||
}
|
||||
}
|
||||
|
||||
const ua = navigator.userAgent.toLowerCase();
|
||||
if (ua.indexOf('safari') !== -1 && ua.indexOf('chrome') === -1) {
|
||||
return 'audio/mp4';
|
||||
} else if (ua.indexOf('firefox') !== -1) {
|
||||
return 'audio/ogg';
|
||||
} else {
|
||||
return 'audio/webm';
|
||||
}
|
||||
};
|
||||
|
||||
const getFileExtension = (mimeType: string) => {
|
||||
if (mimeType.includes('mp4')) {
|
||||
return 'm4a';
|
||||
} else if (mimeType.includes('ogg')) {
|
||||
return 'ogg';
|
||||
} else if (mimeType.includes('wav')) {
|
||||
return 'wav';
|
||||
} else {
|
||||
return 'webm';
|
||||
}
|
||||
};
|
||||
|
||||
const cleanup = () => {
|
||||
if (mediaRecorderRef.current) {
|
||||
mediaRecorderRef.current.removeEventListener('dataavailable', (event: BlobEvent) => {
|
||||
|
|
@ -73,12 +112,13 @@ const useSpeechToTextExternal = (
|
|||
|
||||
const handleStop = () => {
|
||||
if (audioChunks.length > 0) {
|
||||
const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
|
||||
const audioBlob = new Blob(audioChunks, { type: audioMimeType });
|
||||
const fileExtension = getFileExtension(audioMimeType);
|
||||
|
||||
setAudioChunks([]);
|
||||
|
||||
const formData = new FormData();
|
||||
formData.append('audio', audioBlob, 'audio.wav');
|
||||
formData.append('audio', audioBlob, `audio.${fileExtension}`);
|
||||
setIsRequestBeingMade(true);
|
||||
cleanup();
|
||||
processAudio(formData);
|
||||
|
|
@ -133,7 +173,12 @@ const useSpeechToTextExternal = (
|
|||
if (audioStream.current) {
|
||||
try {
|
||||
setAudioChunks([]);
|
||||
mediaRecorderRef.current = new MediaRecorder(audioStream.current);
|
||||
const bestMimeType = getBestSupportedMimeType();
|
||||
setAudioMimeType(bestMimeType);
|
||||
|
||||
mediaRecorderRef.current = new MediaRecorder(audioStream.current, {
|
||||
mimeType: bestMimeType,
|
||||
});
|
||||
mediaRecorderRef.current.addEventListener('dataavailable', (event: BlobEvent) => {
|
||||
audioChunks.push(event.data);
|
||||
});
|
||||
|
|
@ -221,7 +266,7 @@ const useSpeechToTextExternal = (
|
|||
return () => {
|
||||
window.removeEventListener('keydown', handleKeyDown);
|
||||
};
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
|
||||
}, [isListening]);
|
||||
|
||||
return {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue