mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 08:50:15 +01:00
🗣️ feat: add support for gpt-4o-transcribe models (#6483)
This commit is contained in:
parent
842b68fc32
commit
20f353630e
2 changed files with 124 additions and 5 deletions
|
|
@ -7,6 +7,78 @@ const { getCustomConfig } = require('~/server/services/Config');
|
||||||
const { genAzureEndpoint } = require('~/utils');
|
const { genAzureEndpoint } = require('~/utils');
|
||||||
const { logger } = require('~/config');
|
const { logger } = require('~/config');
|
||||||
|
|
||||||
|
/**
 * Maps MIME types to their corresponding file extensions for audio files.
 * Keys are lowercase `type/subtype` strings without any parameters.
 * @type {Object}
 */
const MIME_TO_EXTENSION_MAP = {
  // MP4 container formats
  'audio/mp4': 'm4a',
  'audio/x-m4a': 'm4a',
  // Ogg formats
  'audio/ogg': 'ogg',
  'audio/vorbis': 'ogg',
  'application/ogg': 'ogg',
  // Wave formats
  'audio/wav': 'wav',
  'audio/x-wav': 'wav',
  'audio/wave': 'wav',
  // MP3 formats
  'audio/mp3': 'mp3',
  'audio/mpeg': 'mp3',
  'audio/mpeg3': 'mp3',
  // WebM formats
  'audio/webm': 'webm',
  // Additional formats
  'audio/flac': 'flac',
  'audio/x-flac': 'flac',
};

/**
 * Gets the file extension from the MIME type.
 *
 * The MIME type is normalized before matching: parameters (everything after
 * the first `;`, e.g. `codecs=opus`) are dropped, surrounding whitespace is
 * trimmed and the value is lowercased. Only the `type/subtype` part therefore
 * decides the extension.
 *
 * @param {string} mimeType - The MIME type.
 * @returns {string} The file extension; `'webm'` when the type is missing,
 *   not a string, or not recognized.
 */
function getFileExtensionFromMime(mimeType) {
  // Default fallback for missing or non-string input (avoids throwing on `.split`)
  if (typeof mimeType !== 'string' || mimeType.length === 0) {
    return 'webm';
  }

  // Strip parameters and normalize case so the map keys can match
  const essence = mimeType.split(';')[0].trim().toLowerCase();

  // Direct lookup (fastest). Own-property check keeps inherited members such as
  // `constructor` or `toString` from being returned as an "extension".
  if (Object.prototype.hasOwnProperty.call(MIME_TO_EXTENSION_MAP, essence)) {
    return MIME_TO_EXTENSION_MAP[essence];
  }

  // Try to extract subtype as fallback
  const subtype = essence.split('/')[1];
  if (!subtype) {
    return 'webm';
  }

  // If subtype matches a known extension
  if (['mp3', 'mp4', 'ogg', 'wav', 'webm', 'm4a', 'flac'].includes(subtype)) {
    return subtype === 'mp4' ? 'm4a' : subtype;
  }

  // Generic checks for partial matches (e.g. vendor-prefixed subtypes like `x-...`)
  if (subtype.includes('mp4') || subtype.includes('m4a')) {
    return 'm4a';
  }
  if (subtype.includes('ogg')) {
    return 'ogg';
  }
  if (subtype.includes('wav')) {
    return 'wav';
  }
  if (subtype.includes('mp3') || subtype.includes('mpeg')) {
    return 'mp3';
  }
  if (subtype.includes('webm')) {
    return 'webm';
  }
  if (subtype.includes('flac')) {
    return 'flac';
  }

  return 'webm'; // Default fallback
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Service class for handling Speech-to-Text (STT) operations.
|
* Service class for handling Speech-to-Text (STT) operations.
|
||||||
* @class
|
* @class
|
||||||
|
|
@ -170,8 +242,10 @@ class STTService {
|
||||||
throw new Error('Invalid provider');
|
throw new Error('Invalid provider');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const fileExtension = getFileExtensionFromMime(audioFile.mimetype);
|
||||||
|
|
||||||
const audioReadStream = Readable.from(audioBuffer);
|
const audioReadStream = Readable.from(audioBuffer);
|
||||||
audioReadStream.path = 'audio.wav';
|
audioReadStream.path = `audio.${fileExtension}`;
|
||||||
|
|
||||||
const [url, data, headers] = strategy.call(this, sttSchema, audioReadStream, audioFile);
|
const [url, data, headers] = strategy.call(this, sttSchema, audioReadStream, audioFile);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,7 @@ const useSpeechToTextExternal = (
|
||||||
const [isListening, setIsListening] = useState(false);
|
const [isListening, setIsListening] = useState(false);
|
||||||
const [audioChunks, setAudioChunks] = useState<Blob[]>([]);
|
const [audioChunks, setAudioChunks] = useState<Blob[]>([]);
|
||||||
const [isRequestBeingMade, setIsRequestBeingMade] = useState(false);
|
const [isRequestBeingMade, setIsRequestBeingMade] = useState(false);
|
||||||
|
const [audioMimeType, setAudioMimeType] = useState<string>('audio/webm');
|
||||||
|
|
||||||
const [minDecibels] = useRecoilState(store.decibelValue);
|
const [minDecibels] = useRecoilState(store.decibelValue);
|
||||||
const [autoSendText] = useRecoilState(store.autoSendText);
|
const [autoSendText] = useRecoilState(store.autoSendText);
|
||||||
|
|
@ -48,6 +49,44 @@ const useSpeechToTextExternal = (
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/**
 * Picks the MIME type to request when creating the recorder.
 *
 * Candidates are probed in preference order with
 * `MediaRecorder.isTypeSupported`; the first supported one wins. When nothing
 * can be probed or nothing is reported as supported, a guess is made from the
 * user-agent string: Safari (without "chrome" in the UA) gets `audio/mp4`,
 * Firefox gets `audio/ogg`, everything else gets `audio/webm`.
 *
 * @returns The chosen MIME type string.
 */
const getBestSupportedMimeType = (): string => {
  const candidates = [
    'audio/webm',
    'audio/webm;codecs=opus',
    'audio/mp4',
    'audio/ogg;codecs=opus',
    'audio/ogg',
    'audio/wav',
  ];

  // Referencing the bare global would throw a ReferenceError where
  // MediaRecorder does not exist, so check with `typeof` first.
  const canProbe =
    typeof MediaRecorder !== 'undefined' && typeof MediaRecorder.isTypeSupported === 'function';

  if (canProbe) {
    const supported = candidates.find((type) => MediaRecorder.isTypeSupported(type));
    if (supported !== undefined) {
      return supported;
    }
  }

  // User-agent heuristic as a last resort; an absent navigator yields ''.
  const ua =
    typeof navigator !== 'undefined' && typeof navigator.userAgent === 'string'
      ? navigator.userAgent.toLowerCase()
      : '';

  if (ua.includes('safari') && !ua.includes('chrome')) {
    return 'audio/mp4';
  }
  if (ua.includes('firefox')) {
    return 'audio/ogg';
  }
  return 'audio/webm';
};
|
||||||
|
|
||||||
|
/**
 * Derives the upload file extension from a recording MIME type.
 *
 * Only the `type/subtype` part is inspected: parameters such as
 * `codecs=mp4a.40.2` are stripped first so a codec name cannot be mistaken
 * for the container format, and matching is case-insensitive.
 *
 * @param mimeType - MIME type of the recorded audio, optionally with parameters.
 * @returns `'m4a'`, `'ogg'`, `'wav'`, `'mp3'` or `'flac'` when recognized, otherwise `'webm'`.
 */
const getFileExtension = (mimeType: string) => {
  // Keep only what precedes the first ';' and normalize case.
  const essence = mimeType.split(';')[0].trim().toLowerCase();

  if (essence.includes('mp4') || essence.includes('m4a')) {
    return 'm4a';
  }
  if (essence.includes('ogg')) {
    return 'ogg';
  }
  if (essence.includes('wav')) {
    return 'wav';
  }
  if (essence.includes('mp3') || essence.includes('mpeg')) {
    return 'mp3';
  }
  if (essence.includes('flac')) {
    return 'flac';
  }
  return 'webm';
};
|
||||||
|
|
||||||
const cleanup = () => {
|
const cleanup = () => {
|
||||||
if (mediaRecorderRef.current) {
|
if (mediaRecorderRef.current) {
|
||||||
mediaRecorderRef.current.removeEventListener('dataavailable', (event: BlobEvent) => {
|
mediaRecorderRef.current.removeEventListener('dataavailable', (event: BlobEvent) => {
|
||||||
|
|
@ -73,12 +112,13 @@ const useSpeechToTextExternal = (
|
||||||
|
|
||||||
const handleStop = () => {
|
const handleStop = () => {
|
||||||
if (audioChunks.length > 0) {
|
if (audioChunks.length > 0) {
|
||||||
const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
|
const audioBlob = new Blob(audioChunks, { type: audioMimeType });
|
||||||
|
const fileExtension = getFileExtension(audioMimeType);
|
||||||
|
|
||||||
setAudioChunks([]);
|
setAudioChunks([]);
|
||||||
|
|
||||||
const formData = new FormData();
|
const formData = new FormData();
|
||||||
formData.append('audio', audioBlob, 'audio.wav');
|
formData.append('audio', audioBlob, `audio.${fileExtension}`);
|
||||||
setIsRequestBeingMade(true);
|
setIsRequestBeingMade(true);
|
||||||
cleanup();
|
cleanup();
|
||||||
processAudio(formData);
|
processAudio(formData);
|
||||||
|
|
@ -133,7 +173,12 @@ const useSpeechToTextExternal = (
|
||||||
if (audioStream.current) {
|
if (audioStream.current) {
|
||||||
try {
|
try {
|
||||||
setAudioChunks([]);
|
setAudioChunks([]);
|
||||||
mediaRecorderRef.current = new MediaRecorder(audioStream.current);
|
const bestMimeType = getBestSupportedMimeType();
|
||||||
|
setAudioMimeType(bestMimeType);
|
||||||
|
|
||||||
|
mediaRecorderRef.current = new MediaRecorder(audioStream.current, {
|
||||||
|
mimeType: bestMimeType,
|
||||||
|
});
|
||||||
mediaRecorderRef.current.addEventListener('dataavailable', (event: BlobEvent) => {
|
mediaRecorderRef.current.addEventListener('dataavailable', (event: BlobEvent) => {
|
||||||
audioChunks.push(event.data);
|
audioChunks.push(event.data);
|
||||||
});
|
});
|
||||||
|
|
@ -221,7 +266,7 @@ const useSpeechToTextExternal = (
|
||||||
return () => {
|
return () => {
|
||||||
window.removeEventListener('keydown', handleKeyDown);
|
window.removeEventListener('keydown', handleKeyDown);
|
||||||
};
|
};
|
||||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
|
||||||
}, [isListening]);
|
}, [isListening]);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue