diff --git a/lib/routes/api/speech-credentials.js b/lib/routes/api/speech-credentials.js index 1b75a11..121ecd6 100644 --- a/lib/routes/api/speech-credentials.js +++ b/lib/routes/api/speech-credentials.js @@ -215,7 +215,8 @@ const encryptCredential = (obj) => { if (!deepgram_stt_uri || !deepgram_tts_uri) { assert(api_key, 'invalid deepgram speech credential: api_key is required'); } - const deepgramData = JSON.stringify({api_key, deepgram_stt_uri, deepgram_stt_use_tls, deepgram_tts_uri}); + const deepgramData = JSON.stringify({api_key, deepgram_stt_uri, + deepgram_stt_use_tls, deepgram_tts_uri, model_id}); return encrypt(deepgramData); case 'ibm': diff --git a/lib/utils/speech-data/stt-model-deepgram.js b/lib/utils/speech-data/stt-model-deepgram.js new file mode 100644 index 0000000..fb121bb --- /dev/null +++ b/lib/utils/speech-data/stt-model-deepgram.js @@ -0,0 +1,52 @@ +module.exports = [ + // Nova-3 + { name: 'Nova-3', value: 'nova-3' }, + { name: 'Nova-3 General', value: 'nova-3-general' }, + { name: 'Nova-3 Medical', value: 'nova-3-medical' }, + + // Nova-2 + { name: 'Nova-2', value: 'nova-2' }, + { name: 'Nova-2 General', value: 'nova-2-general' }, + { name: 'Nova-2 Meeting', value: 'nova-2-meeting' }, + { name: 'Nova-2 Phonecall', value: 'nova-2-phonecall' }, + { name: 'Nova-2 Finance', value: 'nova-2-finance' }, + { name: 'Nova-2 Conversational AI', value: 'nova-2-conversationalai' }, + { name: 'Nova-2 Voicemail', value: 'nova-2-voicemail' }, + { name: 'Nova-2 Video', value: 'nova-2-video' }, + { name: 'Nova-2 Medical', value: 'nova-2-medical' }, + { name: 'Nova-2 Drivethru', value: 'nova-2-drivethru' }, + { name: 'Nova-2 Automotive', value: 'nova-2-automotive' }, + { name: 'Nova-2 ATC', value: 'nova-2-atc' }, + + // Nova (legacy) + { name: 'Nova', value: 'nova' }, + { name: 'Nova General', value: 'nova-general' }, + { name: 'Nova Phonecall', value: 'nova-phonecall' }, + { name: 'Nova Medical', value: 'nova-medical' }, + + // Enhanced (legacy) + { name: 
/**
 * Upper-case the first character of a string and leave the rest untouched.
 *
 * @param {string} str - text to capitalize
 * @returns {string} `str` with its first character upper-cased; falsy input
 *   ('' / null / undefined) is returned exactly as received
 */
function capitalizeFirst(str) {
  // Falsy values short-circuit so callers can pass optional fields through.
  return str
    ? `${str.charAt(0).toUpperCase()}${str.slice(1)}`
    : str;
}
/**
 * Reduce a Deepgram model listing to unique `{name, value}` options, one per
 * `canonical_name`, sorted by display name.
 *
 * Entries without a `canonical_name` are skipped, and anything that is not an
 * array is treated as an empty listing.
 *
 * @param {Array<{canonical_name: string}>} models - `stt` or `tts` listing
 * @returns {Array<{name: string, value: string}>} de-duplicated, sorted options
 */
function uniqueDeepgramModelOptions(models) {
  const byCanonicalName = new Map();
  for (const model of Array.isArray(models) ? models : []) {
    if (model && model.canonical_name) {
      byCanonicalName.set(model.canonical_name, {
        name: capitalizeFirst(model.canonical_name),
        value: model.canonical_name
      });
    }
  }
  return Array.from(byCanonicalName.values())
    .sort((a, b) => a.name.localeCompare(b.name));
}

/**
 * Build the languages / voices / models payload for the Deepgram vendor.
 *
 * When a credential without custom STT/TTS URIs is supplied, the model lists
 * are fetched from `https://api.deepgram.com/v1/models` using the
 * credential's `api_key`. If the credential carries a `model_id`, the STT
 * languages are narrowed to those reported for that model. In every other
 * case (no credential, or a custom STT/TTS URI) the bundled static tables
 * are used.
 *
 * @param {object} [credential] - decrypted credential; reads `model_id`,
 *   `api_key`, `deepgram_stt_uri` and `deepgram_tts_uri`
 * @param {*} getTtsVoices - not used by this vendor
 * @param {object} [logger] - logger exposing `error(obj, msg)`
 * @returns {Promise<*>} whatever `tranform` produces for the selected tables
 * @throws {Error} 'failed to list voices' when Deepgram answers with a
 *   non-OK status
 */
async function getLanguagesVoicesForDeepgram(credential, getTtsVoices, logger) {
  if (credential) {
    const {model_id, api_key, deepgram_stt_uri, deepgram_tts_uri} = credential;
    // currently just fetching STT and TTS models from Deepgram cloud
    if (!deepgram_stt_uri && !deepgram_tts_uri) {
      const response = await fetch('https://api.deepgram.com/v1/models', {
        headers: {
          'Authorization': `Token ${api_key}`
        }
      });
      if (!response.ok) {
        // Log the status rather than the whole Response object, and do not
        // let a missing logger mask the real failure.
        if (logger) {
          logger.error(
            {status: response.status, statusText: response.statusText},
            'Error fetching Deepgram voices'
          );
        }
        throw new Error('failed to list voices');
      }

      const payload = await response.json();
      const stt = payload && Array.isArray(payload.stt) ? payload.stt : [];
      const tts = payload && Array.isArray(payload.tts) ? payload.tts : [];
      const sttModels = uniqueDeepgramModelOptions(stt);
      const ttsModels = uniqueDeepgramModelOptions(tts);

      // if model_id is not provided, return all models, all voices, all languages
      if (!model_id) {
        return tranform(TtsLanguagesDeepgram, SttDeepgramLanguagesVoices, ttsModels, sttModels);
      }

      // The listing can contain several entries with the same canonical_name
      // (which is why the options above are de-duplicated), so gather the
      // languages of every matching entry instead of only the first one.
      const supportedLanguages = new Set();
      for (const model of stt) {
        if (model && model.canonical_name === model_id && Array.isArray(model.languages)) {
          for (const language of model.languages) {
            supportedLanguages.add(language);
          }
        }
      }
      const sttLangs = SttDeepgramLanguagesVoices.filter((l) => supportedLanguages.has(l.value));
      return tranform(TtsLanguagesDeepgram, sttLangs, ttsModels, sttModels);
    }
  }

  // Sort a copy: Array.prototype.sort works in place and the static table is
  // a shared module-level array.
  const staticSttModels = [...sttModelDeepgram].sort((a, b) => a.name.localeCompare(b.name));
  return tranform(TtsLanguagesDeepgram, SttDeepgramLanguagesVoices,
    TtsModelDeepgram, staticSttModels);
}