Support fetching Deepgram TTS/STT models from the REST API (#457)

This commit is contained in:
Hoan Luu Huu
2025-05-28 20:59:02 +07:00
committed by GitHub
parent c7e279d0ee
commit bb5dba7c20
3 changed files with 104 additions and 5 deletions

View File

@@ -215,7 +215,8 @@ const encryptCredential = (obj) => {
if (!deepgram_stt_uri || !deepgram_tts_uri) {
assert(api_key, 'invalid deepgram speech credential: api_key is required');
}
const deepgramData = JSON.stringify({api_key, deepgram_stt_uri, deepgram_stt_use_tls, deepgram_tts_uri});
const deepgramData = JSON.stringify({api_key, deepgram_stt_uri,
deepgram_stt_use_tls, deepgram_tts_uri, model_id});
return encrypt(deepgramData);
case 'ibm':

View File

@@ -0,0 +1,52 @@
/**
 * Static table of speech-to-text model options, exported as an array of
 * `{name, value}` objects: `name` is the display label and `value` is the
 * model identifier string. Entries are grouped by model family (see the
 * section comments below); the array itself is not sorted alphabetically.
 *
 * NOTE(review): presumably this is the Deepgram STT model list used as a
 * static fallback when models are not fetched from the REST API -- confirm
 * against the module that requires this file.
 *
 * The export is a plain mutable array shared by every importer, so callers
 * that need a different order should sort a copy rather than this array.
 */
module.exports = [
// Nova-3
{ name: 'Nova-3', value: 'nova-3' },
{ name: 'Nova-3 General', value: 'nova-3-general' },
{ name: 'Nova-3 Medical', value: 'nova-3-medical' },
// Nova-2
{ name: 'Nova-2', value: 'nova-2' },
{ name: 'Nova-2 General', value: 'nova-2-general' },
{ name: 'Nova-2 Meeting', value: 'nova-2-meeting' },
{ name: 'Nova-2 Phonecall', value: 'nova-2-phonecall' },
{ name: 'Nova-2 Finance', value: 'nova-2-finance' },
{ name: 'Nova-2 Conversational AI', value: 'nova-2-conversationalai' },
{ name: 'Nova-2 Voicemail', value: 'nova-2-voicemail' },
{ name: 'Nova-2 Video', value: 'nova-2-video' },
{ name: 'Nova-2 Medical', value: 'nova-2-medical' },
{ name: 'Nova-2 Drivethru', value: 'nova-2-drivethru' },
{ name: 'Nova-2 Automotive', value: 'nova-2-automotive' },
{ name: 'Nova-2 ATC', value: 'nova-2-atc' },
// Nova (legacy)
{ name: 'Nova', value: 'nova' },
{ name: 'Nova General', value: 'nova-general' },
{ name: 'Nova Phonecall', value: 'nova-phonecall' },
{ name: 'Nova Medical', value: 'nova-medical' },
// Enhanced (legacy)
{ name: 'Enhanced', value: 'enhanced' },
{ name: 'Enhanced General', value: 'enhanced-general' },
{ name: 'Enhanced Meeting', value: 'enhanced-meeting' },
{ name: 'Enhanced Phonecall', value: 'enhanced-phonecall' },
{ name: 'Enhanced Finance', value: 'enhanced-finance' },
// Base (legacy)
{ name: 'Base', value: 'base' },
{ name: 'Base General', value: 'base-general' },
{ name: 'Base Meeting', value: 'base-meeting' },
{ name: 'Base Phonecall', value: 'base-phonecall' },
{ name: 'Base Finance', value: 'base-finance' },
{ name: 'Base Conversational AI', value: 'base-conversationalai' },
{ name: 'Base Voicemail', value: 'base-voicemail' },
{ name: 'Base Video', value: 'base-video' },
// Whisper
{ name: 'Whisper Tiny', value: 'whisper-tiny' },
{ name: 'Whisper Base', value: 'whisper-base' },
{ name: 'Whisper Small', value: 'whisper-small' },
{ name: 'Whisper Medium', value: 'whisper-medium' },
{ name: 'Whisper Large', value: 'whisper-large' },
{ name: 'Whisper', value: 'whisper' },
];

View File

@@ -48,6 +48,12 @@ const SttOpenaiLanguagesVoices = require('./speech-data/stt-openai');
const SttModelOpenai = require('./speech-data/stt-model-openai');
const sttModelDeepgram = require('./speech-data/stt-model-deepgram');
/**
 * Upper-case the first character of a string, leaving the rest untouched.
 *
 * @param {string} str - text to capitalize
 * @returns {string} the capitalized text; falsy input (empty string, null,
 *   undefined) is handed back unchanged
 */
function capitalizeFirst(str) {
  return str ? `${str.charAt(0).toUpperCase()}${str.slice(1)}` : str;
}
const testSonioxStt = async(logger, credentials) => {
@@ -636,6 +642,7 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
obj.deepgram_stt_uri = o.deepgram_stt_uri;
obj.deepgram_stt_use_tls = o.deepgram_stt_use_tls;
obj.deepgram_tts_uri = o.deepgram_tts_uri;
obj.model_id = o.model_id;
}
else if ('ibm' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
@@ -851,8 +858,47 @@ async function getLanguagesVoicesForNuane(credential, getTtsVoices, logger) {
return tranform(TtsNuanceLanguagesVoices, SttNuanceLanguagesVoices);
}
async function getLanguagesVoicesForDeepgram(credential) {
return tranform(TtsLanguagesDeepgram, SttDeepgramLanguagesVoices, TtsModelDeepgram);
/**
 * Build the language / voice / model lists for Deepgram.
 *
 * When a credential is supplied and it has no custom STT or TTS URI, the
 * STT and TTS model lists are fetched from the Deepgram cloud REST API
 * using the credential's api_key. If the credential also carries a
 * model_id, the STT languages are narrowed to those the API reports for
 * that model. In every other case the static, bundled lists are used.
 *
 * @param {object} [credential] - decrypted credential; the fields read are
 *   model_id, api_key, deepgram_stt_uri and deepgram_tts_uri
 * @param {*} getTtsVoices - unused here; kept so the signature matches the
 *   other per-vendor functions
 * @param {object} logger - logger with an `error(obj, msg)` method
 * @returns {Promise<*>} whatever `tranform` builds from the selected lists
 * @throws {Error} 'failed to list voices' when the REST API responds with a
 *   non-OK status; network errors raised by `fetch` propagate unchanged
 */
async function getLanguagesVoicesForDeepgram(credential, getTtsVoices, logger) {
  if (credential) {
    const {model_id, api_key, deepgram_stt_uri, deepgram_tts_uri} = credential;
    // currently just fetching STT and TTS models from Deepgram cloud
    if (!deepgram_stt_uri && !deepgram_tts_uri) {
      const response = await fetch('https://api.deepgram.com/v1/models', {
        headers: {
          'Authorization': `Token ${api_key}`
        }
      });
      if (!response.ok) {
        // log only the useful scalar fields; a Response object does not serialize meaningfully
        logger.error({status: response.status, statusText: response.statusText},
          'Error fetching Deepgram voices');
        throw new Error('failed to list voices');
      }
      const body = await response.json();
      // tolerate a payload that lacks either list instead of failing on `.map`
      const stt = Array.isArray(body && body.stt) ? body.stt : [];
      const tts = Array.isArray(body && body.tts) ? body.tts : [];

      // One option per distinct canonical_name, sorted by display name.
      // Entries without a usable canonical_name are skipped so the sort cannot throw.
      const toModelOptions = (models) => Array.from(
        new Map(
          models
            .filter((m) => m && typeof m.canonical_name === 'string' && m.canonical_name)
            .map((m) => [m.canonical_name, {name: capitalizeFirst(m.canonical_name), value: m.canonical_name}])
        ).values()
      ).sort((a, b) => a.name.localeCompare(b.name));

      const sttModels = toModelOptions(stt);
      const ttsModels = toModelOptions(tts);

      // if model_id is not provided, return all models, all voices, all languages
      if (!model_id) {
        return tranform(TtsLanguagesDeepgram, SttDeepgramLanguagesVoices, ttsModels, sttModels);
      }

      // Several API entries may share one canonical_name (hence the dedupe above),
      // so gather the languages of every matching entry rather than only the first.
      const selectedSttLangs = new Set();
      for (const m of stt) {
        if (m && m.canonical_name === model_id && Array.isArray(m.languages)) {
          m.languages.forEach((lang) => selectedSttLangs.add(lang));
        }
      }
      const sttLangs = SttDeepgramLanguagesVoices.filter((l) => selectedSttLangs.has(l.value));
      return tranform(TtsLanguagesDeepgram, sttLangs, ttsModels, sttModels);
    }
  }
  // Static fallback. Sort a copy: Array.prototype.sort works in place and
  // sttModelDeepgram is a module-level array shared with every other importer.
  return tranform(TtsLanguagesDeepgram, SttDeepgramLanguagesVoices,
    TtsModelDeepgram, [...sttModelDeepgram].sort((a, b) => a.name.localeCompare(b.name)));
}
async function getLanguagesVoicesForIbm(credential, getTtsVoices, logger) {
@@ -1072,9 +1118,9 @@ async function getLanguagesVoicesForRimelabs(credential) {
Object.keys(voices).length > 0 ? voices[Object.keys(voices)[0]] : [];
const ttsVoices = Object.entries(modelVoices).map(([key, voices]) => ({
value: key,
name: key.charAt(0).toUpperCase() + key.slice(1),
name: capitalizeFirst(key),
voices: voices.map((v) => ({
name: v.charAt(0).toUpperCase() + v.slice(1),
name: capitalizeFirst(v),
value: v
}))
}));