mirror of
https://github.com/jambonz/jambonz-api-server.git
synced 2026-01-25 02:08:24 +00:00
support fetch tts/stt deepgram models from rest api (#457)
This commit is contained in:
@@ -215,7 +215,8 @@ const encryptCredential = (obj) => {
|
|||||||
if (!deepgram_stt_uri || !deepgram_tts_uri) {
|
if (!deepgram_stt_uri || !deepgram_tts_uri) {
|
||||||
assert(api_key, 'invalid deepgram speech credential: api_key is required');
|
assert(api_key, 'invalid deepgram speech credential: api_key is required');
|
||||||
}
|
}
|
||||||
const deepgramData = JSON.stringify({api_key, deepgram_stt_uri, deepgram_stt_use_tls, deepgram_tts_uri});
|
const deepgramData = JSON.stringify({api_key, deepgram_stt_uri,
|
||||||
|
deepgram_stt_use_tls, deepgram_tts_uri, model_id});
|
||||||
return encrypt(deepgramData);
|
return encrypt(deepgramData);
|
||||||
|
|
||||||
case 'ibm':
|
case 'ibm':
|
||||||
|
|||||||
52
lib/utils/speech-data/stt-model-deepgram.js
Normal file
52
lib/utils/speech-data/stt-model-deepgram.js
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
module.exports = [
|
||||||
|
// Nova-3
|
||||||
|
{ name: 'Nova-3', value: 'nova-3' },
|
||||||
|
{ name: 'Nova-3 General', value: 'nova-3-general' },
|
||||||
|
{ name: 'Nova-3 Medical', value: 'nova-3-medical' },
|
||||||
|
|
||||||
|
// Nova-2
|
||||||
|
{ name: 'Nova-2', value: 'nova-2' },
|
||||||
|
{ name: 'Nova-2 General', value: 'nova-2-general' },
|
||||||
|
{ name: 'Nova-2 Meeting', value: 'nova-2-meeting' },
|
||||||
|
{ name: 'Nova-2 Phonecall', value: 'nova-2-phonecall' },
|
||||||
|
{ name: 'Nova-2 Finance', value: 'nova-2-finance' },
|
||||||
|
{ name: 'Nova-2 Conversational AI', value: 'nova-2-conversationalai' },
|
||||||
|
{ name: 'Nova-2 Voicemail', value: 'nova-2-voicemail' },
|
||||||
|
{ name: 'Nova-2 Video', value: 'nova-2-video' },
|
||||||
|
{ name: 'Nova-2 Medical', value: 'nova-2-medical' },
|
||||||
|
{ name: 'Nova-2 Drivethru', value: 'nova-2-drivethru' },
|
||||||
|
{ name: 'Nova-2 Automotive', value: 'nova-2-automotive' },
|
||||||
|
{ name: 'Nova-2 ATC', value: 'nova-2-atc' },
|
||||||
|
|
||||||
|
// Nova (legacy)
|
||||||
|
{ name: 'Nova', value: 'nova' },
|
||||||
|
{ name: 'Nova General', value: 'nova-general' },
|
||||||
|
{ name: 'Nova Phonecall', value: 'nova-phonecall' },
|
||||||
|
{ name: 'Nova Medical', value: 'nova-medical' },
|
||||||
|
|
||||||
|
// Enhanced (legacy)
|
||||||
|
{ name: 'Enhanced', value: 'enhanced' },
|
||||||
|
{ name: 'Enhanced General', value: 'enhanced-general' },
|
||||||
|
{ name: 'Enhanced Meeting', value: 'enhanced-meeting' },
|
||||||
|
{ name: 'Enhanced Phonecall', value: 'enhanced-phonecall' },
|
||||||
|
{ name: 'Enhanced Finance', value: 'enhanced-finance' },
|
||||||
|
|
||||||
|
// Base (legacy)
|
||||||
|
{ name: 'Base', value: 'base' },
|
||||||
|
{ name: 'Base General', value: 'base-general' },
|
||||||
|
{ name: 'Base Meeting', value: 'base-meeting' },
|
||||||
|
{ name: 'Base Phonecall', value: 'base-phonecall' },
|
||||||
|
{ name: 'Base Finance', value: 'base-finance' },
|
||||||
|
{ name: 'Base Conversational AI', value: 'base-conversationalai' },
|
||||||
|
{ name: 'Base Voicemail', value: 'base-voicemail' },
|
||||||
|
{ name: 'Base Video', value: 'base-video' },
|
||||||
|
|
||||||
|
// Whisper
|
||||||
|
{ name: 'Whisper Tiny', value: 'whisper-tiny' },
|
||||||
|
{ name: 'Whisper Base', value: 'whisper-base' },
|
||||||
|
{ name: 'Whisper Small', value: 'whisper-small' },
|
||||||
|
{ name: 'Whisper Medium', value: 'whisper-medium' },
|
||||||
|
{ name: 'Whisper Large', value: 'whisper-large' },
|
||||||
|
{ name: 'Whisper', value: 'whisper' },
|
||||||
|
];
|
||||||
|
|
||||||
@@ -48,6 +48,12 @@ const SttOpenaiLanguagesVoices = require('./speech-data/stt-openai');
|
|||||||
|
|
||||||
|
|
||||||
const SttModelOpenai = require('./speech-data/stt-model-openai');
|
const SttModelOpenai = require('./speech-data/stt-model-openai');
|
||||||
|
const sttModelDeepgram = require('./speech-data/stt-model-deepgram');
|
||||||
|
|
||||||
|
function capitalizeFirst(str) {
|
||||||
|
if (!str) return str;
|
||||||
|
return str.charAt(0).toUpperCase() + str.slice(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
const testSonioxStt = async(logger, credentials) => {
|
const testSonioxStt = async(logger, credentials) => {
|
||||||
@@ -636,6 +642,7 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
|
|||||||
obj.deepgram_stt_uri = o.deepgram_stt_uri;
|
obj.deepgram_stt_uri = o.deepgram_stt_uri;
|
||||||
obj.deepgram_stt_use_tls = o.deepgram_stt_use_tls;
|
obj.deepgram_stt_use_tls = o.deepgram_stt_use_tls;
|
||||||
obj.deepgram_tts_uri = o.deepgram_tts_uri;
|
obj.deepgram_tts_uri = o.deepgram_tts_uri;
|
||||||
|
obj.model_id = o.model_id;
|
||||||
}
|
}
|
||||||
else if ('ibm' === obj.vendor) {
|
else if ('ibm' === obj.vendor) {
|
||||||
const o = JSON.parse(decrypt(credential));
|
const o = JSON.parse(decrypt(credential));
|
||||||
@@ -851,8 +858,47 @@ async function getLanguagesVoicesForNuane(credential, getTtsVoices, logger) {
|
|||||||
return tranform(TtsNuanceLanguagesVoices, SttNuanceLanguagesVoices);
|
return tranform(TtsNuanceLanguagesVoices, SttNuanceLanguagesVoices);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getLanguagesVoicesForDeepgram(credential) {
|
async function getLanguagesVoicesForDeepgram(credential, getTtsVoices, logger) {
|
||||||
return tranform(TtsLanguagesDeepgram, SttDeepgramLanguagesVoices, TtsModelDeepgram);
|
if (credential) {
|
||||||
|
const {model_id, api_key, deepgram_stt_uri, deepgram_tts_uri} = credential;
|
||||||
|
// currently just fetching STT and TTS models from Deepgram cloud
|
||||||
|
if (!deepgram_stt_uri && !deepgram_tts_uri) {
|
||||||
|
const response = await fetch('https://api.deepgram.com/v1/models', {
|
||||||
|
headers: {
|
||||||
|
'Authorization': `Token ${api_key}`
|
||||||
|
}
|
||||||
|
});
|
||||||
|
if (!response.ok) {
|
||||||
|
logger.error({response}, 'Error fetching Deepgram voices');
|
||||||
|
throw new Error('failed to list voices');
|
||||||
|
}
|
||||||
|
const {stt, tts} = await response.json();
|
||||||
|
let sttLangs = SttDeepgramLanguagesVoices;
|
||||||
|
const sttModels = Array.from(
|
||||||
|
new Map(
|
||||||
|
stt.map((m) => [m.canonical_name, { name: capitalizeFirst(m.canonical_name), value: m.canonical_name }])
|
||||||
|
).values()
|
||||||
|
).sort((a, b) => a.name.localeCompare(b.name));
|
||||||
|
const ttsModels = Array.from(
|
||||||
|
new Map(
|
||||||
|
tts.map((m) => [m.canonical_name, { name: capitalizeFirst(m.canonical_name), value: m.canonical_name }])
|
||||||
|
).values()
|
||||||
|
).sort((a, b) => a.name.localeCompare(b.name));
|
||||||
|
// if model_id is not provided, return all models, all voices, all languages
|
||||||
|
if (!model_id) {
|
||||||
|
return tranform(TtsLanguagesDeepgram, sttLangs, ttsModels, sttModels);
|
||||||
|
}
|
||||||
|
|
||||||
|
const selectedSttModel = stt.find((m) => m.canonical_name === model_id);
|
||||||
|
const selectedSttLangs = selectedSttModel ? selectedSttModel.languages : [];
|
||||||
|
sttLangs = SttDeepgramLanguagesVoices.filter((l) => {
|
||||||
|
return selectedSttLangs.includes(l.value);
|
||||||
|
});
|
||||||
|
return tranform(TtsLanguagesDeepgram, sttLangs, ttsModels, sttModels);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return tranform(TtsLanguagesDeepgram, SttDeepgramLanguagesVoices,
|
||||||
|
TtsModelDeepgram, sttModelDeepgram.sort((a, b) => a.name.localeCompare(b.name)));
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getLanguagesVoicesForIbm(credential, getTtsVoices, logger) {
|
async function getLanguagesVoicesForIbm(credential, getTtsVoices, logger) {
|
||||||
@@ -1072,9 +1118,9 @@ async function getLanguagesVoicesForRimelabs(credential) {
|
|||||||
Object.keys(voices).length > 0 ? voices[Object.keys(voices)[0]] : [];
|
Object.keys(voices).length > 0 ? voices[Object.keys(voices)[0]] : [];
|
||||||
const ttsVoices = Object.entries(modelVoices).map(([key, voices]) => ({
|
const ttsVoices = Object.entries(modelVoices).map(([key, voices]) => ({
|
||||||
value: key,
|
value: key,
|
||||||
name: key.charAt(0).toUpperCase() + key.slice(1),
|
name: capitalizeFirst(key),
|
||||||
voices: voices.map((v) => ({
|
voices: voices.map((v) => ({
|
||||||
name: v.charAt(0).toUpperCase() + v.slice(1),
|
name: capitalizeFirst(v),
|
||||||
value: v
|
value: v
|
||||||
}))
|
}))
|
||||||
}));
|
}));
|
||||||
|
|||||||
Reference in New Issue
Block a user