mirror of
https://github.com/jambonz/jambonz-api-server.git
synced 2025-12-19 05:47:46 +00:00
support fetch tts/stt deepgram models from rest api (#457)
This commit is contained in:
@@ -215,7 +215,8 @@ const encryptCredential = (obj) => {
|
||||
if (!deepgram_stt_uri || !deepgram_tts_uri) {
|
||||
assert(api_key, 'invalid deepgram speech credential: api_key is required');
|
||||
}
|
||||
const deepgramData = JSON.stringify({api_key, deepgram_stt_uri, deepgram_stt_use_tls, deepgram_tts_uri});
|
||||
const deepgramData = JSON.stringify({api_key, deepgram_stt_uri,
|
||||
deepgram_stt_use_tls, deepgram_tts_uri, model_id});
|
||||
return encrypt(deepgramData);
|
||||
|
||||
case 'ibm':
|
||||
|
||||
52
lib/utils/speech-data/stt-model-deepgram.js
Normal file
52
lib/utils/speech-data/stt-model-deepgram.js
Normal file
@@ -0,0 +1,52 @@
|
||||
module.exports = [
|
||||
// Nova-3
|
||||
{ name: 'Nova-3', value: 'nova-3' },
|
||||
{ name: 'Nova-3 General', value: 'nova-3-general' },
|
||||
{ name: 'Nova-3 Medical', value: 'nova-3-medical' },
|
||||
|
||||
// Nova-2
|
||||
{ name: 'Nova-2', value: 'nova-2' },
|
||||
{ name: 'Nova-2 General', value: 'nova-2-general' },
|
||||
{ name: 'Nova-2 Meeting', value: 'nova-2-meeting' },
|
||||
{ name: 'Nova-2 Phonecall', value: 'nova-2-phonecall' },
|
||||
{ name: 'Nova-2 Finance', value: 'nova-2-finance' },
|
||||
{ name: 'Nova-2 Conversational AI', value: 'nova-2-conversationalai' },
|
||||
{ name: 'Nova-2 Voicemail', value: 'nova-2-voicemail' },
|
||||
{ name: 'Nova-2 Video', value: 'nova-2-video' },
|
||||
{ name: 'Nova-2 Medical', value: 'nova-2-medical' },
|
||||
{ name: 'Nova-2 Drivethru', value: 'nova-2-drivethru' },
|
||||
{ name: 'Nova-2 Automotive', value: 'nova-2-automotive' },
|
||||
{ name: 'Nova-2 ATC', value: 'nova-2-atc' },
|
||||
|
||||
// Nova (legacy)
|
||||
{ name: 'Nova', value: 'nova' },
|
||||
{ name: 'Nova General', value: 'nova-general' },
|
||||
{ name: 'Nova Phonecall', value: 'nova-phonecall' },
|
||||
{ name: 'Nova Medical', value: 'nova-medical' },
|
||||
|
||||
// Enhanced (legacy)
|
||||
{ name: 'Enhanced', value: 'enhanced' },
|
||||
{ name: 'Enhanced General', value: 'enhanced-general' },
|
||||
{ name: 'Enhanced Meeting', value: 'enhanced-meeting' },
|
||||
{ name: 'Enhanced Phonecall', value: 'enhanced-phonecall' },
|
||||
{ name: 'Enhanced Finance', value: 'enhanced-finance' },
|
||||
|
||||
// Base (legacy)
|
||||
{ name: 'Base', value: 'base' },
|
||||
{ name: 'Base General', value: 'base-general' },
|
||||
{ name: 'Base Meeting', value: 'base-meeting' },
|
||||
{ name: 'Base Phonecall', value: 'base-phonecall' },
|
||||
{ name: 'Base Finance', value: 'base-finance' },
|
||||
{ name: 'Base Conversational AI', value: 'base-conversationalai' },
|
||||
{ name: 'Base Voicemail', value: 'base-voicemail' },
|
||||
{ name: 'Base Video', value: 'base-video' },
|
||||
|
||||
// Whisper
|
||||
{ name: 'Whisper Tiny', value: 'whisper-tiny' },
|
||||
{ name: 'Whisper Base', value: 'whisper-base' },
|
||||
{ name: 'Whisper Small', value: 'whisper-small' },
|
||||
{ name: 'Whisper Medium', value: 'whisper-medium' },
|
||||
{ name: 'Whisper Large', value: 'whisper-large' },
|
||||
{ name: 'Whisper', value: 'whisper' },
|
||||
];
|
||||
|
||||
@@ -48,6 +48,12 @@ const SttOpenaiLanguagesVoices = require('./speech-data/stt-openai');
|
||||
|
||||
|
||||
const SttModelOpenai = require('./speech-data/stt-model-openai');
|
||||
const sttModelDeepgram = require('./speech-data/stt-model-deepgram');
|
||||
|
||||
function capitalizeFirst(str) {
|
||||
if (!str) return str;
|
||||
return str.charAt(0).toUpperCase() + str.slice(1);
|
||||
}
|
||||
|
||||
|
||||
const testSonioxStt = async(logger, credentials) => {
|
||||
@@ -636,6 +642,7 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
|
||||
obj.deepgram_stt_uri = o.deepgram_stt_uri;
|
||||
obj.deepgram_stt_use_tls = o.deepgram_stt_use_tls;
|
||||
obj.deepgram_tts_uri = o.deepgram_tts_uri;
|
||||
obj.model_id = o.model_id;
|
||||
}
|
||||
else if ('ibm' === obj.vendor) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
@@ -851,8 +858,47 @@ async function getLanguagesVoicesForNuane(credential, getTtsVoices, logger) {
|
||||
return tranform(TtsNuanceLanguagesVoices, SttNuanceLanguagesVoices);
|
||||
}
|
||||
|
||||
async function getLanguagesVoicesForDeepgram(credential) {
|
||||
return tranform(TtsLanguagesDeepgram, SttDeepgramLanguagesVoices, TtsModelDeepgram);
|
||||
async function getLanguagesVoicesForDeepgram(credential, getTtsVoices, logger) {
|
||||
if (credential) {
|
||||
const {model_id, api_key, deepgram_stt_uri, deepgram_tts_uri} = credential;
|
||||
// currently just fetching STT and TTS models from Deepgram cloud
|
||||
if (!deepgram_stt_uri && !deepgram_tts_uri) {
|
||||
const response = await fetch('https://api.deepgram.com/v1/models', {
|
||||
headers: {
|
||||
'Authorization': `Token ${api_key}`
|
||||
}
|
||||
});
|
||||
if (!response.ok) {
|
||||
logger.error({response}, 'Error fetching Deepgram voices');
|
||||
throw new Error('failed to list voices');
|
||||
}
|
||||
const {stt, tts} = await response.json();
|
||||
let sttLangs = SttDeepgramLanguagesVoices;
|
||||
const sttModels = Array.from(
|
||||
new Map(
|
||||
stt.map((m) => [m.canonical_name, { name: capitalizeFirst(m.canonical_name), value: m.canonical_name }])
|
||||
).values()
|
||||
).sort((a, b) => a.name.localeCompare(b.name));
|
||||
const ttsModels = Array.from(
|
||||
new Map(
|
||||
tts.map((m) => [m.canonical_name, { name: capitalizeFirst(m.canonical_name), value: m.canonical_name }])
|
||||
).values()
|
||||
).sort((a, b) => a.name.localeCompare(b.name));
|
||||
// if model_id is not provided, return all models, all voices, all languages
|
||||
if (!model_id) {
|
||||
return tranform(TtsLanguagesDeepgram, sttLangs, ttsModels, sttModels);
|
||||
}
|
||||
|
||||
const selectedSttModel = stt.find((m) => m.canonical_name === model_id);
|
||||
const selectedSttLangs = selectedSttModel ? selectedSttModel.languages : [];
|
||||
sttLangs = SttDeepgramLanguagesVoices.filter((l) => {
|
||||
return selectedSttLangs.includes(l.value);
|
||||
});
|
||||
return tranform(TtsLanguagesDeepgram, sttLangs, ttsModels, sttModels);
|
||||
}
|
||||
}
|
||||
return tranform(TtsLanguagesDeepgram, SttDeepgramLanguagesVoices,
|
||||
TtsModelDeepgram, sttModelDeepgram.sort((a, b) => a.name.localeCompare(b.name)));
|
||||
}
|
||||
|
||||
async function getLanguagesVoicesForIbm(credential, getTtsVoices, logger) {
|
||||
@@ -1072,9 +1118,9 @@ async function getLanguagesVoicesForRimelabs(credential) {
|
||||
Object.keys(voices).length > 0 ? voices[Object.keys(voices)[0]] : [];
|
||||
const ttsVoices = Object.entries(modelVoices).map(([key, voices]) => ({
|
||||
value: key,
|
||||
name: key.charAt(0).toUpperCase() + key.slice(1),
|
||||
name: capitalizeFirst(key),
|
||||
voices: voices.map((v) => ({
|
||||
name: v.charAt(0).toUpperCase() + v.slice(1),
|
||||
name: capitalizeFirst(v),
|
||||
value: v
|
||||
}))
|
||||
}));
|
||||
|
||||
Reference in New Issue
Block a user