Feat/deepgram tts onprem (#338)

* support Deepgram on-premises

* wip

* update speech utils version

* install Docker in CI
This commit is contained in:
Hoan Luu Huu
2024-08-07 18:24:58 +07:00
committed by GitHub
parent 7553e2b617
commit 2d2b98dab5
6 changed files with 93 additions and 17 deletions

View File

@@ -124,6 +124,7 @@ const encryptCredential = (obj) => {
nuance_stt_uri,
deepgram_stt_uri,
deepgram_stt_use_tls,
deepgram_tts_uri,
use_custom_tts,
custom_tts_endpoint,
custom_tts_endpoint_url,
@@ -204,10 +205,10 @@ const encryptCredential = (obj) => {
case 'deepgram':
// API key is optional only when both the STT and TTS on-prem URIs are provided
if (!deepgram_stt_uri) {
if (!deepgram_stt_uri || !deepgram_tts_uri) {
assert(api_key, 'invalid deepgram speech credential: api_key is required');
}
const deepgramData = JSON.stringify({api_key, deepgram_stt_uri, deepgram_stt_use_tls});
const deepgramData = JSON.stringify({api_key, deepgram_stt_uri, deepgram_stt_use_tls, deepgram_tts_uri});
return encrypt(deepgramData);
case 'ibm':
@@ -458,6 +459,7 @@ router.put('/:sid', async(req, res) => {
options,
deepgram_stt_uri,
deepgram_stt_use_tls,
deepgram_tts_uri,
engine_version
} = req.body;
@@ -485,6 +487,7 @@ router.put('/:sid', async(req, res) => {
options,
deepgram_stt_uri,
deepgram_stt_use_tls,
deepgram_tts_uri,
engine_version
};
logger.info({o, newCred}, 'updating speech credential with this new credential');

View File

@@ -92,8 +92,8 @@ const testGoogleStt = async(logger, credentials) => {
};
const testDeepgramStt = async(logger, credentials) => {
const {api_key} = credentials;
const deepgram = new Deepgram(api_key);
const {api_key, deepgram_stt_uri, deepgram_stt_use_tls} = credentials;
const deepgram = new Deepgram(api_key, deepgram_stt_uri, deepgram_stt_uri && deepgram_stt_use_tls);
const mimetype = 'audio/wav';
const source = {
@@ -272,7 +272,8 @@ const testPlayHT = async(logger, synthAudio, credentials) => {
credentials,
language: 'en-US',
voice: 's3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json',
text: 'Hi there and welcome to jambones!'
text: 'Hi there and welcome to jambones!',
renderForCaching: true
}
);
// Test if playHT can fetch voices
@@ -295,7 +296,8 @@ const testRimelabs = async(logger, synthAudio, credentials) => {
credentials,
language: 'en-US',
voice: 'amber',
text: 'Hi there and welcome to jambones!'
text: 'Hi there and welcome to jambones!',
renderForCaching: true
}
);
} catch (err) {
@@ -312,7 +314,8 @@ const testWhisper = async(logger, synthAudio, credentials) => {
credentials,
language: 'en-US',
voice: 'alloy',
text: 'Hi there and welcome to jambones!'
text: 'Hi there and welcome to jambones!',
renderForCaching: true
}
);
} catch (err) {
@@ -328,7 +331,8 @@ const testDeepgramTTS = async(logger, synthAudio, credentials) => {
vendor: 'deepgram',
credentials,
model: 'aura-asteria-en',
text: 'Hi there and welcome to jambones!'
text: 'Hi there and welcome to jambones!',
renderForCaching: true
}
);
} catch (err) {
@@ -383,7 +387,8 @@ const testVerbioTts = async(logger, synthAudio, credentials) => {
credentials,
language: 'en-US',
voice: 'tommy_en-us',
text: 'Hi there and welcome to jambones!'
text: 'Hi there and welcome to jambones!',
renderForCaching: true
}
);
} catch (err) {
@@ -509,6 +514,7 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
obj.deepgram_stt_uri = o.deepgram_stt_uri;
obj.deepgram_stt_use_tls = o.deepgram_stt_use_tls;
obj.deepgram_tts_uri = o.deepgram_tts_uri;
}
else if ('ibm' === obj.vendor) {
const o = JSON.parse(decrypt(credential));