Feat/deepgram tts onprem (#338)

* support Deepgram on-premise * wip * update speech utils version * install Docker in CI
2026-01-25 02:08:24 +00:00 · 2024-08-07 18:24:58 +07:00
parent 7553e2b617
commit 2d2b98dab5
6 changed files with 93 additions and 17 deletions
--- a/lib/routes/api/speech-credentials.js
+++ b/lib/routes/api/speech-credentials.js
@@ -124,6 +124,7 @@ const encryptCredential = (obj) => {
    nuance_stt_uri,
    deepgram_stt_uri,
    deepgram_stt_use_tls,
+    deepgram_tts_uri,
    use_custom_tts,
    custom_tts_endpoint,
    custom_tts_endpoint_url,
@@ -204,10 +205,10 @@ const encryptCredential = (obj) => {

    case 'deepgram':
      // API key is optional if onprem
-      if (!deepgram_stt_uri) {
+      if (!deepgram_stt_uri || !deepgram_tts_uri) {
        assert(api_key, 'invalid deepgram speech credential: api_key is required');
      }
-      const deepgramData = JSON.stringify({api_key, deepgram_stt_uri, deepgram_stt_use_tls});
+      const deepgramData = JSON.stringify({api_key, deepgram_stt_uri, deepgram_stt_use_tls, deepgram_tts_uri});
      return encrypt(deepgramData);

    case 'ibm':
@@ -458,6 +459,7 @@ router.put('/:sid', async(req, res) => {
          options,
          deepgram_stt_uri,
          deepgram_stt_use_tls,
+          deepgram_tts_uri,
          engine_version
        } = req.body;

@@ -485,6 +487,7 @@ router.put('/:sid', async(req, res) => {
          options,
          deepgram_stt_uri,
          deepgram_stt_use_tls,
+          deepgram_tts_uri,
          engine_version
        };
        logger.info({o, newCred}, 'updating speech credential with this new credential');
--- a/lib/utils/speech-utils.js
+++ b/lib/utils/speech-utils.js
@@ -92,8 +92,8 @@ const testGoogleStt = async(logger, credentials) => {
 };

 const testDeepgramStt = async(logger, credentials) => {
-  const {api_key} = credentials;
-  const deepgram = new Deepgram(api_key);
+  const {api_key, deepgram_stt_uri, deepgram_stt_use_tls} = credentials;
+  const deepgram = new Deepgram(api_key, deepgram_stt_uri, deepgram_stt_uri && deepgram_stt_use_tls);

  const mimetype = 'audio/wav';
  const source = {
@@ -272,7 +272,8 @@ const testPlayHT = async(logger, synthAudio, credentials) => {
        credentials,
        language: 'en-US',
        voice: 's3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json',
-        text: 'Hi there and welcome to jambones!'
+        text: 'Hi there and welcome to jambones!',
+        renderForCaching: true
      }
    );
    // Test if playHT can fetch voices
@@ -295,7 +296,8 @@ const testRimelabs = async(logger, synthAudio, credentials) => {
        credentials,
        language: 'en-US',
        voice: 'amber',
-        text: 'Hi there and welcome to jambones!'
+        text: 'Hi there and welcome to jambones!',
+        renderForCaching: true
      }
    );
  } catch (err) {
@@ -312,7 +314,8 @@ const testWhisper = async(logger, synthAudio, credentials) => {
        credentials,
        language: 'en-US',
        voice: 'alloy',
-        text: 'Hi there and welcome to jambones!'
+        text: 'Hi there and welcome to jambones!',
+        renderForCaching: true
      }
    );
  } catch (err) {
@@ -328,7 +331,8 @@ const testDeepgramTTS = async(logger, synthAudio, credentials) => {
        vendor: 'deepgram',
        credentials,
        model: 'aura-asteria-en',
-        text: 'Hi there and welcome to jambones!'
+        text: 'Hi there and welcome to jambones!',
+        renderForCaching: true
      }
    );
  } catch (err) {
@@ -383,7 +387,8 @@ const testVerbioTts = async(logger, synthAudio, credentials) => {
        credentials,
        language: 'en-US',
        voice: 'tommy_en-us',
-        text: 'Hi there and welcome to jambones!'
+        text: 'Hi there and welcome to jambones!',
+        renderForCaching: true
      }
    );
  } catch (err) {
@@ -509,6 +514,7 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
    obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
    obj.deepgram_stt_uri = o.deepgram_stt_uri;
    obj.deepgram_stt_use_tls = o.deepgram_stt_use_tls;
+    obj.deepgram_tts_uri = o.deepgram_tts_uri;
  }
  else if ('ibm' === obj.vendor) {
    const o = JSON.parse(decrypt(credential));