add support for ibm speech credentials, and test for ibm tts (#82)

* add support for IBM speech credentials, and a test for IBM TTS
* add STT testing for IBM Watson
2026-01-25 02:08:24 +00:00 · 2022-11-21 21:16:26 -05:00
parent 5be286d3db
commit 5e070324ae
5 changed files with 1460 additions and 64 deletions
--- a/lib/routes/api/speech-credentials.js
+++ b/lib/routes/api/speech-credentials.js
@@ -15,7 +15,9 @@ const {
  testWellSaidTts,
  testNuanceStt,
  testNuanceTts,
-  testDeepgramStt
+  testDeepgramStt,
+  testIbmTts,
+  testIbmStt
 } = require('../../utils/speech-utils');

 const obscureKey = (key) => {
@@ -43,7 +45,11 @@ const encryptCredential = (obj) => {
    use_custom_tts,
    custom_tts_endpoint,
    use_custom_stt,
-    custom_stt_endpoint
+    custom_stt_endpoint,
+    tts_api_key,
+    tts_region,
+    stt_api_key,
+    stt_region
  } = obj;

  switch (vendor) {
@@ -94,6 +100,10 @@ const encryptCredential = (obj) => {
      const deepgramData = JSON.stringify({api_key});
      return encrypt(deepgramData);

+    case 'ibm':
+      const ibmData = JSON.stringify({tts_api_key, tts_region, stt_api_key, stt_region});
+      return encrypt(ibmData);
+
    default:
      assert(false, `invalid or missing vendor: ${vendor}`);
  }
@@ -268,7 +278,7 @@ router.put('/:sid', async(req, res) => {
  const sid = req.params.sid;
  const logger = req.app.locals.logger;
  try {
-    const {use_for_tts, use_for_stt, region, aws_region} = req.body;
+    const {use_for_tts, use_for_stt, region, aws_region, stt_region, tts_region} = req.body;
    if (typeof use_for_tts === 'undefined' && typeof use_for_stt === 'undefined') {
      throw new DbErrorUnprocessableRequest('use_for_tts and use_for_stt are the only updateable fields');
    }
@@ -301,7 +311,9 @@ router.put('/:sid', async(req, res) => {
          use_custom_tts,
          custom_tts_endpoint,
          use_custom_stt,
-          custom_stt_endpoint
+          custom_stt_endpoint,
+          stt_region,
+          tts_region
        };
        logger.info({o, newCred}, 'updating speech credential with this new credential');
        obj.credential = encryptCredential(newCred);
@@ -501,6 +513,40 @@ router.get('/:sid/test', async(req, res) => {
        }
      }
    }
+    else if (cred.vendor === 'ibm') {
+      const {getTtsVoices} = req.app.locals;
+
+      if (cred.use_for_tts) {
+        const {tts_api_key, tts_region} = credential;
+        try {
+          await testIbmTts(logger, getTtsVoices, {
+            tts_api_key,
+            tts_region
+          });
+          results.tts.status = 'ok';
+          SpeechCredential.ttsTestResult(sid, true);
+        } catch (err) {
+          logger.error({err}, 'error testing ibm tts');
+          const reason = err.statusCode === 401 ?
+            'invalid api_key or region' :
+            (err.message || 'error accessing ibm tts service with provided credentials');
+          results.tts = {status: 'fail', reason};
+          SpeechCredential.ttsTestResult(sid, false);
+        }
+      }
+      if (cred.use_for_stt) {
+        const {stt_api_key, stt_region} = credential;
+        try {
+          await testIbmStt(logger, {stt_region, stt_api_key});
+          results.stt.status = 'ok';
+          SpeechCredential.sttTestResult(sid, true);
+        } catch (err) {
+          results.stt = {status: 'fail', reason: err.message};
+          SpeechCredential.sttTestResult(sid, false);
+        }
+      }
+    }
+
    res.status(200).json(results);
  } catch (err) {
    sysError(logger, res, err);
--- a/lib/utils/speech-utils.js
+++ b/lib/utils/speech-utils.js
@@ -179,6 +179,35 @@ const testWellSaidTts = async(logger, credentials) => {
  }
 };

+/**
+ * Check IBM text-to-speech credentials by asking the supplied helper for the vendor's voices.
+ *
+ * @param {object} logger - accepted to keep the signature parallel with the other testers; not used here
+ * @param {Function} getTtsVoices - async helper invoked with {vendor, credentials}
+ * @param {object} credentials - object carrying tts_api_key and tts_region
+ * @returns {Promise<*>} whatever getTtsVoices resolves with; a rejection from it propagates to the caller
+ */
+const testIbmTts = async(logger, getTtsVoices, credentials) => {
+  /* the helper expects the generic api_key/region names rather than the tts_-prefixed ones */
+  const ibmCredentials = {
+    api_key: credentials.tts_api_key,
+    region: credentials.tts_region
+  };
+  return await getTtsVoices({vendor: 'ibm', credentials: ibmCredentials});
+};
+
+/**
+ * Check IBM Watson speech-to-text credentials by listing the service's speech models.
+ *
+ * @param {object} logger - logger exposing debug() and info()
+ * @param {object} credentials
+ * @param {string} credentials.stt_api_key - api key handed to the IamAuthenticator
+ * @param {string} credentials.stt_region - region label placed into the service hostname
+ * @returns {Promise<void>} resolves with no value once the models have been listed
+ * @throws {Error} when stt_region is missing or contains anything other than letters, digits
+ * and hyphens; otherwise rethrows the error raised by the ibm-watson client
+ */
+const testIbmStt = async(logger, credentials) => {
+  const {stt_api_key, stt_region} = credentials;
+
+  /* the region becomes part of the hostname we connect to, so refuse any value that could alter the URL */
+  if (typeof stt_region !== 'string' || !/^[a-z0-9-]+$/i.test(stt_region)) {
+    throw new Error('invalid or missing stt_region');
+  }
+
+  /* required inside the function, so ibm-watson is only loaded when this test actually runs */
+  const SpeechToTextV1 = require('ibm-watson/speech-to-text/v1');
+  const { IamAuthenticator } = require('ibm-watson/auth');
+  const speechToText = new SpeechToTextV1({
+    authenticator: new IamAuthenticator({
+      apikey: stt_api_key
+    }),
+    serviceUrl: `https://api.${stt_region}.speech-to-text.watson.cloud.ibm.com`
+  });
+
+  /* listModels() already returns a promise, so await it directly instead of wrapping it in a new one */
+  try {
+    const speechModels = await speechToText.listModels();
+    logger.debug({speechModels}, 'got IBM speech models');
+  } catch (err) {
+    logger.info({err}, 'failed to get speech models');
+    throw err;
+  }
+};
+
 const testWellSaidStt = async(logger, credentials) => {
  //TODO
  return true;
@@ -195,5 +224,7 @@ module.exports = {
  testWellSaidStt,
  testNuanceTts,
  testNuanceStt,
-  testDeepgramStt
+  testDeepgramStt,
+  testIbmTts,
+  testIbmStt
 };
--- a/package-lock.json
+++ b/package-lock.json
--- a/package.json
+++ b/package.json
@@ -22,7 +22,7 @@
    "@google-cloud/speech": "^5.1.0",
    "@google-cloud/text-to-speech": "^4.0.3",
    "@jambonz/db-helpers": "^0.7.3",
-    "@jambonz/realtimedb-helpers": "^0.5.9",
+    "@jambonz/realtimedb-helpers": "^0.6.0",
    "@jambonz/time-series": "^0.2.5",
    "argon2-ffi": "^2.0.0",
    "aws-sdk": "^2.1152.0",
@@ -34,6 +34,7 @@
    "form-data": "^2.5.1",
    "form-urlencoded": "^6.1.0",
    "helmet": "^5.1.0",
+    "ibm-watson": "^7.1.2",
    "jsonwebtoken": "^8.5.1",
    "mailgun.js": "^3.7.3",
    "microsoft-cognitiveservices-speech-sdk": "^1.24.1",
--- a/test/speech-credentials.js
+++ b/test/speech-credentials.js
@@ -234,7 +234,71 @@ test('speech credentials tests', async(t) => {
      });
      t.ok(result.statusCode === 204, 'successfully deleted speech credential');
    }
+    /* add a credential for ibm tts */
+    if (process.env.IBM_TTS_API_KEY && process.env.IBM_TTS_REGION) {
+      result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
+        resolveWithFullResponse: true,
+        auth: authUser,
+        json: true,
+        body: {
+          vendor: 'ibm',
+          use_for_tts: true,
+          tts_api_key: process.env.IBM_TTS_API_KEY,
+          tts_region: process.env.IBM_TTS_REGION
+        }
+      });
+      t.ok(result.statusCode === 201, 'successfully added speech credential for ibm');
+      const ms_sid = result.body.sid;

+      /* test the speech credential */
+      result = await request.get(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}/test`, {
+        resolveWithFullResponse: true,
+        auth: authUser,
+        json: true,   
+      });
+      //console.log(JSON.stringify(result));
+      t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for ibm tts');
+
+      /* delete the credential */
+      result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}`, {
+        auth: authUser,
+        resolveWithFullResponse: true,
+      });
+      t.ok(result.statusCode === 204, 'successfully deleted speech credential');
+    }
+
+    /* add a credential for ibm stt */
+    if (process.env.IBM_STT_API_KEY && process.env.IBM_STT_REGION) {
+      result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
+        resolveWithFullResponse: true,
+        auth: authUser,
+        json: true,
+        body: {
+          vendor: 'ibm',
+          use_for_stt: true,
+          stt_api_key: process.env.IBM_STT_API_KEY,
+          stt_region: process.env.IBM_STT_REGION
+        }
+      });
+      t.ok(result.statusCode === 201, 'successfully added speech credential for ibm');
+      const ms_sid = result.body.sid;
+
+      /* test the speech credential */
+      result = await request.get(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}/test`, {
+        resolveWithFullResponse: true,
+        auth: authUser,
+        json: true,   
+      });
+      //console.log(JSON.stringify(result));
+      t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for ibm stt');
+
+      /* delete the credential */
+      result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}`, {
+        auth: authUser,
+        resolveWithFullResponse: true,
+      });
+      t.ok(result.statusCode === 204, 'successfully deleted speech credential');
+    }

    await deleteObjectBySid(request, '/Accounts', account_sid);
    await deleteObjectBySid(request, '/ServiceProviders', service_provider_sid);