update node image to the latest and most secure (#81)

Co-authored-by: Guilherme Rauen <g.rauen@cognigy.com>
add real testing of microsoft stt
2026-01-25 02:08:24 +00:00 · 2022-11-11 17:44:40 -05:00 · 2022-11-10 18:11:04 -05:00 · 2022-11-05 10:44:19 -04:00 · 2022-11-04 08:31:41 -04:00 · 2022-11-02 13:39:33 -04:00
6 changed files with 2054 additions and 2402 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 # Logs
 logs
 *.log
+run-tests.sh

 # Runtime data
 pids
--- a/2
+++ b/2
@@ -1,4 +1,4 @@
-FROM --platform=linux/amd64 node:18.9.0-alpine3.16 as base
+FROM --platform=linux/amd64 node:18.12.1-alpine3.16 as base

 RUN apk --update --no-cache add --virtual .builds-deps build-base python3

--- a/lib/utils/speech-utils.js
+++ b/lib/utils/speech-utils.js
@@ -2,6 +2,8 @@ const ttsGoogle = require('@google-cloud/text-to-speech');
 const sttGoogle = require('@google-cloud/speech').v1p1beta1;
 const Polly = require('aws-sdk/clients/polly');
 const AWS = require('aws-sdk');
+const { Deepgram } = require('@deepgram/sdk');
+const sdk = require('microsoft-cognitiveservices-speech-sdk');
 const bent = require('bent');
 const fs = require('fs');

@@ -32,6 +34,65 @@ const testGoogleStt = async(logger, credentials) => {
  }
 };

+/* Test Deepgram STT credentials by transcribing data/test_audio.wav. */
+/* Resolves with the Deepgram response when it has at least one alternative; rejects otherwise. */
+const testDeepgramStt = async(logger, credentials) => {
+  const {api_key} = credentials;
+  const client = new Deepgram(api_key);
+  const audio = {
+    buffer: fs.readFileSync(`${__dirname}/../../data/test_audio.wav`),
+    mimetype: 'audio/wav'
+  };
+
+  /* issued outside the try: a synchronous throw here is propagated without being logged */
+  const pending = client.transcription.preRecorded(audio, {punctuate: true});
+  let response;
+  let hasTranscript;
+  try {
+    response = await pending;
+    hasTranscript = response?.results?.channels[0]?.alternatives?.length > 0;
+  } catch (err) {
+    logger.info({err}, 'failed to get deepgram transcript');
+    throw err;
+  }
+
+  /* an empty result is rejected without the log entry above */
+  if (hasTranscript) return response;
+  throw new Error('no transcript returned');
+};
+
+/* Test Microsoft STT credentials by recognizing data/test_audio.wav as en-US speech. */
+/* Resolves (no value) when speech is recognized; rejects with an Error in every other case. */
+const testMicrosoftStt = async(logger, credentials) => {
+  const {api_key, region} = credentials;
+  const speechConfig = sdk.SpeechConfig.fromSubscription(api_key, region);
+  const audioConfig = sdk.AudioConfig.fromWavFileInput(fs.readFileSync(`${__dirname}/../../data/test_audio.wav`));
+  speechConfig.speechRecognitionLanguage = 'en-US';
+  const speechRecognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);
+  return new Promise((resolve, reject) => {
+    /* reject with an Error, then release the recognizer; also passed as the SDK error callback */
+    const fail = (err) => {
+      reject(err instanceof Error ? err : new Error(String(err)));
+      speechRecognizer.close();
+    };
+    speechRecognizer.recognizeOnceAsync((result) => {
+      if (result.reason === sdk.ResultReason.RecognizedSpeech) {
+        resolve();
+        return speechRecognizer.close();
+      }
+      /* NoMatch or any unexpected reason: reject rather than leave the promise pending */
+      if (result.reason !== sdk.ResultReason.Canceled) return fail('Speech could not be recognized.');
+      const cancellation = sdk.CancellationDetails.fromResult(result);
+      logger.info(`CANCELED: Reason=${cancellation.reason}`);
+      if (cancellation.reason === sdk.CancellationReason.Error) {
+        logger.info(`CANCELED: ErrorCode=${cancellation.ErrorCode}`);
+        logger.info(`CANCELED: ErrorDetails=${cancellation.errorDetails}`);
+      }
+      fail(cancellation.errorDetails || `recognition canceled, reason=${cancellation.reason}`);
+    }, fail);
+  });
+};
+
 const testAwsTts = (logger, credentials) => {
  const polly = new Polly(credentials);
  return new Promise((resolve, reject) => {
@@ -89,11 +150,6 @@ const testMicrosoftTts = async(logger, credentials) => {
  }
 };

-const testMicrosoftStt = async(logger, credentials) => {
-  //TODO
-  return true;
-};
-
 const testWellSaidTts = async(logger, credentials) => {
  const {api_key} = credentials;
  try {
@@ -127,4 +183,5 @@ module.exports = {
  testMicrosoftTts,
  testMicrosoftStt,
  testWellSaidStt,
+  testDeepgramStt
 };
--- a/package-lock.json
+++ b/package-lock.json
--- a/package.json
+++ b/package.json
@@ -18,10 +18,11 @@
    "url": "https://github.com/jambonz/jambonz-api-server.git"
  },
  "dependencies": {
-    "@google-cloud/speech": "^4.10.2",
-    "@google-cloud/text-to-speech": "^3.4.0",
-    "@jambonz/db-helpers": "^0.6.19",
-    "@jambonz/realtimedb-helpers": "^0.4.35",
+    "@deepgram/sdk": "^1.10.2",
+    "@google-cloud/speech": "^5.1.0",
+    "@google-cloud/text-to-speech": "^4.0.3",
+    "@jambonz/db-helpers": "^0.7.3",
+    "@jambonz/realtimedb-helpers": "^0.5.9",
    "@jambonz/time-series": "^0.2.5",
    "argon2-ffi": "^2.0.0",
    "aws-sdk": "^2.1152.0",
@@ -35,6 +36,7 @@
    "helmet": "^5.1.0",
    "jsonwebtoken": "^8.5.1",
    "mailgun.js": "^3.7.3",
+    "microsoft-cognitiveservices-speech-sdk": "^1.24.1",
    "mysql2": "^2.3.3",
    "passport": "^0.6.0",
    "passport-http-bearer": "^1.0.1",
--- a/test/speech-credentials.js
+++ b/test/speech-credentials.js
@@ -30,7 +30,9 @@ test('speech credentials tests', async(t) => {
      json: true,
      body: {
        vendor: 'google',
-        service_key: jsonKey
+        service_key: jsonKey,
+        use_for_tts: true,
+        use_for_stt: true
      }
    });
    t.ok(result.statusCode === 201, 'successfully added a speech credential to service provider');
@@ -61,7 +63,9 @@ test('speech credentials tests', async(t) => {
      json: true,
      body: {
        vendor: 'google',
-        service_key: jsonKey
+        service_key: jsonKey,
+        use_for_tts: true,
+        use_for_stt: true
      }
    });
    t.ok(result.statusCode === 201, 'successfully added speech credential');
@@ -110,20 +114,20 @@ test('speech credentials tests', async(t) => {
    });
    t.ok(result.statusCode === 204, 'successfully deleted speech credential');

-    /* add a credential for microsoft */
-    if (process.env.MICROSOFT_API_KEY && process.env.MICROSOFT_REGION) {
+    /* add / test a credential for google */
+    if (process.env.GCP_JSON_KEY) {
      result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
        resolveWithFullResponse: true,
        auth: authUser,
        json: true,
        body: {
-          vendor: 'microsoft',
+          vendor: 'google',
          use_for_tts: true,
-          api_key: process.env.MICROSOFT_API_KEY,
-          region: process.env.MICROSOFT_REGION
+          use_for_stt: true,
+          service_key: process.env.GCP_JSON_KEY
        }
      });
-      t.ok(result.statusCode === 201, 'successfully added speech credential');
+      t.ok(result.statusCode === 201, 'successfully added speech credential for google');
      const ms_sid = result.body.sid;

      /* test the speech credential */
@@ -132,7 +136,37 @@ test('speech credentials tests', async(t) => {
        auth: authUser,
        json: true,   
      });
-      console.log(JSON.stringify(result));
+      //console.log(JSON.stringify(result));
+      t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for google tts');
+      t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for google stt');
+    }
+
+    /* add / test a credential for microsoft */
+    if (process.env.MICROSOFT_API_KEY && process.env.MICROSOFT_REGION) {
+      result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
+        resolveWithFullResponse: true,
+        auth: authUser,
+        json: true,
+        body: {
+          vendor: 'microsoft',
+          use_for_tts: true,
+          use_for_stt: true,
+          api_key: process.env.MICROSOFT_API_KEY,
+          region: process.env.MICROSOFT_REGION
+        }
+      });
+      t.ok(result.statusCode === 201, 'successfully added speech credential for microsoft');
+      const ms_sid = result.body.sid;
+
+      /* test the speech credential */
+      result = await request.get(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}/test`, {
+        resolveWithFullResponse: true,
+        auth: authUser,
+        json: true,   
+      });
+      //console.log(JSON.stringify(result));
+      t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for microsoft tts');
+      t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for microsoft stt');
    }

    /* add a credential for wellsaid */
@@ -156,7 +190,8 @@ test('speech credentials tests', async(t) => {
        auth: authUser,
        json: true,   
      });
-      console.log(JSON.stringify(result));
+      //console.log(JSON.stringify(result));
+      t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for wellsaid');

      /* delete the credential */
      result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}`, {
Author	SHA1	Message	Date
Guilherme Rauen	0fc3c95cc5	update node image to the latest and most secure (#81) Co-authored-by: Guilherme Rauen <g.rauen@cognigy.com>	2022-11-11 17:44:40 -05:00
Dave Horton	aeebf6aee0	add real testing of microsoft stt	2022-11-10 18:11:04 -05:00
Dave Horton	8026f45467	update deps	2022-11-05 10:44:19 -04:00
Dave Horton	505884e68e	update deps	2022-11-04 08:31:41 -04:00
Dave Horton	9e4d6eb88b	update deps	2022-11-02 13:39:33 -04:00
Dave Horton	e1cfbe5010	update db-helpers	2022-11-01 21:21:38 -04:00
Dave Horton	b8dc0b2f05	update google speech libs	2022-10-31 11:15:40 -04:00