/**
 * Check a set of Microsoft speech-to-text credentials by running a single
 * one-shot recognition against the bundled test audio file.
 *
 * @param {object} logger - logger exposing `info()`; receives cancellation details
 * @param {object} credentials
 * @param {string} credentials.api_key - subscription key handed to the speech sdk
 * @param {string} credentials.region - region handed to the speech sdk
 * @returns {Promise<void>} resolves when the sdk reports recognized speech;
 *   rejects with an Error when nothing was recognized, the request was
 *   canceled, the sdk reported a failure, or the result reason is unexpected
 */
const testMicrosoftStt = async(logger, credentials) => {
  const {api_key, region} = credentials;

  const speechConfig = sdk.SpeechConfig.fromSubscription(api_key, region);
  const audioConfig = sdk.AudioConfig.fromWavFileInput(fs.readFileSync(`${__dirname}/../../data/test_audio.wav`));
  speechConfig.speechRecognitionLanguage = 'en-US';
  const speechRecognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);

  return new Promise((resolve, reject) => {
    /* settle first so the promise can never be left pending, then release the recognizer */
    const settle = (fn, arg) => {
      fn(arg);
      speechRecognizer.close();
    };

    speechRecognizer.recognizeOnceAsync((result) => {
      switch (result.reason) {
        case sdk.ResultReason.RecognizedSpeech:
          settle(resolve);
          break;
        case sdk.ResultReason.NoMatch:
          settle(reject, new Error('Speech could not be recognized.'));
          break;
        case sdk.ResultReason.Canceled: {
          const cancellation = sdk.CancellationDetails.fromResult(result);
          logger.info(`CANCELED: Reason=${cancellation.reason}`);
          if (cancellation.reason === sdk.CancellationReason.Error) {
            logger.info(`CANCELED: ErrorCode=${cancellation.ErrorCode}`);
            logger.info(`CANCELED: ErrorDetails=${cancellation.errorDetails}`);
          }
          /* keep the raw cancellation reason available on the error object */
          const message = cancellation.errorDetails ||
            `Speech recognition canceled (reason ${cancellation.reason})`;
          settle(reject, Object.assign(new Error(message), {reason: cancellation.reason}));
          break;
        }
        default:
          /* any other reason would otherwise leave the caller waiting forever */
          settle(reject, new Error(`Unexpected recognition result reason: ${result.reason}`));
          break;
      }
    }, (err) => {
      /* failure callback: without it an sdk/transport error never settles the promise */
      settle(reject, err instanceof Error ? err : new Error(String(err)));
    });
  });
};
"sha512-26/lmzsRHgIzP03DSts1pqp+5UINdpDo35jnmi9QRzhYebtLrdEydwPR9TF0wa9TB+vbMeMFWD+gy9VKfWXAvQ==", + "version": "1.24.1", + "resolved": "https://registry.npmjs.org/microsoft-cognitiveservices-speech-sdk/-/microsoft-cognitiveservices-speech-sdk-1.24.1.tgz", + "integrity": "sha512-7oAlVge4cPnCeNHeIVUQe4tKZmfGtsriD8rjl7uAoPcwG4hF3BXVVhUEkhlW+B8i5zVAJl3fH4BbAfZPCtrbvg==", "requires": { "agent-base": "^6.0.1", "asn1.js-rfc2560": "^5.0.1", diff --git a/package.json b/package.json index 3c6440a..a19b1bf 100644 --- a/package.json +++ b/package.json @@ -36,6 +36,7 @@ "helmet": "^5.1.0", "jsonwebtoken": "^8.5.1", "mailgun.js": "^3.7.3", + "microsoft-cognitiveservices-speech-sdk": "^1.24.1", "mysql2": "^2.3.3", "passport": "^0.6.0", "passport-http-bearer": "^1.0.1", diff --git a/test/speech-credentials.js b/test/speech-credentials.js index ca0cf84..a6f67a6 100644 --- a/test/speech-credentials.js +++ b/test/speech-credentials.js @@ -30,7 +30,9 @@ test('speech credentials tests', async(t) => { json: true, body: { vendor: 'google', - service_key: jsonKey + service_key: jsonKey, + use_for_tts: true, + use_for_stt: true } }); t.ok(result.statusCode === 201, 'successfully added a speech credential to service provider'); @@ -63,7 +65,9 @@ test('speech credentials tests', async(t) => { json: true, body: { vendor: 'google', - service_key: jsonKey + service_key: jsonKey, + use_for_tts: true, + use_for_stt: true } }); t.ok(result.statusCode === 201, 'successfully added speech credential'); @@ -112,20 +116,20 @@ test('speech credentials tests', async(t) => { }); t.ok(result.statusCode === 204, 'successfully deleted speech credential'); - /* add a credential for microsoft */ - if (process.env.MICROSOFT_API_KEY && process.env.MICROSOFT_REGION) { + /* add / test a credential for google */ + if (process.env.GCP_JSON_KEY) { result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, { resolveWithFullResponse: true, auth: authUser, json: true, body: { - vendor: 'microsoft', + 
vendor: 'google', use_for_tts: true, - api_key: process.env.MICROSOFT_API_KEY, - region: process.env.MICROSOFT_REGION + use_for_stt: true, + service_key: process.env.GCP_JSON_KEY } }); - t.ok(result.statusCode === 201, 'successfully added speech credential'); + t.ok(result.statusCode === 201, 'successfully added speech credential for google'); const ms_sid = result.body.sid; /* test the speech credential */ @@ -134,8 +138,37 @@ test('speech credentials tests', async(t) => { auth: authUser, json: true, }); - console.log(JSON.stringify(result)); - t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for deepgram'); + //console.log(JSON.stringify(result)); + t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for google tts'); + t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for google stt'); + } + + /* add / test a credential for microsoft */ + if (process.env.MICROSOFT_API_KEY && process.env.MICROSOFT_REGION) { + result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, { + resolveWithFullResponse: true, + auth: authUser, + json: true, + body: { + vendor: 'microsoft', + use_for_tts: true, + use_for_stt: true, + api_key: process.env.MICROSOFT_API_KEY, + region: process.env.MICROSOFT_REGION + } + }); + t.ok(result.statusCode === 201, 'successfully added speech credential for microsoft'); + const ms_sid = result.body.sid; + + /* test the speech credential */ + result = await request.get(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}/test`, { + resolveWithFullResponse: true, + auth: authUser, + json: true, + }); + //console.log(JSON.stringify(result)); + t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for microsoft tts'); + t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential 
for microsoft stt'); } /* add a credential for wellsaid */ @@ -159,7 +192,7 @@ test('speech credentials tests', async(t) => { auth: authUser, json: true, }); - console.log(JSON.stringify(result)); + //console.log(JSON.stringify(result)); t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for wellsaid'); /* delete the credential */ @@ -191,7 +224,7 @@ test('speech credentials tests', async(t) => { auth: authUser, json: true, }); - console.log(JSON.stringify(result)); + //console.log(JSON.stringify(result)); t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for deepgram'); /* delete the credential */