From 15f85c973086d53279974a000d109b7b0a89bb1e Mon Sep 17 00:00:00 2001 From: akirilyuk Date: Mon, 14 Feb 2022 14:08:56 +0100 Subject: [PATCH 1/5] add say and gather task features --- lib/tasks/gather.js | 167 ++++++++++++++++++++++++++++++++------------ lib/tasks/say.js | 21 ++++-- 2 files changed, 139 insertions(+), 49 deletions(-) diff --git a/lib/tasks/gather.js b/lib/tasks/gather.js index f80afe11..a73b0809 100644 --- a/lib/tasks/gather.js +++ b/lib/tasks/gather.js @@ -10,19 +10,25 @@ const { const makeTask = require('./make_task'); const assert = require('assert'); +const GATHER_STABILITY_THRESHOLD = Number(process.env.JAMBONZ_GATHER_STABILITY_THRESHOLD || 0.7); + class TaskGather extends Task { constructor(logger, opts, parentTask) { super(logger, opts); this.preconditions = TaskPreconditions.Endpoint; [ - 'finishOnKey', 'hints', 'input', 'numDigits', - 'partialResultHook', + 'finishOnKey', 'hints', 'input', 'numDigits', 'minDigits', 'maxDigits', + 'interDigitTimeout', 'submitDigit', 'partialResultHook', 'bargein', 'dtmfBargein', + 'retries', 'retryPromptTts', 'retryPromptUrl', 'speechTimeout', 'timeout', 'say', 'play' ].forEach((k) => this[k] = this.data[k]); + this.listenDuringPrompt = this.data.listenDuringPrompt === false ? false : true; + this.minBargeinWordCount = this.data.minBargeinWordCount || 1; - this.timeout = (this.timeout || 5) * 1000; - this.interim = this.partialResultCallback; + this.logger.debug({opts}, 'created gather task'); + this.timeout = (this.timeout || 15) * 1000; + this.interim = this.partialResultCallback || this.bargein; if (this.data.recognizer) { const recognizer = this.data.recognizer; this.vendor = recognizer.vendor; @@ -30,10 +36,6 @@ class TaskGather extends Task { this.hints = recognizer.hints || []; this.altLanguages = recognizer.altLanguages || []; - /* vad: if provided, we dont connect to recognizer until voice activity is detected */ - const {enable, voiceMs = 0, mode = -1} = recognizer.vad || {}; - this.vad = {enable, voiceMs, mode}; - /* aws options */ this.vocabularyName = recognizer.vocabularyName; this.vocabularyFilterName = recognizer.vocabularyFilterName; @@ -52,6 +54,12 @@ class TaskGather extends Task { if (this.say) this.sayTask = makeTask(this.logger, {say: this.say}, this); if (this.play) this.playTask = makeTask(this.logger, {play: this.play}, this); + if(this.sayTask || this.playTask){ + // this is specially for barge in where we want to make a bargebale promt + // to a user without listening after the say task has finished + this.listenAfterSpeech = typeof this.data.listenAfterSpeech === "boolean" ? this.data.listenAfterSpeech : true; + } + this.parentTask = parentTask; } @@ -84,33 +92,63 @@ class TaskGather extends Task { throw new Error(`no speech-to-text service credentials for ${this.vendor} have been configured`); } + const startListening = (cs, ep) => { + this._startTimer(); + if (this.input.includes('speech') && !this.listenDuringPrompt) { + this._initSpeech(cs, ep) + .then(() => { + this._startTranscribing(ep); + return updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid); + }) + .catch(() => {}); + } + }; + try { if (this.sayTask) { - this.sayTask.exec(cs, ep); // kicked off, _not_ waiting for it to complete - this.sayTask.on('playDone', (err) => { - if (!this.killed) this._startTimer(); + this.logger.debug('Gather: kicking off say task'); + this.sayTask.exec(cs, ep); + this.sayTask.on('playDone', async(err) => { + if (err) return this.logger.error({err}, 'Gather:exec Error playing tts'); + this.logger.debug('Gather: say task completed'); + if (!this.killed) { + if (this.listenAfterSpeech === true) { + startListening(cs, ep); + } else { + this.notifyTaskDone(); + } + } }); } else if (this.playTask) { this.playTask.exec(cs, ep); // kicked off, _not_ waiting for it to complete - this.playTask.on('playDone', (err) => { - if (!this.killed) this._startTimer(); - }); + this.playTask.on('playDone', async(err) => { + if (err) return this.logger.error({err}, 'Gather:exec Error playing url'); + if (!this.killed) { + if (this.listenAfterSpeech === true) { + startListening(cs, ep); + } else { + this.notifyTaskDone(); + } + } + } + ); } - else this._startTimer(); + else startListening(cs, ep); - if (this.input.includes('speech')) { + if (this.input.includes('speech') && this.listenDuringPrompt) { await this._initSpeech(cs, ep); this._startTranscribing(ep); updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid) .catch(() => {/*already logged error */}); } - if (this.input.includes('digits')) { + if (this.input.includes('digits') || this.dtmfBargein) { ep.on('dtmf', this._onDtmf.bind(this, cs, ep)); } await this.awaitTaskDone(); + this.logger.debug('Gather:exec task has completed'); } catch (err) { this.logger.error(err, 'TaskGather:exec error'); } @@ -122,6 +160,7 @@ class TaskGather extends Task { } kill(cs) { + this.logger.debug('Gather:kill'); super.kill(cs); this._killAudio(cs); this.ep.removeAllListeners('dtmf'); @@ -130,23 +169,33 @@ class TaskGather extends Task { _onDtmf(cs, ep, evt) { this.logger.debug(evt, 'TaskGather:_onDtmf'); - if (evt.dtmf === this.finishOnKey) this._resolve('dtmf-terminator-key'); + clearTimeout(this.interDigitTimer); + let resolved = false; + if (this.dtmfBargein) this._killAudio(cs); + if (evt.dtmf === this.finishOnKey) { + resolved = true; + this._resolve('dtmf-terminator-key'); + } else { this.digitBuffer += evt.dtmf; - if (this.digitBuffer.length === this.numDigits) this._resolve('dtmf-num-digits'); + const len = this.digitBuffer.length; + if (len === this.numDigits || len === this.maxDigits) { + resolved = true; + this._resolve('dtmf-num-digits'); + } + } + + if (!resolved && this.interDigitTimeout > 0 && this.digitBuffer.length >= this.minDigits) { + /* start interDigitTimer */ + const ms = this.interDigitTimeout * 1000; + this.logger.debug(`starting interdigit timer of ${ms}`); + this.interDigitTimer = setTimeout(() => this._resolve('dtmf-interdigit-timeout'), ms); } - this._killAudio(cs); } async _initSpeech(cs, ep) { const opts = {}; - if (this.vad.enable) { - opts.START_RECOGNIZING_ON_VAD = 1; - if (this.vad.voiceMs) opts.RECOGNIZER_VAD_VOICE_MS = this.vad.voiceMs; - if (this.vad.mode >= 0 && this.vad.mode <= 3) opts.RECOGNIZER_VAD_MODE = this.vad.mode; - } - if ('google' === this.vendor) { if (this.sttCredentials) opts.GOOGLE_APPLICATION_CREDENTIALS = JSON.stringify(this.sttCredentials.credentials); Object.assign(opts, { @@ -207,7 +256,7 @@ class TaskGather extends Task { ep.startTranscription({ vendor: this.vendor, locale: this.language, - interim: this.partialResultCallback ? true : false, + interim: this.interim, }).catch((err) => { const {writeAlerts, AlertType} = this.cs.srf.locals; this.logger.error(err, 'TaskGather:_startTranscribing error'); @@ -247,25 +296,56 @@ class TaskGather extends Task { } _onTranscription(cs, ep, evt) { + this.logger.debug(evt, 'TaskGather:_onTranscription'); if ('aws' === this.vendor && Array.isArray(evt) && evt.length > 0) evt = evt[0]; if ('microsoft' === this.vendor) { - const nbest = evt.NBest; - const newEvent = { - is_final: evt.RecognitionStatus === 'Success', - alternatives: [ - { - confidence: nbest[0].Confidence, - transcript: nbest[0].Display - } - ] - }; - evt = newEvent; + const final = evt.RecognitionStatus === 'Success'; + if (final) { + const nbest = evt.NBest; + evt = { + is_final: true, + alternatives: [ + { + confidence: nbest[0].Confidence, + transcript: nbest[0].Display + } + ] + }; + } + else { + evt = { + is_final: false, + alternatives: [ + { + transcript: evt.Text + } + ] + }; + } } - this.logger.debug(evt, 'TaskGather:_onTranscription'); if (evt.is_final) this._resolve('speech', evt); - else if (this.partialResultHook) { - this.cs.requestor.request(this.partialResultHook, Object.assign({speech: evt}, this.cs.callInfo)) - .catch((err) => this.logger.info(err, 'GatherTask:_onTranscription error')); + else { + const recognizeSuccess = evt.stability > GATHER_STABILITY_THRESHOLD; + /* + we need to make sure to only send something on barge in if we have + something valid therefore we need to check the recognition + stability, which applies to GOOGLE + for MS we will have a final event, meaning we will not run into + the current if else branch. + + For AWS we still need more testing + */ + if (recognizeSuccess && + this.bargein && + evt.alternatives[0].transcript.split(' ').length >= this.minBargeinWordCount) { + this.logger.debug('Gather:_onTranscription - killing audio due to bargein'); + this._killAudio(cs); + this._resolve('speech', evt); + } + if (this.partialResultHook) { + this.cs.requestor.request(this.partialResultHook, Object.assign({speech: evt}, this.cs.callInfo)) + .catch((err) => this.logger.info(err, 'GatherTask:_onTranscription error')); + } } } _onEndOfUtterance(cs, ep) { @@ -291,7 +371,8 @@ class TaskGather extends Task { this._clearTimer(); if (reason.startsWith('dtmf')) { - await this.performAction({digits: this.digitBuffer, reason: 'dtmfDetected'}); + if (this.parentTask) this.parentTask.emit('dtmf-collected', {reason, digits: this.digitBuffer}); + else await this.performAction({digits: this.digitBuffer, reason: 'dtmfDetected'}); } else if (reason.startsWith('speech')) { if (this.parentTask) this.parentTask.emit('transcription', evt); diff --git a/lib/tasks/say.js b/lib/tasks/say.js index 3d100f1d..4001a562 100644 --- a/lib/tasks/say.js +++ b/lib/tasks/say.js @@ -21,15 +21,20 @@ class TaskSay extends Task { const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, srf); const {writeAlerts, AlertType, stats} = srf.locals; const {synthAudio} = srf.locals.dbHelpers; - const hasVerbLevelTts = this.synthesizer.vendor && this.synthesizer.vendor !== 'default'; - const vendor = hasVerbLevelTts ? this.synthesizer.vendor : cs.speechSynthesisVendor ; - const language = hasVerbLevelTts ? this.synthesizer.language : cs.speechSynthesisLanguage ; - const voice = hasVerbLevelTts ? this.synthesizer.voice : cs.speechSynthesisVoice ; + const vendor = this.synthesizer.vendor && this.synthesizer.vendor !== 'default' ? this.synthesizer.vendor : cs.speechSynthesisVendor; + const language = this.synthesizer.language && this.synthesizer.language !== 'default' ? this.synthesizer.language : cs.speechSynthesisLanguage ; + const voice = this.synthesizer.voice && this.synthesizer.voice !== 'default' ? this.synthesizer.voice : cs.speechSynthesisVoice; const engine = this.synthesizer.engine || 'standard'; const salt = cs.callSid; const credentials = cs.getSpeechCredentials(vendor, 'tts'); - this.logger.info({language, voice}, `Task:say - using vendor: ${vendor}`); + this.logger.info({language, + voice, + localSynthesizer: this.synthesizer, + speechSynthesisVendor: cs.speechSynthesisVendor, + speechSynthesisLanguage: cs.speechSynthesisLanguage, + speechSynthesisVoice: cs.speechSynthesisVoice + }, `Task:say - using vendor: ${vendor}`); this.ep = ep; try { if (!credentials) { @@ -79,7 +84,11 @@ class TaskSay extends Task { const {memberId, confName, confUuid} = cs; await this.playToConfMember(this.ep, memberId, confName, confUuid, filepath[segment]); } - else await ep.play(filepath[segment]); + else { + this.logger.debug(`Say:exec sending command to play file ${filepath[segment]}`); + await ep.play(filepath[segment]); + this.logger.debug(`Say:exec completed play file ${filepath[segment]}`); + } } while (!this.killed && ++segment < filepath.length); } } catch (err) { From d79c733aa2969060df2f0dcbb02ffc4e13431a86 Mon Sep 17 00:00:00 2001 From: akirilyuk Date: Mon, 14 Feb 2022 14:16:44 +0100 Subject: [PATCH 2/5] fix linting --- lib/tasks/gather.js | 16 ++++++++-------- lib/tasks/say.js | 16 +++++++++++----- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/lib/tasks/gather.js b/lib/tasks/gather.js index a73b0809..12755e85 100644 --- a/lib/tasks/gather.js +++ b/lib/tasks/gather.js @@ -10,7 +10,7 @@ const { const makeTask = require('./make_task'); const assert = require('assert'); -const GATHER_STABILITY_THRESHOLD = Number(process.env.JAMBONZ_GATHER_STABILITY_THRESHOLD || 0.7); +const GATHER_STABILITY_THRESHOLD = Number(process.env.JAMBONZ_GATHER_STABILITY_THRESHOLD || 0.7); class TaskGather extends Task { constructor(logger, opts, parentTask) { @@ -54,10 +54,10 @@ class TaskGather extends Task { if (this.say) this.sayTask = makeTask(this.logger, {say: this.say}, this); if (this.play) this.playTask = makeTask(this.logger, {play: this.play}, this); - if(this.sayTask || this.playTask){ - // this is specially for barge in where we want to make a bargebale promt - // to a user without listening after the say task has finished - this.listenAfterSpeech = typeof this.data.listenAfterSpeech === "boolean" ? this.data.listenAfterSpeech : true; + if (this.sayTask || this.playTask) { + // this is specially for barge in where we want to make a bargebale promt + // to a user without listening after the say task has finished + this.listenAfterSpeech = typeof this.data.listenAfterSpeech === 'boolean' ? this.data.listenAfterSpeech : true; } this.parentTask = parentTask; @@ -326,11 +326,11 @@ class TaskGather extends Task { if (evt.is_final) this._resolve('speech', evt); else { const recognizeSuccess = evt.stability > GATHER_STABILITY_THRESHOLD; - /* - we need to make sure to only send something on barge in if we have + /* + we need to make sure to only send something on barge in if we have something valid therefore we need to check the recognition stability, which applies to GOOGLE - for MS we will have a final event, meaning we will not run into + for MS we will have a final event, meaning we will not run into the current if else branch. For AWS we still need more testing diff --git a/lib/tasks/say.js b/lib/tasks/say.js index 4001a562..5f640720 100644 --- a/lib/tasks/say.js +++ b/lib/tasks/say.js @@ -21,15 +21,21 @@ class TaskSay extends Task { const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, srf); const {writeAlerts, AlertType, stats} = srf.locals; const {synthAudio} = srf.locals.dbHelpers; - const vendor = this.synthesizer.vendor && this.synthesizer.vendor !== 'default' ? this.synthesizer.vendor : cs.speechSynthesisVendor; - const language = this.synthesizer.language && this.synthesizer.language !== 'default' ? this.synthesizer.language : cs.speechSynthesisLanguage ; - const voice = this.synthesizer.voice && this.synthesizer.voice !== 'default' ? this.synthesizer.voice : cs.speechSynthesisVoice; + const vendor = this.synthesizer.vendor && this.synthesizer.vendor !== 'default' + ? this.synthesizer.vendor + : cs.speechSynthesisVendor; + const language = this.synthesizer.language && this.synthesizer.language !== 'default' + ? this.synthesizer.language + : cs.speechSynthesisLanguage ; + const voice = this.synthesizer.voice && this.synthesizer.voice !== 'default' + ? this.synthesizer.voice + : cs.speechSynthesisVoice; const engine = this.synthesizer.engine || 'standard'; const salt = cs.callSid; const credentials = cs.getSpeechCredentials(vendor, 'tts'); - this.logger.info({language, - voice, + this.logger.info({language, + voice, localSynthesizer: this.synthesizer, speechSynthesisVendor: cs.speechSynthesisVendor, speechSynthesisLanguage: cs.speechSynthesisLanguage, From b126719ba738010646f66e4d7cc83eaf3d9fee3d Mon Sep 17 00:00:00 2001 From: akirilyuk Date: Mon, 14 Feb 2022 14:22:04 +0100 Subject: [PATCH 3/5] add husky pre push hook --- .husky/pre-push | 3 +++ package-lock.json | 22 ++++++++++++++++++++++ package.json | 1 + 3 files changed, 26 insertions(+) create mode 100644 .husky/pre-push diff --git a/.husky/pre-push b/.husky/pre-push new file mode 100644 index 00000000..eee4bd26 --- /dev/null +++ b/.husky/pre-push @@ -0,0 +1,3 @@ +#!/bin/sh +. "$(dirname "$0")/_/husky.sh" +npm run jslint \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index bcec87c5..5a9e6f72 100644 --- a/package-lock.json +++ b/package-lock.json @@ -39,6 +39,7 @@ "clear-module": "^4.1.1", "eslint": "^7.20.0", "eslint-plugin-promise": "^4.3.1", + "husky": "^7.0.4", "nyc": "^15.1.0", "tape": "^5.2.2" }, @@ -2758,6 +2759,21 @@ "node": ">= 6" } }, + "node_modules/husky": { + "version": "7.0.4", + "resolved": "https://registry.npmjs.org/husky/-/husky-7.0.4.tgz", + "integrity": "sha512-vbaCKN2QLtP/vD4yvs6iz6hBEo6wkSzs8HpRah1Z6aGmF2KW5PdYuAd7uX5a+OyBZHBhd+TFLqgjUgytQr4RvQ==", + "dev": true, + "bin": { + "husky": "lib/bin.js" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/typicode" + } + }, "node_modules/iconv-lite": { "version": "0.4.24", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", @@ -7658,6 +7674,12 @@ "debug": "4" } }, + "husky": { + "version": "7.0.4", + "resolved": "https://registry.npmjs.org/husky/-/husky-7.0.4.tgz", + "integrity": "sha512-vbaCKN2QLtP/vD4yvs6iz6hBEo6wkSzs8HpRah1Z6aGmF2KW5PdYuAd7uX5a+OyBZHBhd+TFLqgjUgytQr4RvQ==", + "dev": true + }, "iconv-lite": { "version": "0.4.24", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", diff --git a/package.json b/package.json index 9ed38595..8d61eb89 100644 --- a/package.json +++ b/package.json @@ -56,6 +56,7 @@ "clear-module": "^4.1.1", "eslint": "^7.20.0", "eslint-plugin-promise": "^4.3.1", + "husky": "7.0.4", "nyc": "^15.1.0", "tape": "^5.2.2" } From 63c0c9702475e2bc2876674d101c20b97ee4013d Mon Sep 17 00:00:00 2001 From: akirilyuk Date: Mon, 14 Feb 2022 14:23:26 +0100 Subject: [PATCH 4/5] add husky pre push hook --- .husky/pre-push | 0 package-lock.json | 2 +- package.json | 3 ++- 3 files changed, 3 insertions(+), 2 deletions(-) mode change 100644 => 100755 .husky/pre-push diff --git a/.husky/pre-push b/.husky/pre-push old mode 100644 new mode 100755 diff --git a/package-lock.json b/package-lock.json index 5a9e6f72..bba63e96 100644 --- a/package-lock.json +++ b/package-lock.json @@ -39,7 +39,7 @@ "clear-module": "^4.1.1", "eslint": "^7.20.0", "eslint-plugin-promise": "^4.3.1", - "husky": "^7.0.4", + "husky": "7.0.4", "nyc": "^15.1.0", "tape": "^5.2.2" }, diff --git a/package.json b/package.json index 8d61eb89..6353fe8b 100644 --- a/package.json +++ b/package.json @@ -23,7 +23,8 @@ "start": "node app", "test": "NODE_ENV=test JAMBONES_HOSTING=1 DRACHTIO_HOST=127.0.0.1 DRACHTIO_PORT=9060 DRACHTIO_SECRET=cymru JAMBONES_MYSQL_HOST=127.0.0.1 JAMBONES_MYSQL_PORT=3360 JAMBONES_MYSQL_USER=jambones_test JAMBONES_MYSQL_PASSWORD=jambones_test JAMBONES_MYSQL_DATABASE=jambones_test JAMBONES_REDIS_HOST=127.0.0.1 JAMBONES_REDIS_PORT=16379 JAMBONES_LOGLEVEL=debug ENABLE_METRICS=0 HTTP_PORT=3000 JAMBONES_SBCS=172.38.0.10 JAMBONES_FREESWITCH=127.0.0.1:8022:ClueCon:docker-host JAMBONES_TIME_SERIES_HOST=127.0.0.1 JAMBONES_NETWORK_CIDR=172.38.0.0/16 node test/ ", "coverage": "./node_modules/.bin/nyc --reporter html --report-dir ./coverage npm run test", - "jslint": "eslint app.js lib" + "jslint": "eslint app.js lib", + "prepare": "husky install" }, "dependencies": { "@cognigy/socket-client": "^4.5.5", From caa7b3a03aec649ac743f6a865d68bc21be63e8a Mon Sep 17 00:00:00 2001 From: akirilyuk Date: Mon, 14 Feb 2022 17:11:47 +0100 Subject: [PATCH 5/5] change log level to debug --- lib/tasks/say.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/tasks/say.js b/lib/tasks/say.js index 5f640720..1d2904f9 100644 --- a/lib/tasks/say.js +++ b/lib/tasks/say.js @@ -34,7 +34,7 @@ class TaskSay extends Task { const salt = cs.callSid; const credentials = cs.getSpeechCredentials(vendor, 'tts'); - this.logger.info({language, + this.logger.debug({language, voice, localSynthesizer: this.synthesizer, speechSynthesisVendor: cs.speechSynthesisVendor,