Feature/gather enhancements (#73)

* add bargein support to gather

* bugfix: gather handles interim results from azure

* gather: support for min/max digits and interdigit timeout

* add task summary to some log messages

* logging improvements
This commit is contained in:
Dave Horton
2022-02-27 13:38:02 -05:00
committed by GitHub
parent 3c5d392407
commit f317fbaa45
8 changed files with 160 additions and 45 deletions

View File

@@ -135,6 +135,11 @@ class TaskDial extends Task {
return !process.env.ANCHOR_MEDIA_ALWAYS && !this.listenTask && !this.transcribeTask;
}
get summary() {
if (this.target.length === 1) return `${this.name}{type=${this.target[0].type}}`;
else return `${this.name}{${this.target.length} targets}`;
}
async exec(cs) {
await super.exec(cs);
try {

View File

@@ -9,6 +9,7 @@ const {
const makeTask = require('./make_task');
const assert = require('assert');
const GATHER_STABILITY_THRESHOLD = Number(process.env.JAMBONZ_GATHER_STABILITY_THRESHOLD || 0.7);
class TaskGather extends Task {
constructor(logger, opts, parentTask) {
@@ -16,12 +17,15 @@ class TaskGather extends Task {
this.preconditions = TaskPreconditions.Endpoint;
[
'finishOnKey', 'hints', 'input', 'numDigits',
'partialResultHook',
'finishOnKey', 'hints', 'input', 'numDigits', 'minDigits', 'maxDigits',
'interDigitTimeout', 'partialResultHook', 'bargein', 'dtmfBargein',
'speechTimeout', 'timeout', 'say', 'play'
].forEach((k) => this[k] = this.data[k]);
this.timeout = (this.timeout || 5) * 1000;
/* when collecting dtmf, bargein on dtmf is true unless explicitly set to false */
if (this.dtmfBargein !== false && this.input.includes('digits')) this.dtmfBargein = true;
this.timeout = (this.timeout || 15) * 1000;
this.interim = this.partialResultCallback;
if (this.data.recognizer) {
const recognizer = this.data.recognizer;
@@ -34,6 +38,9 @@ class TaskGather extends Task {
const {enable, voiceMs = 0, mode = -1} = recognizer.vad || {};
this.vad = {enable, voiceMs, mode};
this.listenDuringPrompt = this.data.listenDuringPrompt === false ? false : true;
this.minBargeinWordCount = this.data.minBargeinWordCount || 1;
/* aws options */
this.vocabularyName = recognizer.vocabularyName;
this.vocabularyFilterName = recognizer.vocabularyFilterName;
@@ -64,6 +71,21 @@ class TaskGather extends Task {
(this.playTask && this.playTask.earlyMedia);
}
get summary() {
let s = `${this.name}{`;
if (this.input.length === 2) s += 'inputs=[speech,digits],';
else if (this.input.includes('digits')) s += 'inputs=digits';
else s += 'inputs=speech,';
if (this.input.includes('speech')) {
s += `vendor=${this.vendor},language=${this.language}`;
}
if (this.sayTask) s += ',with nested say task';
if (this.playTask) s += ',with nested play task';
s += '}';
return s;
}
async exec(cs, ep) {
await super.exec(cs);
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
@@ -84,29 +106,43 @@ class TaskGather extends Task {
throw new Error(`no speech-to-text service credentials for ${this.vendor} have been configured`);
}
const startListening = (cs, ep) => {
this._startTimer();
if (this.input.includes('speech') && !this.listenDuringPrompt) {
this._initSpeech(cs, ep)
.then(() => {
this._startTranscribing(ep);
return updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
})
.catch(() => {});
}
};
try {
if (this.sayTask) {
this.sayTask.exec(cs, ep); // kicked off, _not_ waiting for it to complete
this.sayTask.on('playDone', (err) => {
if (!this.killed) this._startTimer();
if (err) return this.logger.error({err}, 'Gather:exec Error playing tts');
this.logger.debug('Gather: say task completed');
});
}
else if (this.playTask) {
this.playTask.exec(cs, ep); // kicked off, _not_ waiting for it to complete
this.playTask.on('playDone', (err) => {
if (!this.killed) this._startTimer();
if (err) return this.logger.error({err}, 'Gather:exec Error playing url');
if (!this.killed) startListening(cs, ep);
});
}
else this._startTimer();
if (this.input.includes('speech')) {
if (this.input.includes('speech') && this.listenDuringPrompt) {
await this._initSpeech(cs, ep);
this._startTranscribing(ep);
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid)
.catch(() => {/*already logged error */});
}
if (this.input.includes('digits')) {
if (this.input.includes('digits') || this.dtmfBargein) {
ep.on('dtmf', this._onDtmf.bind(this, cs, ep));
}
@@ -125,17 +161,33 @@ class TaskGather extends Task {
super.kill(cs);
this._killAudio(cs);
this.ep.removeAllListeners('dtmf');
clearTimeout(this.interDigitTimer);
this._resolve('killed');
}
_onDtmf(cs, ep, evt) {
this.logger.debug(evt, 'TaskGather:_onDtmf');
if (evt.dtmf === this.finishOnKey) this._resolve('dtmf-terminator-key');
clearTimeout(this.interDigitTimer);
let resolved = false;
if (this.dtmfBargein) this._killAudio(cs);
if (evt.dtmf === this.finishOnKey && this.input.includes('digits')) {
resolved = true;
this._resolve('dtmf-terminator-key');
}
else {
this.digitBuffer += evt.dtmf;
if (this.digitBuffer.length === this.numDigits) this._resolve('dtmf-num-digits');
const len = this.digitBuffer.length;
if (len === this.numDigits || len === this.maxDigits) {
resolved = true;
this._resolve('dtmf-num-digits');
}
}
if (!resolved && this.interDigitTimeout > 0 && this.digitBuffer.length >= this.minDigits) {
/* start interDigitTimer */
const ms = this.interDigitTimeout * 1000;
this.logger.debug(`starting interdigit timer of ${ms}`);
this.interDigitTimer = setTimeout(() => this._resolve('dtmf-interdigit-timeout'), ms);
}
this._killAudio(cs);
}
async _initSpeech(cs, ep) {
@@ -207,7 +259,7 @@ class TaskGather extends Task {
ep.startTranscription({
vendor: this.vendor,
locale: this.language,
interim: this.partialResultCallback ? true : false,
interim: this.partialResultCallback || this.bargein,
}).catch((err) => {
const {writeAlerts, AlertType} = this.cs.srf.locals;
this.logger.error(err, 'TaskGather:_startTranscribing error');
@@ -249,23 +301,47 @@ class TaskGather extends Task {
_onTranscription(cs, ep, evt) {
if ('aws' === this.vendor && Array.isArray(evt) && evt.length > 0) evt = evt[0];
if ('microsoft' === this.vendor) {
const nbest = evt.NBest;
const newEvent = {
is_final: evt.RecognitionStatus === 'Success',
alternatives: [
{
confidence: nbest[0].Confidence,
transcript: nbest[0].Display
}
]
};
evt = newEvent;
const final = evt.RecognitionStatus === 'Success';
if (final) {
const nbest = evt.NBest;
evt = {
is_final: true,
alternatives: [
{
confidence: nbest[0].Confidence,
transcript: nbest[0].Display
}
]
};
}
else {
evt = {
is_final: false,
alternatives: [
{
transcript: evt.Text
}
]
};
}
}
this.logger.debug(evt, 'TaskGather:_onTranscription');
if (evt.is_final) this._resolve('speech', evt);
else if (this.partialResultHook) {
this.cs.requestor.request('verb:hook', this.partialResultHook, Object.assign({speech: evt}, this.cs.callInfo))
.catch((err) => this.logger.info(err, 'GatherTask:_onTranscription error'));
else {
/* google has a measure of stability:
https://cloud.google.com/speech-to-text/docs/basics#streaming_responses
others do not.
*/
const isStableEnough = typeof evt.stability === 'undefined' || evt.stability > GATHER_STABILITY_THRESHOLD;
if (this.bargein && isStableEnough &&
evt.alternatives[0].transcript.split(' ').length >= this.minBargeinWordCount) {
this.logger.debug('Gather:_onTranscription - killing audio due to speech bargein');
this._killAudio(cs);
}
if (this.partialResultHook) {
this.cs.requestor.request(this.partialResultHook, Object.assign({speech: evt}, this.cs.callInfo))
.catch((err) => this.logger.info(err, 'GatherTask:_onTranscription error'));
}
}
}
_onEndOfUtterance(cs, ep) {
@@ -283,6 +359,7 @@ class TaskGather extends Task {
if (this.resolved) return;
this.resolved = true;
this.logger.debug(`TaskGather:resolve with reason ${reason}`);
clearTimeout(this.interDigitTimer);
if (this.ep && this.ep.connected) {
this.ep.stopTranscription({vendor: this.vendor})

View File

@@ -14,6 +14,14 @@ class TaskSay extends Task {
get name() { return TaskName.Say; }
get summary() {
for (let i = 0; i < this.text.length; i++) {
if (this.text[i].startsWith('silence_stream')) continue;
return `${this.name}{text=${this.text[i].slice(0, 15)}${this.text[i].length > 15 ? '...' : ''}}`;
}
return this.text[0];
}
async exec(cs, ep) {
await super.exec(cs);
@@ -21,15 +29,20 @@ class TaskSay extends Task {
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, srf);
const {writeAlerts, AlertType, stats} = srf.locals;
const {synthAudio} = srf.locals.dbHelpers;
const hasVerbLevelTts = this.synthesizer.vendor && this.synthesizer.vendor !== 'default';
const vendor = hasVerbLevelTts ? this.synthesizer.vendor : cs.speechSynthesisVendor ;
const language = hasVerbLevelTts ? this.synthesizer.language : cs.speechSynthesisLanguage ;
const voice = hasVerbLevelTts ? this.synthesizer.voice : cs.speechSynthesisVoice ;
const vendor = this.synthesizer.vendor && this.synthesizer.vendor !== 'default' ?
this.synthesizer.vendor :
cs.speechSynthesisVendor;
const language = this.synthesizer.language && this.synthesizer.language !== 'default' ?
this.synthesizer.language :
cs.speechSynthesisLanguage ;
const voice = this.synthesizer.voice && this.synthesizer.voice !== 'default' ?
this.synthesizer.voice :
cs.speechSynthesisVoice;
const engine = this.synthesizer.engine || 'standard';
const salt = cs.callSid;
const credentials = cs.getSpeechCredentials(vendor, 'tts');
this.logger.info({language, voice}, `Task:say - using vendor: ${vendor}`);
this.logger.info({vendor, language, voice}, 'TaskSay:exec');
this.ep = ep;
try {
if (!credentials) {
@@ -80,7 +93,11 @@ class TaskSay extends Task {
const {memberId, confName, confUuid} = cs;
await this.playToConfMember(this.ep, memberId, confName, confUuid, filepath[segment]);
}
else await ep.play(filepath[segment]);
else {
this.logger.debug(`Say:exec sending command to play file ${filepath[segment]}`);
await ep.play(filepath[segment]);
this.logger.debug(`Say:exec completed play file ${filepath[segment]}`);
}
} while (!this.killed && ++segment < filepath.length);
}
} catch (err) {

View File

@@ -98,8 +98,15 @@
"finishOnKey": "string",
"input": "array",
"numDigits": "number",
"minDigits": "number",
"maxDigits": "number",
"interDigitTimeout": "number",
"partialResultHook": "object|string",
"speechTimeout": "number",
"listenDuringPrompt": "boolean",
"dtmfBargein": "boolean",
"bargein": "boolean",
"minBargeinWordCount": "number",
"timeout": "number",
"recognizer": "#recognizer",
"play": "#play",

View File

@@ -42,6 +42,10 @@ class Task extends Emitter {
return this.cs;
}
get summary() {
return this.name;
}
toJSON() {
return this.data;
}