Feature/continuous asr (#119)

* bugfix: background gather for speech-only should still kill audio on dtmf entry when dtmfBargein is true

* initial changes for continuous asr

* move properties under recognizer

* update drachtio-srf@4.5.1

* catch exception on destroy
This commit is contained in:
Dave Horton
2022-06-21 10:35:27 -04:00
committed by GitHub
parent bdb40b3aa0
commit 627c38899f
6 changed files with 92 additions and 25 deletions

View File

@@ -23,7 +23,7 @@ class TaskGather extends Task {
].forEach((k) => this[k] = this.data[k]);
/* when collecting dtmf, bargein on dtmf is true unless explicitly set to false */
if (this.dtmfBargein !== false && this.input.includes('digits')) this.dtmfBargein = true;
if (this.dtmfBargein !== false && this.input.includes('digits')) this.dtmfBargein = true;
/* timeout of zero means no timeout */
this.timeout = this.timeout === 0 ? 0 : (this.timeout || 15) * 1000;
@@ -49,6 +49,11 @@ class TaskGather extends Task {
this.naicsCode = recognizer.naicsCode || 0;
this.altLanguages = recognizer.altLanguages || [];
/* continuous ASR (i.e. compile transcripts until a special timeout or dtmf key) */
this.asrTimeout = typeof recognizer.asrTimeout === 'number' ? recognizer.asrTimeout * 1000 : 0;
if (this.asrTimeout > 0) this.asrDtmfTerminationDigit = recognizer.asrDtmfTerminationDigit;
this.isContinuousAsr = this.asrTimeout > 0;
/* vad: if provided, we dont connect to recognizer until voice activity is detected */
const {enable, voiceMs = 0, mode = -1} = recognizer.vad || {};
this.vad = {enable, voiceMs, mode};
@@ -65,6 +70,10 @@ class TaskGather extends Task {
this.initialSpeechTimeoutMs = recognizer.initialSpeechTimeoutMs || 0;
this.azureServiceEndpoint = recognizer.azureServiceEndpoint;
}
else {
this.hints = [];
this.altLanguages = [];
}
this.digitBuffer = '';
this._earlyMedia = this.data.earlyMedia === true;
@@ -77,6 +86,9 @@ class TaskGather extends Task {
}
if (!this.sayTask && !this.playTask) this.listenDuringPrompt = false;
/* buffer speech for continueous asr */
this._bufferedTranscripts = [];
this.parentTask = parentTask;
}
@@ -109,6 +121,15 @@ class TaskGather extends Task {
await super.exec(cs);
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
if (!this.isContinuousAsr && cs.isContinuousAsr) {
this.isContinuousAsr = true;
this.asrTimeout = cs.asrTimeout * 1000;
this.asrDtmfTerminationDigit = cs.asrDtmfTerminationDigit;
this.logger.debug({
asrTimeout: this.asrTimeout,
asrDtmfTerminationDigit: this.asrDtmfTerminationDigit
}, 'Gather:exec - enabling continuous ASR since it is turned on for the session');
}
this.ep = ep;
if ('default' === this.vendor || !this.vendor) this.vendor = cs.speechRecognizerVendor;
if ('default' === this.language || !this.language) this.language = cs.speechRecognizerLanguage;
@@ -127,6 +148,7 @@ class TaskGather extends Task {
const startListening = (cs, ep) => {
this._startTimer();
if (this.isContinuousAsr && 0 === this.timeout) this._startAsrTimer();
if (this.input.includes('speech') && !this.listenDuringPrompt) {
this._initSpeech(cs, ep)
.then(() => {
@@ -171,7 +193,7 @@ class TaskGather extends Task {
.catch(() => {/*already logged error */});
}
if (this.input.includes('digits') || this.dtmfBargein) {
if (this.input.includes('digits') || this.dtmfBargein || this.asrDtmfTerminationDigit) {
ep.on('dtmf', this._onDtmf.bind(this, cs, ep));
}
@@ -209,12 +231,15 @@ class TaskGather extends Task {
this.logger.debug(evt, 'TaskGather:_onDtmf');
clearTimeout(this.interDigitTimer);
let resolved = false;
if (this.dtmfBargein) this._killAudio(cs);
if (this.dtmfBargein) {
this._killAudio(cs);
this.emit('dtmf', evt);
}
if (evt.dtmf === this.finishOnKey && this.input.includes('digits')) {
resolved = true;
this._resolve('dtmf-terminator-key');
}
else {
else if (this.input.includes('digits')) {
this.digitBuffer += evt.dtmf;
const len = this.digitBuffer.length;
if (len === this.numDigits || len === this.maxDigits) {
@@ -222,6 +247,12 @@ class TaskGather extends Task {
this._resolve('dtmf-num-digits');
}
}
else if (this.isContinuousAsr && evt.dtmf === this.asrDtmfTerminationDigit) {
this.logger.info(`continuousAsr triggered with dtmf ${this.asrDtmfTerminationDigit}`);
this._clearAsrTimer();
this._clearTimer();
this._resolve(this._bufferedTranscripts.length > 0 ? 'speech' : 'timeout');
}
if (!resolved && this.interDigitTimeout > 0 && this.digitBuffer.length >= this.minDigits) {
/* start interDigitTimer */
const ms = this.interDigitTimeout * 1000;
@@ -343,14 +374,10 @@ class TaskGather extends Task {
_startTimer() {
if (0 === this.timeout) return;
if (this._timeoutTimer) {
clearTimeout(this._timeoutTimer);
this._timeoutTimer = null;
}
assert(!this._timeoutTimer);
this.logger.debug(`Gather:_startTimer: timeout ${this.timeout}`);
this._clearTimer();
this._timeoutTimer = setTimeout(() => {
this._resolve(this.digitBuffer.length >= this.minDigits ? 'dtmf-num-digits' : 'timeout');
if (this.isContinuousAsr) this._startAsrTimer();
else this._resolve(this.digitBuffer.length >= this.minDigits ? 'dtmf-num-digits' : 'timeout');
}, this.timeout);
}
@@ -361,6 +388,21 @@ class TaskGather extends Task {
}
}
_startAsrTimer() {
assert(this.isContinuousAsr);
this._clearAsrTimer();
this._asrTimer = setTimeout(() => {
this.logger.debug('_startAsrTimer - asr timer went off');
this._resolve(this._bufferedTranscripts.length > 0 ? 'speech' : 'timeout');
}, this.asrTimeout);
this.logger.debug(`_startAsrTimer: set for ${this.asrTimeout}ms`);
}
_clearAsrTimer() {
if (this._asrTimer) clearTimeout(this._asrTimer);
this._asrTimer = null;
}
_killAudio(cs) {
if (!this.sayTask && !this.playTask && this.bargein) {
if (this.ep?.connected && !this.playComplete) {
@@ -419,7 +461,15 @@ class TaskGather extends Task {
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, listen again');
return this._startTranscribing(ep);
}
this._resolve('speech', evt);
if (this.isContinuousAsr) {
/* append the transcript and start listening again for asrTimeout */
this.logger.info({evt}, 'TaskGather:_onTranscription - got transcript during continous asr');
this._bufferedTranscripts.push(evt);
this._clearTimer();
this._startAsrTimer();
return this._startTranscribing(ep);
}
else this._resolve('speech', evt);
}
else {
/* google has a measure of stability:
@@ -476,6 +526,15 @@ class TaskGather extends Task {
this.resolved = true;
clearTimeout(this.interDigitTimer);
this._clearTimer();
if (this.isContinuousAsr && reason.startsWith('speech')) {
evt = {
is_final: true,
transcripts: this._bufferedTranscripts
};
this.logger.debug({evt}, 'TaskGather:resolve continuous asr');
}
this.span.setAttributes({'stt.resolve': reason, 'stt.result': JSON.stringify(evt)});
if (this.ep && this.ep.connected) {
@@ -483,8 +542,6 @@ class TaskGather extends Task {
.catch((err) => this.logger.error({err}, 'Error stopping transcription'));
}
this._clearTimer();
if (this.callSession && this.callSession.callGone) {
this.logger.debug('TaskGather:_resolve - call is gone, not invoking web callback');
this.notifyTaskDone();