mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-20 16:50:39 +00:00
Feature/continuous asr (#119)
* bugfix: background gather for speech-only should still kill audio on dtmf entry when dtmfBargein is true * initial changes for continuous asr * move properties under recognizer * update drachtio-srf@4.5.1 * catch exception on destroy
This commit is contained in:
@@ -23,7 +23,7 @@ class TaskGather extends Task {
|
||||
].forEach((k) => this[k] = this.data[k]);
|
||||
|
||||
/* when collecting dtmf, bargein on dtmf is true unless explicitly set to false */
|
||||
if (this.dtmfBargein !== false && this.input.includes('digits')) this.dtmfBargein = true;
|
||||
if (this.dtmfBargein !== false && this.input.includes('digits')) this.dtmfBargein = true;
|
||||
|
||||
/* timeout of zero means no timeout */
|
||||
this.timeout = this.timeout === 0 ? 0 : (this.timeout || 15) * 1000;
|
||||
@@ -49,6 +49,11 @@ class TaskGather extends Task {
|
||||
this.naicsCode = recognizer.naicsCode || 0;
|
||||
this.altLanguages = recognizer.altLanguages || [];
|
||||
|
||||
/* continuous ASR (i.e. compile transcripts until a special timeout or dtmf key) */
|
||||
this.asrTimeout = typeof recognizer.asrTimeout === 'number' ? recognizer.asrTimeout * 1000 : 0;
|
||||
if (this.asrTimeout > 0) this.asrDtmfTerminationDigit = recognizer.asrDtmfTerminationDigit;
|
||||
this.isContinuousAsr = this.asrTimeout > 0;
|
||||
|
||||
/* vad: if provided, we dont connect to recognizer until voice activity is detected */
|
||||
const {enable, voiceMs = 0, mode = -1} = recognizer.vad || {};
|
||||
this.vad = {enable, voiceMs, mode};
|
||||
@@ -65,6 +70,10 @@ class TaskGather extends Task {
|
||||
this.initialSpeechTimeoutMs = recognizer.initialSpeechTimeoutMs || 0;
|
||||
this.azureServiceEndpoint = recognizer.azureServiceEndpoint;
|
||||
}
|
||||
else {
|
||||
this.hints = [];
|
||||
this.altLanguages = [];
|
||||
}
|
||||
|
||||
this.digitBuffer = '';
|
||||
this._earlyMedia = this.data.earlyMedia === true;
|
||||
@@ -77,6 +86,9 @@ class TaskGather extends Task {
|
||||
}
|
||||
if (!this.sayTask && !this.playTask) this.listenDuringPrompt = false;
|
||||
|
||||
/* buffer speech for continueous asr */
|
||||
this._bufferedTranscripts = [];
|
||||
|
||||
this.parentTask = parentTask;
|
||||
}
|
||||
|
||||
@@ -109,6 +121,15 @@ class TaskGather extends Task {
|
||||
await super.exec(cs);
|
||||
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
|
||||
|
||||
if (!this.isContinuousAsr && cs.isContinuousAsr) {
|
||||
this.isContinuousAsr = true;
|
||||
this.asrTimeout = cs.asrTimeout * 1000;
|
||||
this.asrDtmfTerminationDigit = cs.asrDtmfTerminationDigit;
|
||||
this.logger.debug({
|
||||
asrTimeout: this.asrTimeout,
|
||||
asrDtmfTerminationDigit: this.asrDtmfTerminationDigit
|
||||
}, 'Gather:exec - enabling continuous ASR since it is turned on for the session');
|
||||
}
|
||||
this.ep = ep;
|
||||
if ('default' === this.vendor || !this.vendor) this.vendor = cs.speechRecognizerVendor;
|
||||
if ('default' === this.language || !this.language) this.language = cs.speechRecognizerLanguage;
|
||||
@@ -127,6 +148,7 @@ class TaskGather extends Task {
|
||||
|
||||
const startListening = (cs, ep) => {
|
||||
this._startTimer();
|
||||
if (this.isContinuousAsr && 0 === this.timeout) this._startAsrTimer();
|
||||
if (this.input.includes('speech') && !this.listenDuringPrompt) {
|
||||
this._initSpeech(cs, ep)
|
||||
.then(() => {
|
||||
@@ -171,7 +193,7 @@ class TaskGather extends Task {
|
||||
.catch(() => {/*already logged error */});
|
||||
}
|
||||
|
||||
if (this.input.includes('digits') || this.dtmfBargein) {
|
||||
if (this.input.includes('digits') || this.dtmfBargein || this.asrDtmfTerminationDigit) {
|
||||
ep.on('dtmf', this._onDtmf.bind(this, cs, ep));
|
||||
}
|
||||
|
||||
@@ -209,12 +231,15 @@ class TaskGather extends Task {
|
||||
this.logger.debug(evt, 'TaskGather:_onDtmf');
|
||||
clearTimeout(this.interDigitTimer);
|
||||
let resolved = false;
|
||||
if (this.dtmfBargein) this._killAudio(cs);
|
||||
if (this.dtmfBargein) {
|
||||
this._killAudio(cs);
|
||||
this.emit('dtmf', evt);
|
||||
}
|
||||
if (evt.dtmf === this.finishOnKey && this.input.includes('digits')) {
|
||||
resolved = true;
|
||||
this._resolve('dtmf-terminator-key');
|
||||
}
|
||||
else {
|
||||
else if (this.input.includes('digits')) {
|
||||
this.digitBuffer += evt.dtmf;
|
||||
const len = this.digitBuffer.length;
|
||||
if (len === this.numDigits || len === this.maxDigits) {
|
||||
@@ -222,6 +247,12 @@ class TaskGather extends Task {
|
||||
this._resolve('dtmf-num-digits');
|
||||
}
|
||||
}
|
||||
else if (this.isContinuousAsr && evt.dtmf === this.asrDtmfTerminationDigit) {
|
||||
this.logger.info(`continuousAsr triggered with dtmf ${this.asrDtmfTerminationDigit}`);
|
||||
this._clearAsrTimer();
|
||||
this._clearTimer();
|
||||
this._resolve(this._bufferedTranscripts.length > 0 ? 'speech' : 'timeout');
|
||||
}
|
||||
if (!resolved && this.interDigitTimeout > 0 && this.digitBuffer.length >= this.minDigits) {
|
||||
/* start interDigitTimer */
|
||||
const ms = this.interDigitTimeout * 1000;
|
||||
@@ -343,14 +374,10 @@ class TaskGather extends Task {
|
||||
|
||||
_startTimer() {
|
||||
if (0 === this.timeout) return;
|
||||
if (this._timeoutTimer) {
|
||||
clearTimeout(this._timeoutTimer);
|
||||
this._timeoutTimer = null;
|
||||
}
|
||||
assert(!this._timeoutTimer);
|
||||
this.logger.debug(`Gather:_startTimer: timeout ${this.timeout}`);
|
||||
this._clearTimer();
|
||||
this._timeoutTimer = setTimeout(() => {
|
||||
this._resolve(this.digitBuffer.length >= this.minDigits ? 'dtmf-num-digits' : 'timeout');
|
||||
if (this.isContinuousAsr) this._startAsrTimer();
|
||||
else this._resolve(this.digitBuffer.length >= this.minDigits ? 'dtmf-num-digits' : 'timeout');
|
||||
}, this.timeout);
|
||||
}
|
||||
|
||||
@@ -361,6 +388,21 @@ class TaskGather extends Task {
|
||||
}
|
||||
}
|
||||
|
||||
_startAsrTimer() {
|
||||
assert(this.isContinuousAsr);
|
||||
this._clearAsrTimer();
|
||||
this._asrTimer = setTimeout(() => {
|
||||
this.logger.debug('_startAsrTimer - asr timer went off');
|
||||
this._resolve(this._bufferedTranscripts.length > 0 ? 'speech' : 'timeout');
|
||||
}, this.asrTimeout);
|
||||
this.logger.debug(`_startAsrTimer: set for ${this.asrTimeout}ms`);
|
||||
}
|
||||
|
||||
_clearAsrTimer() {
|
||||
if (this._asrTimer) clearTimeout(this._asrTimer);
|
||||
this._asrTimer = null;
|
||||
}
|
||||
|
||||
_killAudio(cs) {
|
||||
if (!this.sayTask && !this.playTask && this.bargein) {
|
||||
if (this.ep?.connected && !this.playComplete) {
|
||||
@@ -419,7 +461,15 @@ class TaskGather extends Task {
|
||||
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, listen again');
|
||||
return this._startTranscribing(ep);
|
||||
}
|
||||
this._resolve('speech', evt);
|
||||
if (this.isContinuousAsr) {
|
||||
/* append the transcript and start listening again for asrTimeout */
|
||||
this.logger.info({evt}, 'TaskGather:_onTranscription - got transcript during continous asr');
|
||||
this._bufferedTranscripts.push(evt);
|
||||
this._clearTimer();
|
||||
this._startAsrTimer();
|
||||
return this._startTranscribing(ep);
|
||||
}
|
||||
else this._resolve('speech', evt);
|
||||
}
|
||||
else {
|
||||
/* google has a measure of stability:
|
||||
@@ -476,6 +526,15 @@ class TaskGather extends Task {
|
||||
|
||||
this.resolved = true;
|
||||
clearTimeout(this.interDigitTimer);
|
||||
this._clearTimer();
|
||||
|
||||
if (this.isContinuousAsr && reason.startsWith('speech')) {
|
||||
evt = {
|
||||
is_final: true,
|
||||
transcripts: this._bufferedTranscripts
|
||||
};
|
||||
this.logger.debug({evt}, 'TaskGather:resolve continuous asr');
|
||||
}
|
||||
|
||||
this.span.setAttributes({'stt.resolve': reason, 'stt.result': JSON.stringify(evt)});
|
||||
if (this.ep && this.ep.connected) {
|
||||
@@ -483,8 +542,6 @@ class TaskGather extends Task {
|
||||
.catch((err) => this.logger.error({err}, 'Error stopping transcription'));
|
||||
}
|
||||
|
||||
this._clearTimer();
|
||||
|
||||
if (this.callSession && this.callSession.callGone) {
|
||||
this.logger.debug('TaskGather:_resolve - call is gone, not invoking web callback');
|
||||
this.notifyTaskDone();
|
||||
|
||||
Reference in New Issue
Block a user