mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-20 16:50:39 +00:00
Feature/continuous asr (#119)
* bugfix: background gather for speech-only should still kill audio on dtmf entry when dtmfBargein is true
* initial changes for continuous asr
* move properties under recognizer
* update drachtio-srf@4.5.1
* catch exception on destroy
This commit is contained in:
@@ -1258,7 +1258,7 @@ class CallSession extends Emitter {
|
|||||||
this.emit('callStatusChange', {callStatus: CallStatus.Completed, duration});
|
this.emit('callStatusChange', {callStatus: CallStatus.Completed, duration});
|
||||||
this.logger.debug('CallSession: call terminated by jambones');
|
this.logger.debug('CallSession: call terminated by jambones');
|
||||||
this.rootSpan.setAttributes({'call.termination': 'hangup by jambonz'});
|
this.rootSpan.setAttributes({'call.termination': 'hangup by jambonz'});
|
||||||
origDestroy();
|
origDestroy().catch((err) => this.logger.info({err}, 'CallSession - error destroying dialog'));
|
||||||
if (this.wakeupResolver) {
|
if (this.wakeupResolver) {
|
||||||
this.wakeupResolver({reason: 'session ended'});
|
this.wakeupResolver({reason: 'session ended'});
|
||||||
this.wakeupResolver = null;
|
this.wakeupResolver = null;
|
||||||
|
|||||||
@@ -74,7 +74,15 @@ class TaskConfig extends Task {
|
|||||||
cs.speechRecognizerLanguage = this.recognizer.language !== 'default'
|
cs.speechRecognizerLanguage = this.recognizer.language !== 'default'
|
||||||
? this.recognizer.language
|
? this.recognizer.language
|
||||||
: cs.speechRecognizerLanguage;
|
: cs.speechRecognizerLanguage;
|
||||||
this.logger.info({recognizer: this.recognizer}, 'Config: updated recognizer');
|
cs.isContinuousAsr = typeof this.recognizer.asrTimeout === 'number' ? true : false;
|
||||||
|
if (cs.isContinuousAsr) {
|
||||||
|
cs.asrTimeout = this.recognizer.asrTimeout;
|
||||||
|
cs.asrDtmfTerminationDigit = this.recognizer.asrDtmfTerminationDigit;
|
||||||
|
}
|
||||||
|
this.logger.info({
|
||||||
|
recognizer: this.recognizer,
|
||||||
|
isContinuousAsr: cs.isContinuousAsr
|
||||||
|
}, 'Config: updated recognizer');
|
||||||
}
|
}
|
||||||
if ('enable' in this.bargeIn) {
|
if ('enable' in this.bargeIn) {
|
||||||
if (this.gatherOpts) {
|
if (this.gatherOpts) {
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ class TaskGather extends Task {
|
|||||||
].forEach((k) => this[k] = this.data[k]);
|
].forEach((k) => this[k] = this.data[k]);
|
||||||
|
|
||||||
/* when collecting dtmf, bargein on dtmf is true unless explicitly set to false */
|
/* when collecting dtmf, bargein on dtmf is true unless explicitly set to false */
|
||||||
if (this.dtmfBargein !== false && this.input.includes('digits')) this.dtmfBargein = true;
|
if (this.dtmfBargein !== false && this.input.includes('digits')) this.dtmfBargein = true;
|
||||||
|
|
||||||
/* timeout of zero means no timeout */
|
/* timeout of zero means no timeout */
|
||||||
this.timeout = this.timeout === 0 ? 0 : (this.timeout || 15) * 1000;
|
this.timeout = this.timeout === 0 ? 0 : (this.timeout || 15) * 1000;
|
||||||
@@ -49,6 +49,11 @@ class TaskGather extends Task {
|
|||||||
this.naicsCode = recognizer.naicsCode || 0;
|
this.naicsCode = recognizer.naicsCode || 0;
|
||||||
this.altLanguages = recognizer.altLanguages || [];
|
this.altLanguages = recognizer.altLanguages || [];
|
||||||
|
|
||||||
|
/* continuous ASR (i.e. compile transcripts until a special timeout or dtmf key) */
|
||||||
|
this.asrTimeout = typeof recognizer.asrTimeout === 'number' ? recognizer.asrTimeout * 1000 : 0;
|
||||||
|
if (this.asrTimeout > 0) this.asrDtmfTerminationDigit = recognizer.asrDtmfTerminationDigit;
|
||||||
|
this.isContinuousAsr = this.asrTimeout > 0;
|
||||||
|
|
||||||
/* vad: if provided, we dont connect to recognizer until voice activity is detected */
|
/* vad: if provided, we dont connect to recognizer until voice activity is detected */
|
||||||
const {enable, voiceMs = 0, mode = -1} = recognizer.vad || {};
|
const {enable, voiceMs = 0, mode = -1} = recognizer.vad || {};
|
||||||
this.vad = {enable, voiceMs, mode};
|
this.vad = {enable, voiceMs, mode};
|
||||||
@@ -65,6 +70,10 @@ class TaskGather extends Task {
|
|||||||
this.initialSpeechTimeoutMs = recognizer.initialSpeechTimeoutMs || 0;
|
this.initialSpeechTimeoutMs = recognizer.initialSpeechTimeoutMs || 0;
|
||||||
this.azureServiceEndpoint = recognizer.azureServiceEndpoint;
|
this.azureServiceEndpoint = recognizer.azureServiceEndpoint;
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
this.hints = [];
|
||||||
|
this.altLanguages = [];
|
||||||
|
}
|
||||||
|
|
||||||
this.digitBuffer = '';
|
this.digitBuffer = '';
|
||||||
this._earlyMedia = this.data.earlyMedia === true;
|
this._earlyMedia = this.data.earlyMedia === true;
|
||||||
@@ -77,6 +86,9 @@ class TaskGather extends Task {
|
|||||||
}
|
}
|
||||||
if (!this.sayTask && !this.playTask) this.listenDuringPrompt = false;
|
if (!this.sayTask && !this.playTask) this.listenDuringPrompt = false;
|
||||||
|
|
||||||
|
/* buffer speech for continuous asr */
|
||||||
|
this._bufferedTranscripts = [];
|
||||||
|
|
||||||
this.parentTask = parentTask;
|
this.parentTask = parentTask;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -109,6 +121,15 @@ class TaskGather extends Task {
|
|||||||
await super.exec(cs);
|
await super.exec(cs);
|
||||||
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
|
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
|
||||||
|
|
||||||
|
if (!this.isContinuousAsr && cs.isContinuousAsr) {
|
||||||
|
this.isContinuousAsr = true;
|
||||||
|
this.asrTimeout = cs.asrTimeout * 1000;
|
||||||
|
this.asrDtmfTerminationDigit = cs.asrDtmfTerminationDigit;
|
||||||
|
this.logger.debug({
|
||||||
|
asrTimeout: this.asrTimeout,
|
||||||
|
asrDtmfTerminationDigit: this.asrDtmfTerminationDigit
|
||||||
|
}, 'Gather:exec - enabling continuous ASR since it is turned on for the session');
|
||||||
|
}
|
||||||
this.ep = ep;
|
this.ep = ep;
|
||||||
if ('default' === this.vendor || !this.vendor) this.vendor = cs.speechRecognizerVendor;
|
if ('default' === this.vendor || !this.vendor) this.vendor = cs.speechRecognizerVendor;
|
||||||
if ('default' === this.language || !this.language) this.language = cs.speechRecognizerLanguage;
|
if ('default' === this.language || !this.language) this.language = cs.speechRecognizerLanguage;
|
||||||
@@ -127,6 +148,7 @@ class TaskGather extends Task {
|
|||||||
|
|
||||||
const startListening = (cs, ep) => {
|
const startListening = (cs, ep) => {
|
||||||
this._startTimer();
|
this._startTimer();
|
||||||
|
if (this.isContinuousAsr && 0 === this.timeout) this._startAsrTimer();
|
||||||
if (this.input.includes('speech') && !this.listenDuringPrompt) {
|
if (this.input.includes('speech') && !this.listenDuringPrompt) {
|
||||||
this._initSpeech(cs, ep)
|
this._initSpeech(cs, ep)
|
||||||
.then(() => {
|
.then(() => {
|
||||||
@@ -171,7 +193,7 @@ class TaskGather extends Task {
|
|||||||
.catch(() => {/*already logged error */});
|
.catch(() => {/*already logged error */});
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this.input.includes('digits') || this.dtmfBargein) {
|
if (this.input.includes('digits') || this.dtmfBargein || this.asrDtmfTerminationDigit) {
|
||||||
ep.on('dtmf', this._onDtmf.bind(this, cs, ep));
|
ep.on('dtmf', this._onDtmf.bind(this, cs, ep));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -209,12 +231,15 @@ class TaskGather extends Task {
|
|||||||
this.logger.debug(evt, 'TaskGather:_onDtmf');
|
this.logger.debug(evt, 'TaskGather:_onDtmf');
|
||||||
clearTimeout(this.interDigitTimer);
|
clearTimeout(this.interDigitTimer);
|
||||||
let resolved = false;
|
let resolved = false;
|
||||||
if (this.dtmfBargein) this._killAudio(cs);
|
if (this.dtmfBargein) {
|
||||||
|
this._killAudio(cs);
|
||||||
|
this.emit('dtmf', evt);
|
||||||
|
}
|
||||||
if (evt.dtmf === this.finishOnKey && this.input.includes('digits')) {
|
if (evt.dtmf === this.finishOnKey && this.input.includes('digits')) {
|
||||||
resolved = true;
|
resolved = true;
|
||||||
this._resolve('dtmf-terminator-key');
|
this._resolve('dtmf-terminator-key');
|
||||||
}
|
}
|
||||||
else {
|
else if (this.input.includes('digits')) {
|
||||||
this.digitBuffer += evt.dtmf;
|
this.digitBuffer += evt.dtmf;
|
||||||
const len = this.digitBuffer.length;
|
const len = this.digitBuffer.length;
|
||||||
if (len === this.numDigits || len === this.maxDigits) {
|
if (len === this.numDigits || len === this.maxDigits) {
|
||||||
@@ -222,6 +247,12 @@ class TaskGather extends Task {
|
|||||||
this._resolve('dtmf-num-digits');
|
this._resolve('dtmf-num-digits');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (this.isContinuousAsr && evt.dtmf === this.asrDtmfTerminationDigit) {
|
||||||
|
this.logger.info(`continuousAsr triggered with dtmf ${this.asrDtmfTerminationDigit}`);
|
||||||
|
this._clearAsrTimer();
|
||||||
|
this._clearTimer();
|
||||||
|
this._resolve(this._bufferedTranscripts.length > 0 ? 'speech' : 'timeout');
|
||||||
|
}
|
||||||
if (!resolved && this.interDigitTimeout > 0 && this.digitBuffer.length >= this.minDigits) {
|
if (!resolved && this.interDigitTimeout > 0 && this.digitBuffer.length >= this.minDigits) {
|
||||||
/* start interDigitTimer */
|
/* start interDigitTimer */
|
||||||
const ms = this.interDigitTimeout * 1000;
|
const ms = this.interDigitTimeout * 1000;
|
||||||
@@ -343,14 +374,10 @@ class TaskGather extends Task {
|
|||||||
|
|
||||||
_startTimer() {
|
_startTimer() {
|
||||||
if (0 === this.timeout) return;
|
if (0 === this.timeout) return;
|
||||||
if (this._timeoutTimer) {
|
this._clearTimer();
|
||||||
clearTimeout(this._timeoutTimer);
|
|
||||||
this._timeoutTimer = null;
|
|
||||||
}
|
|
||||||
assert(!this._timeoutTimer);
|
|
||||||
this.logger.debug(`Gather:_startTimer: timeout ${this.timeout}`);
|
|
||||||
this._timeoutTimer = setTimeout(() => {
|
this._timeoutTimer = setTimeout(() => {
|
||||||
this._resolve(this.digitBuffer.length >= this.minDigits ? 'dtmf-num-digits' : 'timeout');
|
if (this.isContinuousAsr) this._startAsrTimer();
|
||||||
|
else this._resolve(this.digitBuffer.length >= this.minDigits ? 'dtmf-num-digits' : 'timeout');
|
||||||
}, this.timeout);
|
}, this.timeout);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -361,6 +388,21 @@ class TaskGather extends Task {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_startAsrTimer() {
|
||||||
|
assert(this.isContinuousAsr);
|
||||||
|
this._clearAsrTimer();
|
||||||
|
this._asrTimer = setTimeout(() => {
|
||||||
|
this.logger.debug('_startAsrTimer - asr timer went off');
|
||||||
|
this._resolve(this._bufferedTranscripts.length > 0 ? 'speech' : 'timeout');
|
||||||
|
}, this.asrTimeout);
|
||||||
|
this.logger.debug(`_startAsrTimer: set for ${this.asrTimeout}ms`);
|
||||||
|
}
|
||||||
|
|
||||||
|
_clearAsrTimer() {
|
||||||
|
if (this._asrTimer) clearTimeout(this._asrTimer);
|
||||||
|
this._asrTimer = null;
|
||||||
|
}
|
||||||
|
|
||||||
_killAudio(cs) {
|
_killAudio(cs) {
|
||||||
if (!this.sayTask && !this.playTask && this.bargein) {
|
if (!this.sayTask && !this.playTask && this.bargein) {
|
||||||
if (this.ep?.connected && !this.playComplete) {
|
if (this.ep?.connected && !this.playComplete) {
|
||||||
@@ -419,7 +461,15 @@ class TaskGather extends Task {
|
|||||||
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, listen again');
|
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, listen again');
|
||||||
return this._startTranscribing(ep);
|
return this._startTranscribing(ep);
|
||||||
}
|
}
|
||||||
this._resolve('speech', evt);
|
if (this.isContinuousAsr) {
|
||||||
|
/* append the transcript and start listening again for asrTimeout */
|
||||||
|
this.logger.info({evt}, 'TaskGather:_onTranscription - got transcript during continous asr');
|
||||||
|
this._bufferedTranscripts.push(evt);
|
||||||
|
this._clearTimer();
|
||||||
|
this._startAsrTimer();
|
||||||
|
return this._startTranscribing(ep);
|
||||||
|
}
|
||||||
|
else this._resolve('speech', evt);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* google has a measure of stability:
|
/* google has a measure of stability:
|
||||||
@@ -476,6 +526,15 @@ class TaskGather extends Task {
|
|||||||
|
|
||||||
this.resolved = true;
|
this.resolved = true;
|
||||||
clearTimeout(this.interDigitTimer);
|
clearTimeout(this.interDigitTimer);
|
||||||
|
this._clearTimer();
|
||||||
|
|
||||||
|
if (this.isContinuousAsr && reason.startsWith('speech')) {
|
||||||
|
evt = {
|
||||||
|
is_final: true,
|
||||||
|
transcripts: this._bufferedTranscripts
|
||||||
|
};
|
||||||
|
this.logger.debug({evt}, 'TaskGather:resolve continuous asr');
|
||||||
|
}
|
||||||
|
|
||||||
this.span.setAttributes({'stt.resolve': reason, 'stt.result': JSON.stringify(evt)});
|
this.span.setAttributes({'stt.resolve': reason, 'stt.result': JSON.stringify(evt)});
|
||||||
if (this.ep && this.ep.connected) {
|
if (this.ep && this.ep.connected) {
|
||||||
@@ -483,8 +542,6 @@ class TaskGather extends Task {
|
|||||||
.catch((err) => this.logger.error({err}, 'Error stopping transcription'));
|
.catch((err) => this.logger.error({err}, 'Error stopping transcription'));
|
||||||
}
|
}
|
||||||
|
|
||||||
this._clearTimer();
|
|
||||||
|
|
||||||
if (this.callSession && this.callSession.callGone) {
|
if (this.callSession && this.callSession.callGone) {
|
||||||
this.logger.debug('TaskGather:_resolve - call is gone, not invoking web callback');
|
this.logger.debug('TaskGather:_resolve - call is gone, not invoking web callback');
|
||||||
this.notifyTaskDone();
|
this.notifyTaskDone();
|
||||||
|
|||||||
@@ -477,7 +477,9 @@
|
|||||||
},
|
},
|
||||||
"requestSnr": "boolean",
|
"requestSnr": "boolean",
|
||||||
"initialSpeechTimeoutMs": "number",
|
"initialSpeechTimeoutMs": "number",
|
||||||
"azureServiceEndpoint": "string"
|
"azureServiceEndpoint": "string",
|
||||||
|
"asrDtmfTerminationDigit": "string",
|
||||||
|
"asrTimeout": "number"
|
||||||
},
|
},
|
||||||
"required": [
|
"required": [
|
||||||
"vendor"
|
"vendor"
|
||||||
|
|||||||
14
package-lock.json
generated
14
package-lock.json
generated
@@ -28,7 +28,7 @@
|
|||||||
"debug": "^4.3.4",
|
"debug": "^4.3.4",
|
||||||
"deepcopy": "^2.1.0",
|
"deepcopy": "^2.1.0",
|
||||||
"drachtio-fsmrf": "^3.0.1",
|
"drachtio-fsmrf": "^3.0.1",
|
||||||
"drachtio-srf": "^4.5.0",
|
"drachtio-srf": "^4.5.1",
|
||||||
"express": "^4.18.1",
|
"express": "^4.18.1",
|
||||||
"helmet": "^5.1.0",
|
"helmet": "^5.1.0",
|
||||||
"ip": "^1.1.8",
|
"ip": "^1.1.8",
|
||||||
@@ -2054,9 +2054,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/drachtio-srf": {
|
"node_modules/drachtio-srf": {
|
||||||
"version": "4.5.0",
|
"version": "4.5.1",
|
||||||
"resolved": "https://registry.npmjs.org/drachtio-srf/-/drachtio-srf-4.5.0.tgz",
|
"resolved": "https://registry.npmjs.org/drachtio-srf/-/drachtio-srf-4.5.1.tgz",
|
||||||
"integrity": "sha512-3Y3H+HDvur5m8b8vChGFt0ywVETnhmKX6kUvpc7/k1cKIGBfoba2xluUsLcknS2p7yYu9TQWKGd5XZacPpU3Dg==",
|
"integrity": "sha512-7U8hhMrVIc33ysSUcbg64fDcx2jvqY4h/NdzhYsiU6tQ2fbkd0jzNYR6KD3dqhEzGxRAeEgcaPuQEG0+K2qVIA==",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"debug": "^3.2.7",
|
"debug": "^3.2.7",
|
||||||
"delegates": "^0.1.0",
|
"delegates": "^0.1.0",
|
||||||
@@ -7888,9 +7888,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"drachtio-srf": {
|
"drachtio-srf": {
|
||||||
"version": "4.5.0",
|
"version": "4.5.1",
|
||||||
"resolved": "https://registry.npmjs.org/drachtio-srf/-/drachtio-srf-4.5.0.tgz",
|
"resolved": "https://registry.npmjs.org/drachtio-srf/-/drachtio-srf-4.5.1.tgz",
|
||||||
"integrity": "sha512-3Y3H+HDvur5m8b8vChGFt0ywVETnhmKX6kUvpc7/k1cKIGBfoba2xluUsLcknS2p7yYu9TQWKGd5XZacPpU3Dg==",
|
"integrity": "sha512-7U8hhMrVIc33ysSUcbg64fDcx2jvqY4h/NdzhYsiU6tQ2fbkd0jzNYR6KD3dqhEzGxRAeEgcaPuQEG0+K2qVIA==",
|
||||||
"requires": {
|
"requires": {
|
||||||
"debug": "^3.2.7",
|
"debug": "^3.2.7",
|
||||||
"delegates": "^0.1.0",
|
"delegates": "^0.1.0",
|
||||||
|
|||||||
@@ -45,7 +45,7 @@
|
|||||||
"debug": "^4.3.4",
|
"debug": "^4.3.4",
|
||||||
"deepcopy": "^2.1.0",
|
"deepcopy": "^2.1.0",
|
||||||
"drachtio-fsmrf": "^3.0.1",
|
"drachtio-fsmrf": "^3.0.1",
|
||||||
"drachtio-srf": "^4.5.0",
|
"drachtio-srf": "^4.5.1",
|
||||||
"express": "^4.18.1",
|
"express": "^4.18.1",
|
||||||
"helmet": "^5.1.0",
|
"helmet": "^5.1.0",
|
||||||
"ip": "^1.1.8",
|
"ip": "^1.1.8",
|
||||||
|
|||||||
Reference in New Issue
Block a user