add say and gather task features

This commit is contained in:
akirilyuk
2022-02-14 14:08:56 +01:00
parent c1130adf03
commit 15f85c9730
2 changed files with 139 additions and 49 deletions

View File

@@ -10,19 +10,25 @@ const {
const makeTask = require('./make_task');
const assert = require('assert');
const GATHER_STABILITY_THRESHOLD = Number(process.env.JAMBONZ_GATHER_STABILITY_THRESHOLD || 0.7);
class TaskGather extends Task {
constructor(logger, opts, parentTask) {
super(logger, opts);
this.preconditions = TaskPreconditions.Endpoint;
[
'finishOnKey', 'hints', 'input', 'numDigits',
'partialResultHook',
'finishOnKey', 'hints', 'input', 'numDigits', 'minDigits', 'maxDigits',
'interDigitTimeout', 'submitDigit', 'partialResultHook', 'bargein', 'dtmfBargein',
'retries', 'retryPromptTts', 'retryPromptUrl',
'speechTimeout', 'timeout', 'say', 'play'
].forEach((k) => this[k] = this.data[k]);
this.listenDuringPrompt = this.data.listenDuringPrompt === false ? false : true;
this.minBargeinWordCount = this.data.minBargeinWordCount || 1;
this.timeout = (this.timeout || 5) * 1000;
this.interim = this.partialResultCallback;
this.logger.debug({opts}, 'created gather task');
this.timeout = (this.timeout || 15) * 1000;
this.interim = this.partialResultCallback || this.bargein;
if (this.data.recognizer) {
const recognizer = this.data.recognizer;
this.vendor = recognizer.vendor;
@@ -30,10 +36,6 @@ class TaskGather extends Task {
this.hints = recognizer.hints || [];
this.altLanguages = recognizer.altLanguages || [];
/* vad: if provided, we dont connect to recognizer until voice activity is detected */
const {enable, voiceMs = 0, mode = -1} = recognizer.vad || {};
this.vad = {enable, voiceMs, mode};
/* aws options */
this.vocabularyName = recognizer.vocabularyName;
this.vocabularyFilterName = recognizer.vocabularyFilterName;
@@ -52,6 +54,12 @@ class TaskGather extends Task {
if (this.say) this.sayTask = makeTask(this.logger, {say: this.say}, this);
if (this.play) this.playTask = makeTask(this.logger, {play: this.play}, this);
if(this.sayTask || this.playTask){
// this is specially for barge in where we want to make a bargebale promt
// to a user without listening after the say task has finished
this.listenAfterSpeech = typeof this.data.listenAfterSpeech === "boolean" ? this.data.listenAfterSpeech : true;
}
this.parentTask = parentTask;
}
@@ -84,33 +92,63 @@ class TaskGather extends Task {
throw new Error(`no speech-to-text service credentials for ${this.vendor} have been configured`);
}
const startListening = (cs, ep) => {
this._startTimer();
if (this.input.includes('speech') && !this.listenDuringPrompt) {
this._initSpeech(cs, ep)
.then(() => {
this._startTranscribing(ep);
return updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
})
.catch(() => {});
}
};
try {
if (this.sayTask) {
this.sayTask.exec(cs, ep); // kicked off, _not_ waiting for it to complete
this.sayTask.on('playDone', (err) => {
if (!this.killed) this._startTimer();
this.logger.debug('Gather: kicking off say task');
this.sayTask.exec(cs, ep);
this.sayTask.on('playDone', async(err) => {
if (err) return this.logger.error({err}, 'Gather:exec Error playing tts');
this.logger.debug('Gather: say task completed');
if (!this.killed) {
if (this.listenAfterSpeech === true) {
startListening(cs, ep);
} else {
this.notifyTaskDone();
}
}
});
}
else if (this.playTask) {
this.playTask.exec(cs, ep); // kicked off, _not_ waiting for it to complete
this.playTask.on('playDone', (err) => {
if (!this.killed) this._startTimer();
});
this.playTask.on('playDone', async(err) => {
if (err) return this.logger.error({err}, 'Gather:exec Error playing url');
if (!this.killed) {
if (this.listenAfterSpeech === true) {
startListening(cs, ep);
} else {
this.notifyTaskDone();
}
}
}
);
}
else this._startTimer();
else startListening(cs, ep);
if (this.input.includes('speech')) {
if (this.input.includes('speech') && this.listenDuringPrompt) {
await this._initSpeech(cs, ep);
this._startTranscribing(ep);
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid)
.catch(() => {/*already logged error */});
}
if (this.input.includes('digits')) {
if (this.input.includes('digits') || this.dtmfBargein) {
ep.on('dtmf', this._onDtmf.bind(this, cs, ep));
}
await this.awaitTaskDone();
this.logger.debug('Gather:exec task has completed');
} catch (err) {
this.logger.error(err, 'TaskGather:exec error');
}
@@ -122,6 +160,7 @@ class TaskGather extends Task {
}
kill(cs) {
this.logger.debug('Gather:kill');
super.kill(cs);
this._killAudio(cs);
this.ep.removeAllListeners('dtmf');
@@ -130,23 +169,33 @@ class TaskGather extends Task {
_onDtmf(cs, ep, evt) {
this.logger.debug(evt, 'TaskGather:_onDtmf');
if (evt.dtmf === this.finishOnKey) this._resolve('dtmf-terminator-key');
clearTimeout(this.interDigitTimer);
let resolved = false;
if (this.dtmfBargein) this._killAudio(cs);
if (evt.dtmf === this.finishOnKey) {
resolved = true;
this._resolve('dtmf-terminator-key');
}
else {
this.digitBuffer += evt.dtmf;
if (this.digitBuffer.length === this.numDigits) this._resolve('dtmf-num-digits');
const len = this.digitBuffer.length;
if (len === this.numDigits || len === this.maxDigits) {
resolved = true;
this._resolve('dtmf-num-digits');
}
}
if (!resolved && this.interDigitTimeout > 0 && this.digitBuffer.length >= this.minDigits) {
/* start interDigitTimer */
const ms = this.interDigitTimeout * 1000;
this.logger.debug(`starting interdigit timer of ${ms}`);
this.interDigitTimer = setTimeout(() => this._resolve('dtmf-interdigit-timeout'), ms);
}
this._killAudio(cs);
}
async _initSpeech(cs, ep) {
const opts = {};
if (this.vad.enable) {
opts.START_RECOGNIZING_ON_VAD = 1;
if (this.vad.voiceMs) opts.RECOGNIZER_VAD_VOICE_MS = this.vad.voiceMs;
if (this.vad.mode >= 0 && this.vad.mode <= 3) opts.RECOGNIZER_VAD_MODE = this.vad.mode;
}
if ('google' === this.vendor) {
if (this.sttCredentials) opts.GOOGLE_APPLICATION_CREDENTIALS = JSON.stringify(this.sttCredentials.credentials);
Object.assign(opts, {
@@ -207,7 +256,7 @@ class TaskGather extends Task {
ep.startTranscription({
vendor: this.vendor,
locale: this.language,
interim: this.partialResultCallback ? true : false,
interim: this.interim,
}).catch((err) => {
const {writeAlerts, AlertType} = this.cs.srf.locals;
this.logger.error(err, 'TaskGather:_startTranscribing error');
@@ -247,25 +296,56 @@ class TaskGather extends Task {
}
_onTranscription(cs, ep, evt) {
this.logger.debug(evt, 'TaskGather:_onTranscription');
if ('aws' === this.vendor && Array.isArray(evt) && evt.length > 0) evt = evt[0];
if ('microsoft' === this.vendor) {
const nbest = evt.NBest;
const newEvent = {
is_final: evt.RecognitionStatus === 'Success',
alternatives: [
{
confidence: nbest[0].Confidence,
transcript: nbest[0].Display
}
]
};
evt = newEvent;
const final = evt.RecognitionStatus === 'Success';
if (final) {
const nbest = evt.NBest;
evt = {
is_final: true,
alternatives: [
{
confidence: nbest[0].Confidence,
transcript: nbest[0].Display
}
]
};
}
else {
evt = {
is_final: false,
alternatives: [
{
transcript: evt.Text
}
]
};
}
}
this.logger.debug(evt, 'TaskGather:_onTranscription');
if (evt.is_final) this._resolve('speech', evt);
else if (this.partialResultHook) {
this.cs.requestor.request(this.partialResultHook, Object.assign({speech: evt}, this.cs.callInfo))
.catch((err) => this.logger.info(err, 'GatherTask:_onTranscription error'));
else {
const recognizeSuccess = evt.stability > GATHER_STABILITY_THRESHOLD;
/*
we need to make sure to only send something on barge in if we have
something valid therefore we need to check the recognition
stability, which applies to GOOGLE
for MS we will have a final event, meaning we will not run into
the current if else branch.
For AWS we still need more testing
*/
if (recognizeSuccess &&
this.bargein &&
evt.alternatives[0].transcript.split(' ').length >= this.minBargeinWordCount) {
this.logger.debug('Gather:_onTranscription - killing audio due to bargein');
this._killAudio(cs);
this._resolve('speech', evt);
}
if (this.partialResultHook) {
this.cs.requestor.request(this.partialResultHook, Object.assign({speech: evt}, this.cs.callInfo))
.catch((err) => this.logger.info(err, 'GatherTask:_onTranscription error'));
}
}
}
_onEndOfUtterance(cs, ep) {
@@ -291,7 +371,8 @@ class TaskGather extends Task {
this._clearTimer();
if (reason.startsWith('dtmf')) {
await this.performAction({digits: this.digitBuffer, reason: 'dtmfDetected'});
if (this.parentTask) this.parentTask.emit('dtmf-collected', {reason, digits: this.digitBuffer});
else await this.performAction({digits: this.digitBuffer, reason: 'dtmfDetected'});
}
else if (reason.startsWith('speech')) {
if (this.parentTask) this.parentTask.emit('transcription', evt);