add say and gather task features

This commit is contained in:
akirilyuk
2022-02-14 14:08:56 +01:00
parent c1130adf03
commit 15f85c9730
2 changed files with 139 additions and 49 deletions

View File

@@ -10,19 +10,25 @@ const {
const makeTask = require('./make_task'); const makeTask = require('./make_task');
const assert = require('assert'); const assert = require('assert');
const GATHER_STABILITY_THRESHOLD = Number(process.env.JAMBONZ_GATHER_STABILITY_THRESHOLD || 0.7);
class TaskGather extends Task { class TaskGather extends Task {
constructor(logger, opts, parentTask) { constructor(logger, opts, parentTask) {
super(logger, opts); super(logger, opts);
this.preconditions = TaskPreconditions.Endpoint; this.preconditions = TaskPreconditions.Endpoint;
[ [
'finishOnKey', 'hints', 'input', 'numDigits', 'finishOnKey', 'hints', 'input', 'numDigits', 'minDigits', 'maxDigits',
'partialResultHook', 'interDigitTimeout', 'submitDigit', 'partialResultHook', 'bargein', 'dtmfBargein',
'retries', 'retryPromptTts', 'retryPromptUrl',
'speechTimeout', 'timeout', 'say', 'play' 'speechTimeout', 'timeout', 'say', 'play'
].forEach((k) => this[k] = this.data[k]); ].forEach((k) => this[k] = this.data[k]);
this.listenDuringPrompt = this.data.listenDuringPrompt === false ? false : true;
this.minBargeinWordCount = this.data.minBargeinWordCount || 1;
this.timeout = (this.timeout || 5) * 1000; this.logger.debug({opts}, 'created gather task');
this.interim = this.partialResultCallback; this.timeout = (this.timeout || 15) * 1000;
this.interim = this.partialResultCallback || this.bargein;
if (this.data.recognizer) { if (this.data.recognizer) {
const recognizer = this.data.recognizer; const recognizer = this.data.recognizer;
this.vendor = recognizer.vendor; this.vendor = recognizer.vendor;
@@ -30,10 +36,6 @@ class TaskGather extends Task {
this.hints = recognizer.hints || []; this.hints = recognizer.hints || [];
this.altLanguages = recognizer.altLanguages || []; this.altLanguages = recognizer.altLanguages || [];
/* vad: if provided, we dont connect to recognizer until voice activity is detected */
const {enable, voiceMs = 0, mode = -1} = recognizer.vad || {};
this.vad = {enable, voiceMs, mode};
/* aws options */ /* aws options */
this.vocabularyName = recognizer.vocabularyName; this.vocabularyName = recognizer.vocabularyName;
this.vocabularyFilterName = recognizer.vocabularyFilterName; this.vocabularyFilterName = recognizer.vocabularyFilterName;
@@ -52,6 +54,12 @@ class TaskGather extends Task {
if (this.say) this.sayTask = makeTask(this.logger, {say: this.say}, this); if (this.say) this.sayTask = makeTask(this.logger, {say: this.say}, this);
if (this.play) this.playTask = makeTask(this.logger, {play: this.play}, this); if (this.play) this.playTask = makeTask(this.logger, {play: this.play}, this);
if(this.sayTask || this.playTask){
// this is specifically for barge-in, where we want to play a bargeable prompt
// to the user without listening after the say/play task has finished
this.listenAfterSpeech = typeof this.data.listenAfterSpeech === "boolean" ? this.data.listenAfterSpeech : true;
}
this.parentTask = parentTask; this.parentTask = parentTask;
} }
@@ -84,33 +92,63 @@ class TaskGather extends Task {
throw new Error(`no speech-to-text service credentials for ${this.vendor} have been configured`); throw new Error(`no speech-to-text service credentials for ${this.vendor} have been configured`);
} }
const startListening = (cs, ep) => {
this._startTimer();
if (this.input.includes('speech') && !this.listenDuringPrompt) {
this._initSpeech(cs, ep)
.then(() => {
this._startTranscribing(ep);
return updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
})
.catch(() => {});
}
};
try { try {
if (this.sayTask) { if (this.sayTask) {
this.sayTask.exec(cs, ep); // kicked off, _not_ waiting for it to complete this.logger.debug('Gather: kicking off say task');
this.sayTask.on('playDone', (err) => { this.sayTask.exec(cs, ep);
if (!this.killed) this._startTimer(); this.sayTask.on('playDone', async(err) => {
if (err) return this.logger.error({err}, 'Gather:exec Error playing tts');
this.logger.debug('Gather: say task completed');
if (!this.killed) {
if (this.listenAfterSpeech === true) {
startListening(cs, ep);
} else {
this.notifyTaskDone();
}
}
}); });
} }
else if (this.playTask) { else if (this.playTask) {
this.playTask.exec(cs, ep); // kicked off, _not_ waiting for it to complete this.playTask.exec(cs, ep); // kicked off, _not_ waiting for it to complete
this.playTask.on('playDone', (err) => { this.playTask.on('playDone', async(err) => {
if (!this.killed) this._startTimer(); if (err) return this.logger.error({err}, 'Gather:exec Error playing url');
}); if (!this.killed) {
if (this.listenAfterSpeech === true) {
startListening(cs, ep);
} else {
this.notifyTaskDone();
} }
else this._startTimer(); }
}
);
}
else startListening(cs, ep);
if (this.input.includes('speech')) { if (this.input.includes('speech') && this.listenDuringPrompt) {
await this._initSpeech(cs, ep); await this._initSpeech(cs, ep);
this._startTranscribing(ep); this._startTranscribing(ep);
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid) updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid)
.catch(() => {/*already logged error */}); .catch(() => {/*already logged error */});
} }
if (this.input.includes('digits')) { if (this.input.includes('digits') || this.dtmfBargein) {
ep.on('dtmf', this._onDtmf.bind(this, cs, ep)); ep.on('dtmf', this._onDtmf.bind(this, cs, ep));
} }
await this.awaitTaskDone(); await this.awaitTaskDone();
this.logger.debug('Gather:exec task has completed');
} catch (err) { } catch (err) {
this.logger.error(err, 'TaskGather:exec error'); this.logger.error(err, 'TaskGather:exec error');
} }
@@ -122,6 +160,7 @@ class TaskGather extends Task {
} }
kill(cs) { kill(cs) {
this.logger.debug('Gather:kill');
super.kill(cs); super.kill(cs);
this._killAudio(cs); this._killAudio(cs);
this.ep.removeAllListeners('dtmf'); this.ep.removeAllListeners('dtmf');
@@ -130,23 +169,33 @@ class TaskGather extends Task {
_onDtmf(cs, ep, evt) { _onDtmf(cs, ep, evt) {
this.logger.debug(evt, 'TaskGather:_onDtmf'); this.logger.debug(evt, 'TaskGather:_onDtmf');
if (evt.dtmf === this.finishOnKey) this._resolve('dtmf-terminator-key'); clearTimeout(this.interDigitTimer);
let resolved = false;
if (this.dtmfBargein) this._killAudio(cs);
if (evt.dtmf === this.finishOnKey) {
resolved = true;
this._resolve('dtmf-terminator-key');
}
else { else {
this.digitBuffer += evt.dtmf; this.digitBuffer += evt.dtmf;
if (this.digitBuffer.length === this.numDigits) this._resolve('dtmf-num-digits'); const len = this.digitBuffer.length;
if (len === this.numDigits || len === this.maxDigits) {
resolved = true;
this._resolve('dtmf-num-digits');
}
}
if (!resolved && this.interDigitTimeout > 0 && this.digitBuffer.length >= this.minDigits) {
/* start interDigitTimer */
const ms = this.interDigitTimeout * 1000;
this.logger.debug(`starting interdigit timer of ${ms}`);
this.interDigitTimer = setTimeout(() => this._resolve('dtmf-interdigit-timeout'), ms);
} }
this._killAudio(cs);
} }
async _initSpeech(cs, ep) { async _initSpeech(cs, ep) {
const opts = {}; const opts = {};
if (this.vad.enable) {
opts.START_RECOGNIZING_ON_VAD = 1;
if (this.vad.voiceMs) opts.RECOGNIZER_VAD_VOICE_MS = this.vad.voiceMs;
if (this.vad.mode >= 0 && this.vad.mode <= 3) opts.RECOGNIZER_VAD_MODE = this.vad.mode;
}
if ('google' === this.vendor) { if ('google' === this.vendor) {
if (this.sttCredentials) opts.GOOGLE_APPLICATION_CREDENTIALS = JSON.stringify(this.sttCredentials.credentials); if (this.sttCredentials) opts.GOOGLE_APPLICATION_CREDENTIALS = JSON.stringify(this.sttCredentials.credentials);
Object.assign(opts, { Object.assign(opts, {
@@ -207,7 +256,7 @@ class TaskGather extends Task {
ep.startTranscription({ ep.startTranscription({
vendor: this.vendor, vendor: this.vendor,
locale: this.language, locale: this.language,
interim: this.partialResultCallback ? true : false, interim: this.interim,
}).catch((err) => { }).catch((err) => {
const {writeAlerts, AlertType} = this.cs.srf.locals; const {writeAlerts, AlertType} = this.cs.srf.locals;
this.logger.error(err, 'TaskGather:_startTranscribing error'); this.logger.error(err, 'TaskGather:_startTranscribing error');
@@ -247,11 +296,14 @@ class TaskGather extends Task {
} }
_onTranscription(cs, ep, evt) { _onTranscription(cs, ep, evt) {
this.logger.debug(evt, 'TaskGather:_onTranscription');
if ('aws' === this.vendor && Array.isArray(evt) && evt.length > 0) evt = evt[0]; if ('aws' === this.vendor && Array.isArray(evt) && evt.length > 0) evt = evt[0];
if ('microsoft' === this.vendor) { if ('microsoft' === this.vendor) {
const final = evt.RecognitionStatus === 'Success';
if (final) {
const nbest = evt.NBest; const nbest = evt.NBest;
const newEvent = { evt = {
is_final: evt.RecognitionStatus === 'Success', is_final: true,
alternatives: [ alternatives: [
{ {
confidence: nbest[0].Confidence, confidence: nbest[0].Confidence,
@@ -259,15 +311,43 @@ class TaskGather extends Task {
} }
] ]
}; };
evt = newEvent;
} }
this.logger.debug(evt, 'TaskGather:_onTranscription'); else {
evt = {
is_final: false,
alternatives: [
{
transcript: evt.Text
}
]
};
}
}
if (evt.is_final) this._resolve('speech', evt); if (evt.is_final) this._resolve('speech', evt);
else if (this.partialResultHook) { else {
const recognizeSuccess = evt.stability > GATHER_STABILITY_THRESHOLD;
/*
On barge-in we must only resolve with something valid, so we check
the recognition stability, which applies to Google.
For Microsoft we will have a final event, meaning we will not run
into the current else branch.
For AWS we still need more testing.
*/
if (recognizeSuccess &&
this.bargein &&
evt.alternatives[0].transcript.split(' ').length >= this.minBargeinWordCount) {
this.logger.debug('Gather:_onTranscription - killing audio due to bargein');
this._killAudio(cs);
this._resolve('speech', evt);
}
if (this.partialResultHook) {
this.cs.requestor.request(this.partialResultHook, Object.assign({speech: evt}, this.cs.callInfo)) this.cs.requestor.request(this.partialResultHook, Object.assign({speech: evt}, this.cs.callInfo))
.catch((err) => this.logger.info(err, 'GatherTask:_onTranscription error')); .catch((err) => this.logger.info(err, 'GatherTask:_onTranscription error'));
} }
} }
}
_onEndOfUtterance(cs, ep) { _onEndOfUtterance(cs, ep) {
this.logger.info('TaskGather:_onEndOfUtterance'); this.logger.info('TaskGather:_onEndOfUtterance');
if (!this.resolved && !this.killed) { if (!this.resolved && !this.killed) {
@@ -291,7 +371,8 @@ class TaskGather extends Task {
this._clearTimer(); this._clearTimer();
if (reason.startsWith('dtmf')) { if (reason.startsWith('dtmf')) {
await this.performAction({digits: this.digitBuffer, reason: 'dtmfDetected'}); if (this.parentTask) this.parentTask.emit('dtmf-collected', {reason, digits: this.digitBuffer});
else await this.performAction({digits: this.digitBuffer, reason: 'dtmfDetected'});
} }
else if (reason.startsWith('speech')) { else if (reason.startsWith('speech')) {
if (this.parentTask) this.parentTask.emit('transcription', evt); if (this.parentTask) this.parentTask.emit('transcription', evt);

View File

@@ -21,15 +21,20 @@ class TaskSay extends Task {
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, srf); const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, srf);
const {writeAlerts, AlertType, stats} = srf.locals; const {writeAlerts, AlertType, stats} = srf.locals;
const {synthAudio} = srf.locals.dbHelpers; const {synthAudio} = srf.locals.dbHelpers;
const hasVerbLevelTts = this.synthesizer.vendor && this.synthesizer.vendor !== 'default'; const vendor = this.synthesizer.vendor && this.synthesizer.vendor !== 'default' ? this.synthesizer.vendor : cs.speechSynthesisVendor;
const vendor = hasVerbLevelTts ? this.synthesizer.vendor : cs.speechSynthesisVendor ; const language = this.synthesizer.language && this.synthesizer.language !== 'default' ? this.synthesizer.language : cs.speechSynthesisLanguage ;
const language = hasVerbLevelTts ? this.synthesizer.language : cs.speechSynthesisLanguage ; const voice = this.synthesizer.voice && this.synthesizer.voice !== 'default' ? this.synthesizer.voice : cs.speechSynthesisVoice;
const voice = hasVerbLevelTts ? this.synthesizer.voice : cs.speechSynthesisVoice ;
const engine = this.synthesizer.engine || 'standard'; const engine = this.synthesizer.engine || 'standard';
const salt = cs.callSid; const salt = cs.callSid;
const credentials = cs.getSpeechCredentials(vendor, 'tts'); const credentials = cs.getSpeechCredentials(vendor, 'tts');
this.logger.info({language, voice}, `Task:say - using vendor: ${vendor}`); this.logger.info({language,
voice,
localSynthesizer: this.synthesizer,
speechSynthesisVendor: cs.speechSynthesisVendor,
speechSynthesisLanguage: cs.speechSynthesisLanguage,
speechSynthesisVoice: cs.speechSynthesisVoice
}, `Task:say - using vendor: ${vendor}`);
this.ep = ep; this.ep = ep;
try { try {
if (!credentials) { if (!credentials) {
@@ -79,7 +84,11 @@ class TaskSay extends Task {
const {memberId, confName, confUuid} = cs; const {memberId, confName, confUuid} = cs;
await this.playToConfMember(this.ep, memberId, confName, confUuid, filepath[segment]); await this.playToConfMember(this.ep, memberId, confName, confUuid, filepath[segment]);
} }
else await ep.play(filepath[segment]); else {
this.logger.debug(`Say:exec sending command to play file ${filepath[segment]}`);
await ep.play(filepath[segment]);
this.logger.debug(`Say:exec completed play file ${filepath[segment]}`);
}
} while (!this.killed && ++segment < filepath.length); } while (!this.killed && ++segment < filepath.length);
} }
} catch (err) { } catch (err) {