Compare commits

...

15 Commits

Author SHA1 Message Date
akirilyuk
b31a8db9de do not resolve gather on barge in 2022-02-27 00:01:38 +01:00
akirilyuk
1d1388583b fix linting remove logs 2022-02-26 23:37:06 +01:00
akirilyuk
942c73e64b fix start listening 2022-02-26 23:32:00 +01:00
akirilyuk
de61cc1d3d enhance error log 2022-02-26 23:30:36 +01:00
akirilyuk
32ebc7017d add try catch in init speech 2022-02-26 23:30:12 +01:00
akirilyuk
87ef510dea more logs 2022-02-26 23:27:37 +01:00
akirilyuk
22440843d9 add more logs 2022-02-26 23:27:00 +01:00
akirilyuk
48fa3133ef add more logs 2022-02-26 23:22:13 +01:00
akirilyuk
a3352cc932 resolve with speech on barge in 2022-02-26 23:12:58 +01:00
akirilyuk
2de54ddedf start listening if not say nor play provided 2022-02-26 22:21:24 +01:00
akirilyuk
812b40fcb0 add possibility to not listen after gather say/play finished 2022-02-26 22:05:21 +01:00
akirilyuk
9a0e33e4f7 add listenAfterSpeech to gather spec 2022-02-26 21:57:09 +01:00
Dave Horton
c7e141abf1 gather: support for min/max digits and interdigit timeout 2022-02-26 15:30:35 -05:00
Dave Horton
e30701c4b4 bugfix: gather handles interim results from azure 2022-02-26 14:49:34 -05:00
Dave Horton
c5d392af6a add bargein support to gather 2022-02-26 14:41:21 -05:00
3 changed files with 193 additions and 87 deletions

View File

@@ -9,6 +9,7 @@ const {
const makeTask = require('./make_task');
const assert = require('assert');
const GATHER_STABILITY_THRESHOLD = Number(process.env.JAMBONZ_GATHER_STABILITY_THRESHOLD || 0.7);
class TaskGather extends Task {
constructor(logger, opts, parentTask) {
@@ -16,12 +17,15 @@ class TaskGather extends Task {
this.preconditions = TaskPreconditions.Endpoint;
[
'finishOnKey', 'hints', 'input', 'numDigits',
'partialResultHook',
'finishOnKey', 'hints', 'input', 'numDigits', 'minDigits', 'maxDigits',
'interDigitTimeout', 'partialResultHook', 'bargein', 'dtmfBargein',
'speechTimeout', 'timeout', 'say', 'play'
].forEach((k) => this[k] = this.data[k]);
this.timeout = (this.timeout || 5) * 1000;
/* when collecting dtmf, bargein on dtmf is true unless explicitly set to false */
if (this.dtmfBargein !== false && this.input.includes('digits')) this.dtmfBargein = true;
this.timeout = (this.timeout || 15) * 1000;
this.interim = this.partialResultCallback;
if (this.data.recognizer) {
const recognizer = this.data.recognizer;
@@ -44,6 +48,10 @@ class TaskGather extends Task {
this.profanityOption = recognizer.profanityOption || 'raw';
this.requestSnr = recognizer.requestSnr || false;
this.initialSpeechTimeoutMs = recognizer.initialSpeechTimeoutMs || 0;
/* barge in configuration */
this.listenDuringPrompt = this.data.listenDuringPrompt === false ? false : true;
this.minBargeinWordCount = this.data.minBargeinWordCount || 1;
}
this.digitBuffer = '';
@@ -52,6 +60,12 @@ class TaskGather extends Task {
if (this.say) this.sayTask = makeTask(this.logger, {say: this.say}, this);
if (this.play) this.playTask = makeTask(this.logger, {play: this.play}, this);
if (this.sayTask || this.playTask) {
// this is specially for barge in where we want to make a bargebale promt
// to a user without listening after the say task has finished
this.listenAfterSpeech = typeof this.data.listenAfterSpeech === 'boolean' ? this.data.listenAfterSpeech : true;
}
this.parentTask = parentTask;
}
@@ -84,29 +98,56 @@ class TaskGather extends Task {
throw new Error(`no speech-to-text service credentials for ${this.vendor} have been configured`);
}
const startListening = (cs, ep) => {
this._startTimer();
if (this.input.includes('speech') && !this.listenDuringPrompt) {
this._initSpeech(cs, ep)
.then(() => {
this._startTranscribing(ep);
return updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
})
.catch(() => {});
}
};
try {
if (this.sayTask) {
this.sayTask.exec(cs, ep); // kicked off, _not_ waiting for it to complete
this.sayTask.on('playDone', (err) => {
if (!this.killed) this._startTimer();
if (err) return this.logger.error({err}, 'Gather:exec Error playing tts');
this.logger.info('Gather: say task completed');
if (!this.killed) {
if (this.listenAfterSpeech === true) {
startListening(cs, ep);
} else {
this.notifyTaskDone();
}
}
});
}
else if (this.playTask) {
this.playTask.exec(cs, ep); // kicked off, _not_ waiting for it to complete
this.playTask.on('playDone', (err) => {
if (!this.killed) this._startTimer();
if (err) return this.logger.error({err}, 'Gather:exec Error playing url');
if (!this.killed) {
if (this.listenAfterSpeech === true) {
startListening(cs, ep);
} else {
this.notifyTaskDone();
}
}
});
}
else this._startTimer();
else startListening(cs, ep);
if (this.input.includes('speech')) {
if (this.input.includes('speech') && this.listenDuringPrompt) {
await this._initSpeech(cs, ep);
this._startTranscribing(ep);
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid)
.catch(() => {/*already logged error */});
}
if (this.input.includes('digits')) {
if (this.input.includes('digits') || this.dtmfBargein) {
ep.on('dtmf', this._onDtmf.bind(this, cs, ep));
}
@@ -125,89 +166,111 @@ class TaskGather extends Task {
super.kill(cs);
this._killAudio(cs);
this.ep.removeAllListeners('dtmf');
clearTimeout(this.interDigitTimer);
this._resolve('killed');
}
_onDtmf(cs, ep, evt) {
this.logger.debug(evt, 'TaskGather:_onDtmf');
if (evt.dtmf === this.finishOnKey) this._resolve('dtmf-terminator-key');
clearTimeout(this.interDigitTimer);
let resolved = false;
if (this.dtmfBargein) this._killAudio(cs);
if (evt.dtmf === this.finishOnKey && this.input.includes('digits')) {
resolved = true;
this._resolve('dtmf-terminator-key');
}
else {
this.digitBuffer += evt.dtmf;
if (this.digitBuffer.length === this.numDigits) this._resolve('dtmf-num-digits');
const len = this.digitBuffer.length;
if (len === this.numDigits || len === this.maxDigits) {
resolved = true;
this._resolve('dtmf-num-digits');
}
}
if (!resolved && this.interDigitTimeout > 0 && this.digitBuffer.length >= this.minDigits) {
/* start interDigitTimer */
const ms = this.interDigitTimeout * 1000;
this.logger.debug(`starting interdigit timer of ${ms}`);
this.interDigitTimer = setTimeout(() => this._resolve('dtmf-interdigit-timeout'), ms);
}
this._killAudio(cs);
}
async _initSpeech(cs, ep) {
const opts = {};
try {
const opts = {};
if (this.vad.enable) {
opts.START_RECOGNIZING_ON_VAD = 1;
if (this.vad.voiceMs) opts.RECOGNIZER_VAD_VOICE_MS = this.vad.voiceMs;
if (this.vad.mode >= 0 && this.vad.mode <= 3) opts.RECOGNIZER_VAD_MODE = this.vad.mode;
}
if (this.vad.enable) {
opts.START_RECOGNIZING_ON_VAD = 1;
if (this.vad.voiceMs) opts.RECOGNIZER_VAD_VOICE_MS = this.vad.voiceMs;
if (this.vad.mode >= 0 && this.vad.mode <= 3) opts.RECOGNIZER_VAD_MODE = this.vad.mode;
}
if ('google' === this.vendor) {
if (this.sttCredentials) opts.GOOGLE_APPLICATION_CREDENTIALS = JSON.stringify(this.sttCredentials.credentials);
Object.assign(opts, {
GOOGLE_SPEECH_USE_ENHANCED: true,
GOOGLE_SPEECH_SINGLE_UTTERANCE: true,
GOOGLE_SPEECH_MODEL: 'command_and_search'
});
if (this.hints && this.hints.length > 1) {
opts.GOOGLE_SPEECH_HINTS = this.hints.map((h) => h.trim()).join(',');
}
if (this.altLanguages && this.altLanguages.length > 1) {
opts.GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = this.altLanguages.join(',');
}
if (this.profanityFilter === true) {
Object.assign(opts, {'GOOGLE_SPEECH_PROFANITY_FILTER': true});
}
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance, this._onEndOfUtterance.bind(this, cs, ep));
}
else if (['aws', 'polly'].includes(this.vendor)) {
if (this.vocabularyName) opts.AWS_VOCABULARY_NAME = this.vocabularyName;
if (this.vocabularyFilterName) {
opts.AWS_VOCABULARY_NAME = this.vocabularyFilterName;
opts.AWS_VOCABULARY_FILTER_METHOD = this.filterMethod || 'mask';
}
if (this.sttCredentials) {
if ('google' === this.vendor) {
if (this.sttCredentials) opts.GOOGLE_APPLICATION_CREDENTIALS = JSON.stringify(this.sttCredentials.credentials);
Object.assign(opts, {
AWS_ACCESS_KEY_ID: this.sttCredentials.accessKeyId,
AWS_SECRET_ACCESS_KEY: this.sttCredentials.secretAccessKey,
AWS_REGION: this.sttCredentials.region
GOOGLE_SPEECH_USE_ENHANCED: true,
GOOGLE_SPEECH_SINGLE_UTTERANCE: true,
GOOGLE_SPEECH_MODEL: 'command_and_search'
});
if (this.hints && this.hints.length > 1) {
opts.GOOGLE_SPEECH_HINTS = this.hints.map((h) => h.trim()).join(',');
}
if (this.altLanguages && this.altLanguages.length > 1) {
opts.GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = this.altLanguages.join(',');
}
if (this.profanityFilter === true) {
Object.assign(opts, {'GOOGLE_SPEECH_PROFANITY_FILTER': true});
}
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance, this._onEndOfUtterance.bind(this, cs, ep));
}
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
}
else if ('microsoft' === this.vendor) {
if (this.sttCredentials) {
Object.assign(opts, {
'AZURE_SUBSCRIPTION_KEY': this.sttCredentials.api_key,
'AZURE_REGION': this.sttCredentials.region
});
else if (['aws', 'polly'].includes(this.vendor)) {
if (this.vocabularyName) opts.AWS_VOCABULARY_NAME = this.vocabularyName;
if (this.vocabularyFilterName) {
opts.AWS_VOCABULARY_NAME = this.vocabularyFilterName;
opts.AWS_VOCABULARY_FILTER_METHOD = this.filterMethod || 'mask';
}
if (this.sttCredentials) {
Object.assign(opts, {
AWS_ACCESS_KEY_ID: this.sttCredentials.accessKeyId,
AWS_SECRET_ACCESS_KEY: this.sttCredentials.secretAccessKey,
AWS_REGION: this.sttCredentials.region
});
}
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
}
if (this.hints && this.hints.length > 1) {
opts.AZURE_SPEECH_HINTS = this.hints.map((h) => h.trim()).join(',');
}
//if (this.requestSnr) opts.AZURE_REQUEST_SNR = 1;
//if (this.profanityOption !== 'raw') opts.AZURE_PROFANITY_OPTION = this.profanityOption;
if (this.initialSpeechTimeoutMs > 0) opts.AZURE_INITIAL_SPEECH_TIMEOUT_MS = this.initialSpeechTimeoutMs;
opts.AZURE_USE_OUTPUT_FORMAT_DETAILED = 1;
else if ('microsoft' === this.vendor) {
if (this.sttCredentials) {
Object.assign(opts, {
'AZURE_SUBSCRIPTION_KEY': this.sttCredentials.api_key,
'AZURE_REGION': this.sttCredentials.region
});
}
if (this.hints && this.hints.length > 1) {
opts.AZURE_SPEECH_HINTS = this.hints.map((h) => h.trim()).join(',');
}
//if (this.requestSnr) opts.AZURE_REQUEST_SNR = 1;
//if (this.profanityOption !== 'raw') opts.AZURE_PROFANITY_OPTION = this.profanityOption;
if (this.initialSpeechTimeoutMs > 0) opts.AZURE_INITIAL_SPEECH_TIMEOUT_MS = this.initialSpeechTimeoutMs;
opts.AZURE_USE_OUTPUT_FORMAT_DETAILED = 1;
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, this._onNoSpeechDetected.bind(this, cs, ep));
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected,
this._onNoSpeechDetected.bind(this, cs, ep));
}
await ep.set(opts)
.catch((err) => this.logger.error(err, 'Error setting channel variables'));
} catch (err) {
this.logger.error(err, 'could not init speech for listening');
}
await ep.set(opts)
.catch((err) => this.logger.info(err, 'Error setting channel variables'));
}
_startTranscribing(ep) {
ep.startTranscription({
vendor: this.vendor,
locale: this.language,
interim: this.partialResultCallback ? true : false,
interim: this.partialResultCallback || this.bargein,
}).catch((err) => {
const {writeAlerts, AlertType} = this.cs.srf.locals;
this.logger.error(err, 'TaskGather:_startTranscribing error');
@@ -249,25 +312,50 @@ class TaskGather extends Task {
_onTranscription(cs, ep, evt) {
if ('aws' === this.vendor && Array.isArray(evt) && evt.length > 0) evt = evt[0];
if ('microsoft' === this.vendor) {
const nbest = evt.NBest;
const newEvent = {
is_final: evt.RecognitionStatus === 'Success',
alternatives: [
{
confidence: nbest[0].Confidence,
transcript: nbest[0].Display
}
]
};
evt = newEvent;
const final = evt.RecognitionStatus === 'Success';
if (final) {
const nbest = evt.NBest;
evt = {
is_final: true,
alternatives: [
{
confidence: nbest[0].Confidence,
transcript: nbest[0].Display
}
]
};
}
else {
evt = {
is_final: false,
alternatives: [
{
transcript: evt.Text
}
]
};
}
}
this.logger.debug(evt, 'TaskGather:_onTranscription');
if (evt.is_final) this._resolve('speech', evt);
else if (this.partialResultHook) {
this.cs.requestor.request('verb:hook', this.partialResultHook, Object.assign({speech: evt}, this.cs.callInfo))
.catch((err) => this.logger.info(err, 'GatherTask:_onTranscription error'));
else {
/* google has a measure of stability:
https://cloud.google.com/speech-to-text/docs/basics#streaming_responses
others do not.
*/
const isStableEnough = typeof evt.stability === 'undefined' || evt.stability > GATHER_STABILITY_THRESHOLD;
if (this.bargein && isStableEnough &&
evt.alternatives[0].transcript.split(' ').length >= this.minBargeinWordCount) {
this.logger.debug('Gather:_onTranscription - killing audio due to speech bargein');
this._killAudio(cs);
}
if (this.partialResultHook) {
this.cs.requestor.request(this.partialResultHook, Object.assign({speech: evt}, this.cs.callInfo))
.catch((err) => this.logger.info(err, 'GatherTask:_onTranscription error'));
}
}
}
_onEndOfUtterance(cs, ep) {
this.logger.info('TaskGather:_onEndOfUtterance');
if (!this.resolved && !this.killed) {
@@ -283,6 +371,7 @@ class TaskGather extends Task {
if (this.resolved) return;
this.resolved = true;
this.logger.debug(`TaskGather:resolve with reason ${reason}`);
clearTimeout(this.interDigitTimer);
if (this.ep && this.ep.connected) {
this.ep.stopTranscription({vendor: this.vendor})

View File

@@ -21,15 +21,20 @@ class TaskSay extends Task {
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, srf);
const {writeAlerts, AlertType, stats} = srf.locals;
const {synthAudio} = srf.locals.dbHelpers;
const hasVerbLevelTts = this.synthesizer.vendor && this.synthesizer.vendor !== 'default';
const vendor = hasVerbLevelTts ? this.synthesizer.vendor : cs.speechSynthesisVendor ;
const language = hasVerbLevelTts ? this.synthesizer.language : cs.speechSynthesisLanguage ;
const voice = hasVerbLevelTts ? this.synthesizer.voice : cs.speechSynthesisVoice ;
const vendor = this.synthesizer.vendor && this.synthesizer.vendor !== 'default' ?
this.synthesizer.vendor :
cs.speechSynthesisVendor;
const language = this.synthesizer.language && this.synthesizer.language !== 'default' ?
this.synthesizer.language :
cs.speechSynthesisLanguage ;
const voice = this.synthesizer.voice && this.synthesizer.voice !== 'default' ?
this.synthesizer.voice :
cs.speechSynthesisVoice;
const engine = this.synthesizer.engine || 'standard';
const salt = cs.callSid;
const credentials = cs.getSpeechCredentials(vendor, 'tts');
this.logger.info({language, voice}, `Task:say - using vendor: ${vendor}`);
this.logger.info({vendor, language, voice}, 'TaskSay:exec');
this.ep = ep;
try {
if (!credentials) {
@@ -80,7 +85,11 @@ class TaskSay extends Task {
const {memberId, confName, confUuid} = cs;
await this.playToConfMember(this.ep, memberId, confName, confUuid, filepath[segment]);
}
else await ep.play(filepath[segment]);
else {
this.logger.debug(`Say:exec sending command to play file ${filepath[segment]}`);
await ep.play(filepath[segment]);
this.logger.debug(`Say:exec completed play file ${filepath[segment]}`);
}
} while (!this.killed && ++segment < filepath.length);
}
} catch (err) {

View File

@@ -98,8 +98,16 @@
"finishOnKey": "string",
"input": "array",
"numDigits": "number",
"minDigits": "number",
"maxDigits": "number",
"interDigitTimeout": "number",
"partialResultHook": "object|string",
"speechTimeout": "number",
"listenDuringPrompt": "boolean",
"listenAfterSpeech": "boolean",
"dtmfBargein": "boolean",
"bargein": "boolean",
"minBargeinWordCount": "number",
"timeout": "number",
"recognizer": "#recognizer",
"play": "#play",