Feat/fallback speech 02 (#429)

* feat fallback speech

* feat fallback speech

* feat fallback speech

* gather verb

* fix

* wip

* wip

* wip

* wip

* wip

* transcribe

* transcribe

* fix

* wip

* wip

* wip

* wip

* fix

* wip

* fix
This commit is contained in:
Hoan Luu Huu
2023-08-22 20:22:01 +07:00
committed by GitHub
parent 6f0dbef433
commit b1ecf069bf
13 changed files with 501 additions and 223 deletions

View File

@@ -1,7 +1,5 @@
const Task = require('./task');
const {
TaskName,
TaskPreconditions,
GoogleTranscriptionEvents,
NuanceTranscriptionEvents,
AwsTranscriptionEvents,
@@ -19,6 +17,7 @@ const {
} = require('../config');
const makeTask = require('./make_task');
const assert = require('assert');
const SttTask = require('./stt-task');
const compileTranscripts = (logger, evt, arr) => {
if (!Array.isArray(arr) || arr.length === 0) return;
@@ -30,23 +29,9 @@ const compileTranscripts = (logger, evt, arr) => {
evt.alternatives[0].transcript = t.trim();
};
class TaskGather extends Task {
class TaskGather extends SttTask {
constructor(logger, opts, parentTask) {
super(logger, opts);
this.preconditions = TaskPreconditions.Endpoint;
const {
setChannelVarsForStt,
normalizeTranscription,
removeSpeechListeners,
setSpeechCredentialsAtRuntime,
compileSonioxTranscripts
} = require('../utils/transcription-utils')(logger);
this.setChannelVarsForStt = setChannelVarsForStt;
this.normalizeTranscription = normalizeTranscription;
this.removeSpeechListeners = removeSpeechListeners;
this.compileSonioxTranscripts = compileSonioxTranscripts;
super(logger, opts, parentTask);
[
'finishOnKey', 'input', 'numDigits', 'minDigits', 'maxDigits',
'interDigitTimeout', 'partialResultHook', 'bargein', 'dtmfBargein',
@@ -62,16 +47,10 @@ class TaskGather extends Task {
this.listenDuringPrompt = this.data.listenDuringPrompt === false ? false : true;
this.minBargeinWordCount = this.data.minBargeinWordCount || 1;
if (this.data.recognizer) {
const recognizer = this.data.recognizer;
this.vendor = recognizer.vendor;
this.language = recognizer.language;
/* let credentials be supplied in the recognizer object at runtime */
this.sttCredentials = setSpeechCredentialsAtRuntime(recognizer);
/* continuous ASR (i.e. compile transcripts until a special timeout or dtmf key) */
this.asrTimeout = typeof recognizer.asrTimeout === 'number' ? recognizer.asrTimeout * 1000 : 0;
if (this.asrTimeout > 0) this.asrDtmfTerminationDigit = recognizer.asrDtmfTerminationDigit;
this.asrTimeout = typeof this.data.recognizer.asrTimeout === 'number' ?
this.data.recognizer.asrTimeout * 1000 : 0;
if (this.asrTimeout > 0) this.asrDtmfTerminationDigit = this.data.recognizer.asrDtmfTerminationDigit;
this.isContinuousAsr = this.asrTimeout > 0;
if (Array.isArray(this.data.recognizer.hints) &&
@@ -79,10 +58,7 @@ class TaskGather extends Task {
logger.debug('Gather: an empty hints array was supplied, so we will mask global hints');
this.maskGlobalSttHints = true;
}
this.data.recognizer.hints = this.data.recognizer.hints || [];
this.data.recognizer.altLanguages = this.data.recognizer.altLanguages || [];
}
else this.data.recognizer = {hints: [], altLanguages: []};
this.digitBuffer = '';
this._earlyMedia = this.data.earlyMedia === true;
@@ -97,11 +73,6 @@ class TaskGather extends Task {
/* buffer speech for continuous asr */
this._bufferedTranscripts = [];
/* buffer for soniox transcripts */
this._sonioxTranscripts = [];
this.parentTask = parentTask;
this.partialTranscriptsCount = 0;
}
@@ -137,7 +108,6 @@ class TaskGather extends Task {
this.logger.debug({options: this.data}, 'Gather:exec');
await super.exec(cs);
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
const {getNuanceAccessToken, getIbmAccessToken} = cs.srf.locals.dbHelpers;
if (cs.hasGlobalSttHints && !this.maskGlobalSttHints) {
const {hints, hintsBoost} = cs.globalSttHints;
@@ -184,58 +154,59 @@ class TaskGather extends Task {
this.language = cs.speechRecognizerLanguage;
if (this.data.recognizer) this.data.recognizer.language = this.language;
}
if ('default' === this.label || !this.label) {
this.label = cs.speechRecognizerLabel;
if (this.data.recognizer) this.data.recognizer.label = this.label;
}
// Fallback options
if ('default' === this.fallbackVendor || !this.fallbackVendor) {
this.fallbackVendor = cs.fallbackSpeechRecognizerVendor;
if (this.data.recognizer) this.data.recognizer.fallbackVendor = this.fallbackVendor;
}
if ('default' === this.fallbackLanguage || !this.fallbackLanguage) {
this.fallbackLanguage = cs.fallbackSpeechRecognizerLanguage;
if (this.data.recognizer) this.data.recognizer.fallbackLanguage = this.fallbackLanguage;
}
if ('default' === this.fallbackLabel || !this.fallbackLabel) {
this.fallbackLabel = cs.fallbackSpeechRecognizerLabel;
if (this.data.recognizer) this.data.recognizer.fallbackLabel = this.fallbackLabel;
}
if (!this.data.recognizer.vendor) {
this.data.recognizer.vendor = this.vendor;
}
if (this.needsStt && !this.sttCredentials) this.sttCredentials =
cs.getSpeechCredentials(this.vendor, 'stt', this.data.recognizer?.label || cs.speechRecognizerLabel);
if (this.needsStt && !this.sttCredentials) {
const {writeAlerts, AlertType} = cs.srf.locals;
this.logger.info(`TaskGather:exec - ERROR stt using ${this.vendor} requested but creds not supplied`);
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_NOT_PROVISIONED,
vendor: this.vendor
}).catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
// Notify application that STT vender is wrong.
this.notifyError({
msg: 'ASR error',
details: `No speech-to-text service credentials for ${this.vendor} have been configured`
});
this.notifyTaskDone();
throw new Error(`No speech-to-text service credentials for ${this.vendor} have been configured`);
try {
this.sttCredentials = await this._initSpeechCredentials(cs, this.vendor, this.label);
} catch (error) {
if (this.fallbackVendor && this.isHandledByPrimaryProvider) {
await this._fallback();
} else {
throw error;
}
}
}
if (this.vendor === 'nuance' && this.sttCredentials.client_id) {
/* get nuance access token */
const {client_id, secret} = this.sttCredentials;
const {access_token, servedFromCache} = await getNuanceAccessToken(client_id, secret, 'asr tts');
this.logger.debug({client_id}, `Gather:exec - got nuance access token ${servedFromCache ? 'from cache' : ''}`);
this.sttCredentials = {...this.sttCredentials, access_token};
}
else if (this.vendor == 'ibm' && this.sttCredentials.stt_api_key) {
/* get ibm access token */
const {stt_api_key, stt_region} = this.sttCredentials;
const {access_token, servedFromCache} = await getIbmAccessToken(stt_api_key);
this.logger.debug({stt_api_key}, `Gather:exec - got ibm access token ${servedFromCache ? 'from cache' : ''}`);
this.sttCredentials = {...this.sttCredentials, access_token, stt_region};
}
const startListening = (cs, ep) => {
const startListening = async(cs, ep) => {
this._startTimer();
if (this.isContinuousAsr && 0 === this.timeout) this._startAsrTimer();
if (this.input.includes('speech') && !this.listenDuringPrompt) {
this._initSpeech(cs, ep)
.then(() => {
if (this.killed) {
this.logger.info('Gather:exec - task was quickly killed so do not transcribe');
return;
}
this._startTranscribing(ep);
return updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
})
.catch((err) => {
this.logger.error({err}, 'error in initSpeech');
});
try {
await this._initSpeech(cs, ep);
if (this.killed) {
this.logger.info('Gather:exec - task was quickly killed so do not transcribe');
return;
}
this._startTranscribing(ep);
return updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
} catch (e) {
if (this.fallbackVendor && this.isHandledByPrimaryProvider) {
await this._fallback();
startListening(cs, ep);
} else {
this.logger.error({error: e}, 'error in initSpeech');
}
}
}
};
@@ -705,8 +676,22 @@ class TaskGather extends Task {
_onJambonzConnect(_cs, _ep) {
this.logger.debug('TaskGather:_onJambonzConnect');
}
_onJambonzError(cs, _ep, evt) {
async _onJambonzError(cs, _ep, evt) {
this.logger.info({evt}, 'TaskGather:_onJambonzError');
if (this.isHandledByPrimaryProvider && this.fallbackVendor) {
_ep.stopTranscription({vendor: this.vendor})
.catch((err) => this.logger.error({err}, `Error stopping transcription for primary vendor ${this.vendor}`));
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
try {
await this._fallback();
await this._initSpeech(cs, _ep);
this._startTranscribing(_ep);
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
return;
} catch (error) {
this.logger.info({error}, `There is error while falling back to ${this.fallbackVendor}`);
}
}
const {writeAlerts, AlertType} = cs.srf.locals;
if (this.vendor === 'nuance') {