mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2026-02-13 01:39:26 +00:00
Feat/fallback speech 02 (#429)
* feat fallback speech * feat fallback speech * feat fallback speech * gather verb * fix * wip * wip * wip * wip * wip * transcribe * transcribe * fix * wip * wip * wip * wip * fix * wip * fix
This commit is contained in:
@@ -1,8 +1,6 @@
|
||||
const Task = require('./task');
|
||||
const assert = require('assert');
|
||||
const {
|
||||
TaskName,
|
||||
TaskPreconditions,
|
||||
GoogleTranscriptionEvents,
|
||||
NuanceTranscriptionEvents,
|
||||
AwsTranscriptionEvents,
|
||||
@@ -14,47 +12,21 @@ const {
|
||||
JambonzTranscriptionEvents
|
||||
} = require('../utils/constants');
|
||||
const { normalizeJambones } = require('@jambonz/verb-specifications');
|
||||
const SttTask = require('./stt-task');
|
||||
|
||||
const STT_LISTEN_SPAN_NAME = 'stt-listen';
|
||||
|
||||
class TaskTranscribe extends Task {
|
||||
class TaskTranscribe extends SttTask {
|
||||
constructor(logger, opts, parentTask) {
|
||||
super(logger, opts);
|
||||
this.preconditions = TaskPreconditions.Endpoint;
|
||||
this.parentTask = parentTask;
|
||||
|
||||
const {
|
||||
setChannelVarsForStt,
|
||||
normalizeTranscription,
|
||||
removeSpeechListeners,
|
||||
setSpeechCredentialsAtRuntime,
|
||||
compileSonioxTranscripts
|
||||
} = require('../utils/transcription-utils')(logger);
|
||||
this.setChannelVarsForStt = setChannelVarsForStt;
|
||||
this.normalizeTranscription = normalizeTranscription;
|
||||
this.removeSpeechListeners = removeSpeechListeners;
|
||||
this.compileSonioxTranscripts = compileSonioxTranscripts;
|
||||
super(logger, opts, parentTask);
|
||||
|
||||
this.transcriptionHook = this.data.transcriptionHook;
|
||||
this.earlyMedia = this.data.earlyMedia === true || (parentTask && parentTask.earlyMedia);
|
||||
|
||||
if (this.data.recognizer) {
|
||||
const recognizer = this.data.recognizer;
|
||||
this.vendor = recognizer.vendor;
|
||||
this.language = recognizer.language;
|
||||
/* let credentials be supplied in the recognizer object at runtime */
|
||||
this.sttCredentials = setSpeechCredentialsAtRuntime(recognizer);
|
||||
|
||||
this.interim = !!recognizer.interim;
|
||||
this.separateRecognitionPerChannel = recognizer.separateRecognitionPerChannel;
|
||||
|
||||
this.data.recognizer.hints = this.data.recognizer.hints || [];
|
||||
this.data.recognizer.altLanguages = this.data.recognizer.altLanguages || [];
|
||||
this.interim = !!this.data.recognizer.interim;
|
||||
this.separateRecognitionPerChannel = this.data.recognizer.separateRecognitionPerChannel;
|
||||
}
|
||||
else this.data.recognizer = {hints: [], altLanguages: []};
|
||||
|
||||
/* buffer for soniox transcripts */
|
||||
this._sonioxTranscripts = [];
|
||||
|
||||
this.childSpan = [null, null];
|
||||
|
||||
@@ -70,7 +42,6 @@ class TaskTranscribe extends Task {
|
||||
async exec(cs, {ep, ep2}) {
|
||||
super.exec(cs);
|
||||
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
|
||||
const {getNuanceAccessToken, getIbmAccessToken} = cs.srf.locals.dbHelpers;
|
||||
|
||||
if (cs.hasGlobalSttHints) {
|
||||
const {hints, hintsBoost} = cs.globalSttHints;
|
||||
@@ -98,39 +69,40 @@ class TaskTranscribe extends Task {
|
||||
this.language = cs.speechRecognizerLanguage;
|
||||
if (this.data.recognizer) this.data.recognizer.language = this.language;
|
||||
}
|
||||
if ('default' === this.label || !this.label) {
|
||||
this.label = cs.speechRecognizerLabel;
|
||||
if (this.data.recognizer) this.data.recognizer.label = this.label;
|
||||
}
|
||||
// Fallback options
|
||||
if ('default' === this.fallbackVendor || !this.fallbackVendor) {
|
||||
this.fallbackVendor = cs.fallbackSpeechRecognizerVendor;
|
||||
if (this.data.recognizer) this.data.recognizer.fallbackVendor = this.fallbackVendor;
|
||||
}
|
||||
if ('default' === this.fallbackLanguage || !this.fallbackLanguage) {
|
||||
this.fallbackLanguage = cs.fallbackSpeechRecognizerLanguage;
|
||||
if (this.data.recognizer) this.data.recognizer.fallbackLanguage = this.fallbackLanguage;
|
||||
}
|
||||
if ('default' === this.fallbackLabel || !this.fallbackLabel) {
|
||||
this.fallbackLabel = cs.fallbackSpeechRecognizerLabel;
|
||||
if (this.data.recognizer) this.data.recognizer.fallbackLabel = this.fallbackLabel;
|
||||
}
|
||||
if (!this.data.recognizer.vendor) {
|
||||
this.data.recognizer.vendor = this.vendor;
|
||||
}
|
||||
if (!this.sttCredentials) this.sttCredentials =
|
||||
cs.getSpeechCredentials(this.vendor, 'stt', this.data.recognizer?.label || cs.speechRecognizerLabel);
|
||||
|
||||
if (!this.sttCredentials) {
|
||||
try {
|
||||
this.sttCredentials = await this._initSpeechCredentials(cs, this.vendor, this.label);
|
||||
} catch (error) {
|
||||
if (this.fallbackVendor && this.isHandledByPrimaryProvider) {
|
||||
await this._fallback();
|
||||
} else {
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
if (!this.sttCredentials) {
|
||||
const {writeAlerts, AlertType} = cs.srf.locals;
|
||||
this.logger.info(`TaskTranscribe:exec - ERROR stt using ${this.vendor} requested but creds not supplied`);
|
||||
writeAlerts({
|
||||
account_sid: cs.accountSid,
|
||||
alert_type: AlertType.STT_NOT_PROVISIONED,
|
||||
vendor: this.vendor
|
||||
}).catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
|
||||
throw new Error('no provisioned speech credentials for TTS');
|
||||
}
|
||||
|
||||
if (this.vendor === 'nuance' && this.sttCredentials.client_id) {
|
||||
/* get nuance access token */
|
||||
const {client_id, secret} = this.sttCredentials;
|
||||
const {access_token, servedFromCache} = await getNuanceAccessToken(client_id, secret, 'asr tts');
|
||||
this.logger.debug({client_id},
|
||||
`Transcribe:exec - got nuance access token ${servedFromCache ? 'from cache' : ''}`);
|
||||
this.sttCredentials = {...this.sttCredentials, access_token};
|
||||
}
|
||||
else if (this.vendor == 'ibm' && this.sttCredentials.stt_api_key) {
|
||||
/* get ibm access token */
|
||||
const {stt_api_key, stt_region} = this.sttCredentials;
|
||||
const {access_token, servedFromCache} = await getIbmAccessToken(stt_api_key);
|
||||
this.logger.debug({stt_api_key}, `Gather:exec - got ibm access token ${servedFromCache ? 'from cache' : ''}`);
|
||||
this.sttCredentials = {...this.sttCredentials, access_token, stt_region};
|
||||
}
|
||||
await this._startTranscribing(cs, ep, 1);
|
||||
if (this.separateRecognitionPerChannel && ep2) {
|
||||
await this._startTranscribing(cs, ep2, 2);
|
||||
@@ -494,23 +466,41 @@ class TaskTranscribe extends Task {
|
||||
/**
 * Error callback that, judging by its name, is wired to IBM speech error events.
 * It only logs the event payload at info level; cs, _ep and _channel are accepted but unused here.
 */
_onIbmError(cs, _ep, _channel, evt) {
|
||||
this.logger.info({evt}, 'TaskTranscribe:_onIbmError');
|
||||
}
|
||||
// NOTE(review): this span comes from a rendered diff hunk, so pre-change and post-change lines
// appear back to back without +/- markers (two signatures, duplicated nuance/alert sections).
// Presumably the `async` signature and the fallback branch are the post-change version; confirm
// against the repository before relying on the exact statement order shown here.
_onJambonzError(cs, _ep, evt) {
|
||||
async _onJambonzError(cs, _ep, evt) {
|
||||
this.logger.info({evt}, 'TaskTranscribe:_onJambonzError');
|
||||
const {writeAlerts, AlertType} = cs.srf.locals;
|
||||
// When isHandledByPrimaryProvider is set and a fallbackVendor exists: stop the current vendor's
// transcription (a failure to stop is only logged) and then attempt the fallback.
if (this.isHandledByPrimaryProvider && this.fallbackVendor) {
|
||||
_ep.stopTranscription({vendor: this.vendor})
|
||||
.catch((err) => this.logger.error({err}, `Error stopping transcription for primary vendor ${this.vendor}`));
|
||||
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
|
||||
try {
|
||||
await this._fallback();
|
||||
// Channel 2 is used when _ep is not this.ep; otherwise channel 1.
let channel = 1;
|
||||
if (this.ep !== _ep) {
|
||||
channel = 2;
|
||||
}
|
||||
// NOTE(review): _startTranscribing is not awaited here; if it returns a promise, a rejection
// would not reach the catch below.
this._startTranscribing(cs, _ep, channel);
|
||||
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
|
||||
return;
|
||||
} catch (error) {
|
||||
this.logger.info({error}, `There is error while falling back to ${this.fallbackVendor}`);
|
||||
}
|
||||
} else {
|
||||
const {writeAlerts, AlertType} = cs.srf.locals;
|
||||
|
||||
// For nuance, 404 'No speech' and 413 'Too much speech' are resolved as 'timeout'
// instead of being reported as failures.
if (this.vendor === 'nuance') {
|
||||
const {code, error} = evt;
|
||||
if (code === 404 && error === 'No speech') return this._resolve('timeout');
|
||||
if (code === 413 && error === 'Too much speech') return this._resolve('timeout');
|
||||
if (this.vendor === 'nuance') {
|
||||
const {code, error} = evt;
|
||||
if (code === 404 && error === 'No speech') return this._resolve('timeout');
|
||||
if (code === 413 && error === 'Too much speech') return this._resolve('timeout');
|
||||
}
|
||||
this.logger.info({evt}, 'TaskTranscribe:_onJambonzError');
|
||||
// Raise an STT_FAILURE alert (a failure to write the alert is only logged), then notify the error.
writeAlerts({
|
||||
account_sid: cs.accountSid,
|
||||
alert_type: AlertType.STT_FAILURE,
|
||||
message: `Custom speech vendor ${this.vendor} error: ${evt.error}`,
|
||||
vendor: this.vendor,
|
||||
}).catch((err) => this.logger.info({err}, 'Error generating alert for jambonz custom connection failure'));
|
||||
this.notifyError({msg: 'ASR error', details:`Custom speech vendor ${this.vendor} error: ${evt.error}`});
|
||||
}
|
||||
this.logger.info({evt}, 'TaskTranscribe:_onJambonzError');
|
||||
writeAlerts({
|
||||
account_sid: cs.accountSid,
|
||||
alert_type: AlertType.STT_FAILURE,
|
||||
message: `Custom speech vendor ${this.vendor} error: ${evt.error}`,
|
||||
vendor: this.vendor,
|
||||
}).catch((err) => this.logger.info({err}, 'Error generating alert for jambonz custom connection failure'));
|
||||
this.notifyError({msg: 'ASR error', details:`Custom speech vendor ${this.vendor} error: ${evt.error}`});
|
||||
}
|
||||
|
||||
_startAsrTimer(channel) {
|
||||
|
||||
Reference in New Issue
Block a user