From 09961f564aaf98a0afd53c36211e984960394ea1 Mon Sep 17 00:00:00 2001
From: Quan HL
Date: Fri, 18 Aug 2023 10:24:41 +0700
Subject: [PATCH] fix transcribe

---
Review notes (this free-form section is not part of the commit message):

What the patch does
 * gather.js: resolves label / fallbackVendor / fallbackLanguage /
   fallbackLabel from the call session when they are unset or 'default',
   and writes the resolved values back onto this.data.recognizer.
 * transcribe.js: does the same resolution, moves the credential lookup
   (alert on missing creds, nuance / ibm access-token exchange) into a new
   _initSpeechCredential(cs, vendor, label) helper, passes the credentials
   to _startTranscribing() explicitly, and retries with the fallback
   credentials when the primary attempt throws.

Fixes applied on top of the submitted patch (added lines only, no line
counts changed, so hunk headers and the diffstat are unchanged)
 * transcribe.js exec(): the fallback-label default branch assigned
   this.label instead of this.fallbackLabel, overwriting the primary label
   and leaving fallbackLabel unresolved.
 * transcribe.js exec(): the inner catch of the fallback attempt logged the
   outer `err` instead of its own `error`.
 * transcribe.js exec(): fallback credentials are only looked up when a
   fallback vendor is actually configured; otherwise _initSpeechCredential
   would raise an STT_NOT_PROVISIONED alert and throw for every call that
   has no fallback vendor.
 * transcribe.js _initSpeechCredential(): `==` -> `===` for the ibm check.

Open points for the author (not changed here)
 * The primary credential lookup now runs before the try block, so a
   failure rejects exec() instead of emitting 'error' on parentTask.
 * _startTranscribing() still does switch (this.vendor); as far as this
   patch shows, a fallback attempt therefore takes the primary vendor's
   branch with the fallback credentials - please confirm.
 * updateSpeechCredentialLastUsed() lost its .catch(); confirm it cannot
   reject.
 * The 'no provisioned speech credentials for TTS' text is kept as-is but
   this is an STT task.

NOTE: line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch (2-space indentation assumed);
verify that it applies cleanly with `git apply --check`.

 lib/tasks/gather.js     |  28 ++++++----
 lib/tasks/transcribe.js | 121 ++++++++++++++++++++++++++++------------
 2 files changed, 101 insertions(+), 48 deletions(-)

diff --git a/lib/tasks/gather.js b/lib/tasks/gather.js
index 72d0339c..5fa90482 100644
--- a/lib/tasks/gather.js
+++ b/lib/tasks/gather.js
@@ -226,17 +226,23 @@ class TaskGather extends Task {
       this.language = cs.speechRecognizerLanguage;
       if (this.data.recognizer) this.data.recognizer.language = this.language;
     }
-    this.fallbackVendor = this.data.recognizer && this.data.recognizer.fallbackVendor !== 'default' ?
-      this.data.recognizer.fallbackVendor :
-      cs.fallbackSpeechRecognizerVendor;
-
-    this.fallbackLanguage = this.data.recognizer && this.data.recognizer.fallbackLanguage !== 'default' ?
-      this.data.recognizer.fallbackLanguage :
-      cs.fallbackSpeechRecognizerLanguage;
-
-    this.fallbackLabel = this.data.recognizer && this.data.recognizer.fallbackLabel !== 'default' ?
-      this.data.recognizer.fallbackLabel :
-      cs.fallbackSpeechRecognizerLabel;
+    if ('default' === this.label || !this.label) {
+      this.label = cs.speechRecognizerLabel;
+      if (this.data.recognizer) this.data.recognizer.label = this.label;
+    }
+    // Fallback options
+    if ('default' === this.fallbackVendor || !this.fallbackVendor) {
+      this.fallbackVendor = cs.fallbackSpeechRecognizerVendor;
+      if (this.data.recognizer) this.data.recognizer.fallbackVendor = this.fallbackVendor;
+    }
+    if ('default' === this.fallbackLanguage || !this.fallbackLanguage) {
+      this.fallbackLanguage = cs.fallbackSpeechRecognizerLanguage;
+      if (this.data.recognizer) this.data.recognizer.fallbackLanguage = this.fallbackLanguage;
+    }
+    if ('default' === this.fallbackLabel || !this.fallbackLabel) {
+      this.fallbackLabel = cs.fallbackSpeechRecognizerLabel;
+      if (this.data.recognizer) this.data.recognizer.fallbackLabel = this.fallbackLabel;
+    }
 
     if (!this.data.recognizer.vendor) {
       this.data.recognizer.vendor = this.vendor;
diff --git a/lib/tasks/transcribe.js b/lib/tasks/transcribe.js
index 5e3cf9b5..b386ceac 100644
--- a/lib/tasks/transcribe.js
+++ b/lib/tasks/transcribe.js
@@ -42,6 +42,11 @@ class TaskTranscribe extends Task {
     const recognizer = this.data.recognizer;
     this.vendor = recognizer.vendor;
     this.language = recognizer.language;
+    this.label = recognizer.label;
+
+    this.fallbackVendor = recognizer.fallbackVendor || 'default';
+    this.fallbackLanguage = recognizer.fallbackLanguage || 'default';
+    this.fallbackLabel = recognizer.fallbackLabel || 'default';
 
     /* let credentials be supplied in the recognizer object at runtime */
     this.sttCredentials = setSpeechCredentialsAtRuntime(recognizer);
@@ -67,6 +72,39 @@ class TaskTranscribe extends Task {
 
   get name() { return TaskName.Transcribe; }
 
+  async _initSpeechCredential(cs, vendor, label) {
+    let credentials = cs.getSpeechCredentials(vendor, 'stt', label);
+
+    if (!credentials) {
+      const {writeAlerts, AlertType} = cs.srf.locals;
+      this.logger.info(`TaskTranscribe:exec - ERROR stt using ${vendor} requested but creds not supplied`);
+      writeAlerts({
+        account_sid: cs.accountSid,
+        alert_type: AlertType.STT_NOT_PROVISIONED,
+        vendor: vendor
+      }).catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
+      throw new Error('no provisioned speech credentials for TTS');
+    }
+
+    if (vendor === 'nuance' && credentials.client_id) {
+      /* get nuance access token */
+      const {client_id, secret} = credentials;
+      const {access_token, servedFromCache} = await getNuanceAccessToken(client_id, secret, 'asr tts');
+      this.logger.debug({client_id},
+        `Transcribe:exec - got nuance access token ${servedFromCache ? 'from cache' : ''}`);
+      credentials = {...credentials, access_token};
+    }
+    else if (vendor === 'ibm' && credentials.stt_api_key) {
+      /* get ibm access token */
+      const {stt_api_key, stt_region} = credentials;
+      const {access_token, servedFromCache} = await getIbmAccessToken(stt_api_key);
+      this.logger.debug({stt_api_key}, `Gather:exec - got ibm access token ${servedFromCache ? 'from cache' : ''}`);
+      credentials = {...credentials, access_token, stt_region};
+    }
+
+    return credentials;
+  }
+
   async exec(cs, {ep, ep2}) {
     super.exec(cs);
     const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
@@ -98,51 +136,60 @@ class TaskTranscribe extends Task {
       this.language = cs.speechRecognizerLanguage;
       if (this.data.recognizer) this.data.recognizer.language = this.language;
     }
+    if ('default' === this.label || !this.label) {
+      this.label = cs.speechRecognizerLabel;
+      if (this.data.recognizer) this.data.recognizer.label = this.label;
+    }
+    // fallback options
+    if ('default' === this.fallbackVendor || !this.fallbackVendor) {
+      this.fallbackVendor = cs.fallbackSpeechRecognizerVendor;
+      if (this.data.recognizer) this.data.recognizer.fallbackVendor = this.fallbackVendor;
+    }
+    if ('default' === this.fallbackLanguage || !this.fallbackLanguage) {
+      this.fallbackLanguage = cs.fallbackSpeechRecognizerLanguage;
+      if (this.data.recognizer) this.data.recognizer.fallbackLanguage = this.fallbackLanguage;
+    }
+    if ('default' === this.fallbackLabel || !this.fallbackLabel) {
+      this.fallbackLabel = cs.fallbackSpeechRecognizerLabel;
+      if (this.data.recognizer) this.data.recognizer.fallbackLabel = this.fallbackLabel;
+    }
     if (!this.data.recognizer.vendor) {
       this.data.recognizer.vendor = this.vendor;
     }
-    if (!this.sttCredentials) this.sttCredentials =
-      cs.getSpeechCredentials(this.vendor, 'stt', this.data.recognizer?.label || cs.speechRecognizerLabel);
+    if (!this.sttCredentials) {
+      this.sttCredentials = await this._initSpeechCredential(cs, this.vendor, this.label);
+    }
+    if (!this.fallbackSttCredentials && this.fallbackVendor) {
+      this.fallbackSttCredentials = await this._initSpeechCredential(cs, this.fallbackVendor, this.fallbackLabel);
+    }
 
     try {
-      if (!this.sttCredentials) {
-        const {writeAlerts, AlertType} = cs.srf.locals;
-        this.logger.info(`TaskTranscribe:exec - ERROR stt using ${this.vendor} requested but creds not supplied`);
-        writeAlerts({
-          account_sid: cs.accountSid,
-          alert_type: AlertType.STT_NOT_PROVISIONED,
-          vendor: this.vendor
-        }).catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
-        throw new Error('no provisioned speech credentials for TTS');
-      }
-
-      if (this.vendor === 'nuance' && this.sttCredentials.client_id) {
-        /* get nuance access token */
-        const {client_id, secret} = this.sttCredentials;
-        const {access_token, servedFromCache} = await getNuanceAccessToken(client_id, secret, 'asr tts');
-        this.logger.debug({client_id},
-          `Transcribe:exec - got nuance access token ${servedFromCache ? 'from cache' : ''}`);
-        this.sttCredentials = {...this.sttCredentials, access_token};
-      }
-      else if (this.vendor == 'ibm' && this.sttCredentials.stt_api_key) {
-        /* get ibm access token */
-        const {stt_api_key, stt_region} = this.sttCredentials;
-        const {access_token, servedFromCache} = await getIbmAccessToken(stt_api_key);
-        this.logger.debug({stt_api_key}, `Gather:exec - got ibm access token ${servedFromCache ? 'from cache' : ''}`);
-        this.sttCredentials = {...this.sttCredentials, access_token, stt_region};
-      }
-      await this._startTranscribing(cs, ep, 1);
+      await this._startTranscribing(cs, ep, 1, this.sttCredentials);
       if (this.separateRecognitionPerChannel && ep2) {
-        await this._startTranscribing(cs, ep2, 2);
+        await this._startTranscribing(cs, ep2, 2, this.sttCredentials);
       }
-
-      updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid)
-        .catch(() => {/*already logged error */});
-
+      updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
       await this.awaitTaskDone();
     } catch (err) {
       this.logger.info(err, 'TaskTranscribe:exec - error');
-      this.parentTask && this.parentTask.emit('error', err);
+      let isFallbackSuccess = false;
+      if (this.fallbackSttCredentials) {
+        this.logger.info(err, 'TaskTranscribe:exec - fallback to 2nd speech provider');
+        try {
+          await this._startTranscribing(cs, ep, 1, this.fallbackSttCredentials);
+          if (this.separateRecognitionPerChannel && ep2) {
+            await this._startTranscribing(cs, ep2, 2, this.fallbackSttCredentials);
+          }
+          updateSpeechCredentialLastUsed(this.fallbackSttCredentials.speech_credential_sid);
+          await this.awaitTaskDone();
+          isFallbackSuccess = true;
+        } catch (error) {
+          this.logger.info(error, 'TaskTranscribe:exec - fallback error');
+        }
+      }
+      if (!isFallbackSuccess) {
+        this.parentTask && this.parentTask.emit('error', err);
+      }
     }
     this.removeSpeechListeners(ep);
   }
@@ -167,8 +214,8 @@ class TaskTranscribe extends Task {
     await this.awaitTaskDone();
   }
 
-  async _startTranscribing(cs, ep, channel) {
-    const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
+  async _startTranscribing(cs, ep, channel, credentials) {
+    const opts = this.setChannelVarsForStt(this, credentials, this.data.recognizer);
     switch (this.vendor) {
       case 'google':
         this.bugname = 'google_transcribe';