fix transcribe

2025-12-21 09:08:02 +00:00 · 2023-08-18 10:24:41 +07:00
parent e9f2837370
commit 09961f564a
2 changed files with 101 additions and 48 deletions
--- a/lib/tasks/gather.js
+++ b/lib/tasks/gather.js
@@ -226,17 +226,23 @@ class TaskGather extends Task {
      this.language = cs.speechRecognizerLanguage;
      if (this.data.recognizer) this.data.recognizer.language = this.language;
    }
-    this.fallbackVendor = this.data.recognizer && this.data.recognizer.fallbackVendor !== 'default' ?
-      this.data.recognizer.fallbackVendor :
-      cs.fallbackSpeechRecognizerVendor;
-
-    this.fallbackLanguage = this.data.recognizer && this.data.recognizer.fallbackLanguage !== 'default' ?
-      this.data.recognizer.fallbackLanguage :
-      cs.fallbackSpeechRecognizerLanguage;
-
-    this.fallbackLabel = this.data.recognizer && this.data.recognizer.fallbackLabel !== 'default' ?
-      this.data.recognizer.fallbackLabel :
-      cs.fallbackSpeechRecognizerLabel;
+    if ('default' === this.label || !this.label) {
+      this.label = cs.speechRecognizerLabel;
+      if (this.data.recognizer) this.data.recognizer.label = this.label;
+    }
+    // Fallback options
+    if ('default' === this.fallbackVendor || !this.fallbackVendor) {
+      this.fallbackVendor = cs.fallbackSpeechRecognizerVendor;
+      if (this.data.recognizer) this.data.recognizer.fallbackVendor = this.fallbackVendor;
+    }
+    if ('default' === this.fallbackLanguage || !this.fallbackLanguage) {
+      this.fallbackLanguage = cs.fallbackSpeechRecognizerLanguage;
+      if (this.data.recognizer) this.data.recognizer.fallbackLanguage = this.fallbackLanguage;
+    }
+    if ('default' === this.fallbackLabel || !this.fallbackLabel) {
+      this.fallbackLabel = cs.fallbackSpeechRecognizerLabel;
+      if (this.data.recognizer) this.data.recognizer.fallbackLabel = this.fallbackLabel;
+    }

    if (!this.data.recognizer.vendor) {
      this.data.recognizer.vendor = this.vendor;
--- a/lib/tasks/transcribe.js
+++ b/lib/tasks/transcribe.js
@@ -42,6 +42,11 @@ class TaskTranscribe extends Task {
      const recognizer = this.data.recognizer;
      this.vendor = recognizer.vendor;
      this.language = recognizer.language;
+      this.label = recognizer.label;
+
+      this.fallbackVendor = recognizer.fallbackVendor || 'default';
+      this.fallbackLanguage = recognizer.fallbackLanguage || 'default';
+      this.fallbackLabel = recognizer.fallbackLabel || 'default';
      /* let credentials be supplied in the recognizer object at runtime */
      this.sttCredentials = setSpeechCredentialsAtRuntime(recognizer);

@@ -67,6 +72,39 @@ class TaskTranscribe extends Task {

  get name() { return TaskName.Transcribe; }

+  /**
+   * Resolve STT credentials for a vendor/label pair, adding an access token for nuance or ibm.
+   * @param {object} cs - call session; supplies getSpeechCredentials, accountSid and srf.locals.
+   * @param {string} vendor - speech vendor whose credentials are requested.
+   * @param {string} label - credential label passed through to cs.getSpeechCredentials.
+   * @returns {Promise<object>} credentials; throws Error (after writing an alert) if none exist.
+   */
+  async _initSpeechCredential(cs, vendor, label) {
+    const credentials = cs.getSpeechCredentials(vendor, 'stt', label);
+    if (!credentials) {
+      const {writeAlerts, AlertType} = cs.srf.locals;
+      this.logger.info(`TaskTranscribe:exec - ERROR stt using ${vendor} requested but creds not supplied`);
+      writeAlerts({account_sid: cs.accountSid, alert_type: AlertType.STT_NOT_PROVISIONED, vendor})
+        .catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
+      throw new Error('no provisioned speech credentials for STT');
+    }
+    if (vendor === 'nuance' && credentials.client_id) {
+      /* get nuance access token */
+      const {client_id, secret} = credentials;
+      const {access_token, servedFromCache} = await getNuanceAccessToken(client_id, secret, 'asr tts');
+      this.logger.debug({client_id},
+        `Transcribe:exec - got nuance access token ${servedFromCache ? 'from cache' : ''}`);
+      return {...credentials, access_token};
+    }
+    if (vendor === 'ibm' && credentials.stt_api_key) {
+      /* get ibm access token; the api key is a secret, so it is kept out of the log */
+      const {access_token, servedFromCache} = await getIbmAccessToken(credentials.stt_api_key);
+      this.logger.debug(`Transcribe:exec - got ibm access token ${servedFromCache ? 'from cache' : ''}`);
+      return {...credentials, access_token};
+    }
+    return credentials;
+  }
+
  async exec(cs, {ep, ep2}) {
    super.exec(cs);
    const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
@@ -98,52 +136,61 @@ class TaskTranscribe extends Task {
      this.language = cs.speechRecognizerLanguage;
      if (this.data.recognizer) this.data.recognizer.language = this.language;
    }
+    if ('default' === this.label || !this.label) {
+      this.label = cs.speechRecognizerLabel;
+      if (this.data.recognizer) this.data.recognizer.label = this.label;
+    }
+    // fallback options: 'default' or unset resolves to the call session's fallback recognizer settings
+    if ('default' === this.fallbackVendor || !this.fallbackVendor) {
+      this.fallbackVendor = cs.fallbackSpeechRecognizerVendor;
+      if (this.data.recognizer) this.data.recognizer.fallbackVendor = this.fallbackVendor;
+    }
+    if ('default' === this.fallbackLanguage || !this.fallbackLanguage) {
+      this.fallbackLanguage = cs.fallbackSpeechRecognizerLanguage;
+      if (this.data.recognizer) this.data.recognizer.fallbackLanguage = this.fallbackLanguage;
+    }
+    if ('default' === this.fallbackLabel || !this.fallbackLabel) {
+      this.fallbackLabel = cs.fallbackSpeechRecognizerLabel;
+      if (this.data.recognizer) this.data.recognizer.fallbackLabel = this.fallbackLabel;
+    }
    if (!this.data.recognizer.vendor) {
      this.data.recognizer.vendor = this.vendor;
    }
-    if (!this.sttCredentials) this.sttCredentials =
-      cs.getSpeechCredentials(this.vendor, 'stt', this.data.recognizer?.label || cs.speechRecognizerLabel);
-
-    try {
    if (!this.sttCredentials) {
-        const {writeAlerts, AlertType} = cs.srf.locals;
-        this.logger.info(`TaskTranscribe:exec - ERROR stt using ${this.vendor} requested but creds not supplied`);
-        writeAlerts({
-          account_sid: cs.accountSid,
-          alert_type: AlertType.STT_NOT_PROVISIONED,
-          vendor: this.vendor
-        }).catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
-        throw new Error('no provisioned speech credentials for TTS');
+      this.sttCredentials = await this._initSpeechCredential(cs, this.vendor, this.label);
    }

-      if (this.vendor === 'nuance' && this.sttCredentials.client_id) {
-        /* get nuance access token */
-        const {client_id, secret} = this.sttCredentials;
-        const {access_token, servedFromCache} = await getNuanceAccessToken(client_id, secret, 'asr tts');
-        this.logger.debug({client_id},
-          `Transcribe:exec - got nuance access token ${servedFromCache ? 'from cache' : ''}`);
-        this.sttCredentials = {...this.sttCredentials, access_token};
+    if (!this.fallbackSttCredentials && this.fallbackVendor) {
+      this.fallbackSttCredentials = await this._initSpeechCredential(cs, this.fallbackVendor, this.fallbackLabel);
    }
-      else if (this.vendor == 'ibm' && this.sttCredentials.stt_api_key) {
-        /* get ibm access token */
-        const {stt_api_key, stt_region} = this.sttCredentials;
-        const {access_token, servedFromCache} = await getIbmAccessToken(stt_api_key);
-        this.logger.debug({stt_api_key}, `Gather:exec - got ibm access token ${servedFromCache ? 'from cache' : ''}`);
-        this.sttCredentials = {...this.sttCredentials, access_token, stt_region};
-      }
-      await this._startTranscribing(cs, ep, 1);
+    try {
+      await this._startTranscribing(cs, ep, 1, this.sttCredentials);
      if (this.separateRecognitionPerChannel && ep2) {
-        await this._startTranscribing(cs, ep2, 2);
+        await this._startTranscribing(cs, ep2, 2, this.sttCredentials);
      }
-
-      updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid)
-        .catch(() => {/*already logged error */});
-
+      updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid).catch(() => {/* already logged */});
      await this.awaitTaskDone();
    } catch (err) {
      this.logger.info(err, 'TaskTranscribe:exec - error');
+      let isFallbackSuccess = false;
+      if (this.fallbackSttCredentials) {
+        this.logger.info(err, 'TaskTranscribe:exec - fallback to 2nd speech provider');
+        try {
+          await this._startTranscribing(cs, ep, 1, this.fallbackSttCredentials);
+          if (this.separateRecognitionPerChannel && ep2) {
+            await this._startTranscribing(cs, ep2, 2, this.fallbackSttCredentials);
+          }
+          updateSpeechCredentialLastUsed(this.fallbackSttCredentials.speech_credential_sid).catch(() => {/* logged */});
+          await this.awaitTaskDone();
+          isFallbackSuccess = true;
+        } catch (fallbackErr) {
+          this.logger.info(fallbackErr, 'TaskTranscribe:exec - fallback error');
+        }
+      }
+      if (!isFallbackSuccess) {
        this.parentTask && this.parentTask.emit('error', err);
      }
+    }
    this.removeSpeechListeners(ep);
  }

@@ -167,8 +214,8 @@ class TaskTranscribe extends Task {
    await this.awaitTaskDone();
  }

-  async _startTranscribing(cs, ep, channel) {
-    const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
+  async _startTranscribing(cs, ep, channel, credentials) {
+    const opts = this.setChannelVarsForStt(this, credentials, this.data.recognizer);
    switch (this.vendor) {
      case 'google':
        this.bugname = 'google_transcribe';