fix

2026-02-13 17:59:42 +00:00 · 2023-08-18 14:06:46 +07:00 · 2023-08-18 13:38:23 +07:00 · 2023-08-18 12:17:30 +07:00 · 2023-08-18 11:24:14 +07:00 · 2023-08-18 11:18:01 +07:00
10 changed files with 494 additions and 153 deletions
--- a/lib/session/call-session.js
+++ b/lib/session/call-session.js
@@ -180,6 +180,13 @@ class CallSession extends Emitter {
    this.application.speech_synthesis_vendor = vendor;
  }
  /**
   * fallback vendor to use for speech synthesis;
   * reads as null unless the application has use_for_fallback_speech set
   */
  get fallbackSpeechSynthesisVendor() {
    return this.application.use_for_fallback_speech ? this.application.fallback_speech_synthesis_vendor : null;
  }
  // the setter stores the value regardless of use_for_fallback_speech
  set fallbackSpeechSynthesisVendor(vendor) {
    this.application.fallback_speech_synthesis_vendor = vendor;
  }
  /**
   * default label to use for speech synthesis if not provided in the app
   */
@@ -189,6 +196,13 @@ class CallSession extends Emitter {
  set speechSynthesisLabel(label) {
    this.application.speech_synthesis_label = label;
  }
  /**
   * fallback label to use for speech synthesis, stored on the application
   */
  get fallbackSpeechSynthesisLabel() {
    return this.application.fallback_speech_synthesis_label;
  }
  set fallbackSpeechSynthesisLabel(label) {
    this.application.fallback_speech_synthesis_label = label;
  }
  /**
   * default voice to use for speech synthesis if not provided in the app
   */
@@ -198,6 +212,13 @@ class CallSession extends Emitter {
  set speechSynthesisVoice(voice) {
    this.application.speech_synthesis_voice = voice;
  }
  /**
   * fallback voice to use for speech synthesis, stored on the application
   */
  get fallbackSpeechSynthesisVoice() {
    return this.application.fallback_speech_synthesis_voice;
  }
  set fallbackSpeechSynthesisVoice(voice) {
    this.application.fallback_speech_synthesis_voice = voice;
  }
  /**
   * default language to use for speech synthesis if not provided in the app
   */
@@ -208,6 +229,13 @@ class CallSession extends Emitter {
    this.application.speech_synthesis_language = language;
  }
  /**
   * fallback language to use for speech synthesis, stored on the application
   */
  get fallbackSpeechSynthesisLanguage() {
    return this.application.fallback_speech_synthesis_language;
  }
  set fallbackSpeechSynthesisLanguage(language) {
    this.application.fallback_speech_synthesis_language = language;
  }
  /**
   * default vendor to use for speech recognition if not provided in the app
   */
@@ -217,6 +245,13 @@ class CallSession extends Emitter {
  set speechRecognizerVendor(vendor) {
    this.application.speech_recognizer_vendor = vendor;
  }
  /**
   * fallback vendor to use for speech recognition, stored on the application
   * NOTE(review): unlike fallbackSpeechSynthesisVendor, this getter does not check
   * use_for_fallback_speech - confirm that the difference is intended
   */
  get fallbackSpeechRecognizerVendor() {
    return this.application.fallback_speech_recognizer_vendor;
  }
  set fallbackSpeechRecognizerVendor(vendor) {
    this.application.fallback_speech_recognizer_vendor = vendor;
  }
  /**
   * default label to use for speech recognition if not provided in the app
   */
@@ -226,6 +261,13 @@ class CallSession extends Emitter {
  set speechRecognizerLabel(label) {
    this.application.speech_recognizer_label = label;
  }
  /**
   * fallback label to use for speech recognition, stored on the application
   */
  get fallbackSpeechRecognizerLabel() {
    return this.application.fallback_speech_recognizer_label;
  }
  set fallbackSpeechRecognizerLabel(label) {
    this.application.fallback_speech_recognizer_label = label;
  }
  /**
 * default language to use for speech recognition if not provided in the app
 */
@@ -236,6 +278,13 @@ class CallSession extends Emitter {
    this.application.speech_recognizer_language = language;
  }
  /**
   * fallback language to use for speech recognition, stored on the application
   */
  get fallbackSpeechRecognizerLanguage() {
    return this.application.fallback_speech_recognizer_language;
  }
  set fallbackSpeechRecognizerLanguage(language) {
    this.application.fallback_speech_recognizer_language = language;
  }
  /**
   * indicates whether the call currently in progress
   */
--- a/lib/tasks/config.js
+++ b/lib/tasks/config.js
@@ -114,6 +114,19 @@ class TaskConfig extends Task {
      cs.speechSynthesisVoice = this.synthesizer.voice !== 'default'
        ? this.synthesizer.voice
        : cs.speechSynthesisVoice;
      // fallback vendor
      cs.fallbackSpeechSynthesisVendor = this.synthesizer.fallbackVendor !== 'default'
        ? this.synthesizer.fallbackVendor
        : cs.fallbackSpeechSynthesisVendor;
      cs.fallbackSpeechSynthesisLabel = this.synthesizer.fallbackLabel !== 'default'
        ? this.synthesizer.fallbackLabel
        : cs.fallbackSpeechSynthesisLabel;
      cs.fallbackSpeechSynthesisLanguage = this.synthesizer.fallbackLanguage !== 'default'
        ?  this.synthesizer.fallbackLanguage
        : cs.fallbackSpeechSynthesisLanguage;
      cs.fallbackSpeechSynthesisVoice = this.synthesizer.fallbackVoice !== 'default'
        ? this.synthesizer.fallbackVoice
        : cs.fallbackSpeechSynthesisVoice;
      this.logger.info({synthesizer: this.synthesizer}, 'Config: updated synthesizer');
    }
    if (this.hasRecognizer) {
@@ -126,6 +139,17 @@ class TaskConfig extends Task {
      cs.speechRecognizerLanguage = this.recognizer.language !== 'default'
        ? this.recognizer.language
        : cs.speechRecognizerLanguage;
      //fallback
      cs.fallbackSpeechRecognizerVendor = this.recognizer.fallbackVendor !== 'default'
        ? this.recognizer.fallbackVendor
        : cs.fallbackSpeechRecognizerVendor;
      cs.fallbackSpeechRecognizerLabel = this.recognizer.fallbackLabel !== 'default'
        ? this.recognizer.fallbackLabel
        : cs.fallbackSpeechRecognizerLabel;
      cs.fallbackSpeechRecognizerLanguage = this.recognizer.fallbackLanguage !== 'default'
        ? this.recognizer.fallbackLanguage
        : cs.fallbackSpeechRecognizerLanguage;
      cs.isContinuousAsr = typeof this.recognizer.asrTimeout === 'number' ? true : false;
      if (cs.isContinuousAsr) {
        cs.asrTimeout = this.recognizer.asrTimeout;
--- a/lib/tasks/dialogflow/index.js
+++ b/lib/tasks/dialogflow/index.js
@@ -59,6 +59,12 @@ class Dialogflow extends Task {
      this.language = this.data.tts.language || 'default';
      this.voice = this.data.tts.voice || 'default';
      this.speechSynthesisLabel = this.data.tts.label || null;
      // fallback tts: each option is taken from its own tts.fallback* property;
      // 'default' marks a value that was not supplied
      this.fallbackVendor = this.data.tts.fallbackVendor || 'default';
      this.fallbackLanguage = this.data.tts.fallbackLanguage || 'default';
      this.fallbackVoice = this.data.tts.fallbackVoice || 'default';
      this.fallbackLabel = this.data.tts.fallbackLabel || 'default';
    }
    this.bargein = this.data.bargein;
  }
@@ -119,9 +125,15 @@ class Dialogflow extends Task {
        this.vendor = cs.speechSynthesisVendor;
        this.language = cs.speechSynthesisLanguage;
        this.voice = cs.speechSynthesisVoice;
        this.speechSynthesisLabel = cs.speechSynthesisLabel;
      }
-      this.ttsCredentials = cs.getSpeechCredentials(this.vendor, 'tts',
+      if (this.fallbackVendor === 'default') {
-        this.speechSynthesisLabel || cs.speechSynthesisLabel);
+        this.fallbackVendor = cs.fallbackSpeechSynthesisVendor;
        this.fallbackLanguage = cs.fallbackSpeechSynthesisLanguage;
        this.fallbackVoice = cs.fallbackSpeechSynthesisVoice;
        this.fallbackLabel = cs.fallbackSpeechSynthesisLabel;
      }
      this.ttsCredentials = cs.getSpeechCredentials(this.vendor, 'tts', this.speechSynthesisLabel);
      this.ep.addCustomEventListener('dialogflow::intent', this._onIntent.bind(this, ep, cs));
      this.ep.addCustomEventListener('dialogflow::transcription', this._onTranscription.bind(this, ep, cs));
@@ -223,17 +235,7 @@ class Dialogflow extends Task {
      }
      try {
-        const obj = {
+        const {filePath, servedFromCache} = await this.fallbackSynthAudio(cs, intent, stats, synthAudio);
          account_sid: cs.accountSid,
          text: intent.fulfillmentText,
          vendor: this.vendor,
          language: this.language,
          voice: this.voice,
          salt: cs.callSid,
          credentials: this.ttsCredentials
        };
        this.logger.debug({obj}, 'Dialogflow:_onIntent - playing message via tts');
        const {filePath, servedFromCache} = await synthAudio(stats, obj);
        if (filePath) cs.trackTmpFile(filePath);
        if (!this.ttsCredentials && !servedFromCache) cs.billForTts(intent.fulfillmentText.length);
@@ -279,6 +281,46 @@ class Dialogflow extends Task {
    }
  }
  async fallbackSynthAudio(cs, intent, stats, synthAudio) {
    try {
      const obj = {
        account_sid: cs.accountSid,
        text: intent.fulfillmentText,
        vendor: this.vendor,
        language: this.language,
        voice: this.voice,
        salt: cs.callSid,
        credentials: this.ttsCredentials
      };
      this.logger.debug({obj}, 'Dialogflow:_onIntent - playing message via tts');
      return await synthAudio(stats, obj);
    } catch (error) {
      this.logger.info({error}, 'Failed to synthesize audio from primary vendor');
      try {
        if (this.fallbackVendor) {
          const credentials = cs.getSpeechCredentials(this.fallbackVendor, 'tts', this.fallbackLabel);
          const obj = {
            account_sid: cs.accountSid,
            text: intent.fulfillmentText,
            vendor: this.fallbackVendor,
            language: this.fallbackLanguage,
            voice: this.fallbackVoice,
            salt: cs.callSid,
            credentials
          };
          this.logger.debug({obj}, 'Dialogflow:_onIntent - playing message via fallback tts');
          return await synthAudio(stats, obj);
        }
      } catch (err) {
        this.logger.info({err}, 'Failed to synthesize audio from falllback vendor');
        throw err;
      }
      throw error;
    }
  }
  /**
   * A transcription - either interim or final - has been returned.
   * If we are doing barge-in based on hotword detection, check for the hotword or phrase.
--- a/lib/tasks/gather.js
+++ b/lib/tasks/gather.js
@@ -65,6 +65,11 @@ class TaskGather extends Task {
      const recognizer = this.data.recognizer;
      this.vendor = recognizer.vendor;
      this.language = recognizer.language;
      this.label = recognizer.label;
      this.fallbackVendor = recognizer.fallbackVendor || 'default';
      this.fallbackLanguage = recognizer.fallbackLanguage || 'default';
      this.fallbackLabel = recognizer.fallbackLabel || 'default';
      /* let credentials be supplied in the recognizer object at runtime */
      this.sttCredentials = setSpeechCredentialsAtRuntime(recognizer);
@@ -133,11 +138,60 @@ class TaskGather extends Task {
    return s;
  }
  async _initSpeechCredentials(cs, vendor, label) {
    const {getNuanceAccessToken, getIbmAccessToken} = this.cs.srf.locals.dbHelpers;
    let credentials = cs.getSpeechCredentials(vendor, 'stt', label);
    if (!credentials) {
      const {writeAlerts, AlertType} = cs.srf.locals;
      this.logger.info(`TaskGather:exec - ERROR stt using ${vendor} requested but creds not supplied`);
      writeAlerts({
        account_sid: cs.accountSid,
        alert_type: AlertType.STT_NOT_PROVISIONED,
        vendor
      }).catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
      // Notify application that STT vender is wrong.
      this.notifyError({
        msg: 'ASR error',
        details: `No speech-to-text service credentials for ${vendor} have been configured`
      });
      this.notifyTaskDone();
      throw new Error(`No speech-to-text service credentials for ${vendor} have been configured`);
    }
    if (vendor === 'nuance' && credentials.client_id) {
      /* get nuance access token */
      const {client_id, secret} = credentials;
      const {access_token, servedFromCache} = await getNuanceAccessToken(client_id, secret, 'asr tts');
      this.logger.debug({client_id}, `Gather:exec - got nuance access token ${servedFromCache ? 'from cache' : ''}`);
      credentials = {...credentials, access_token};
    }
    else if (vendor == 'ibm' && credentials.stt_api_key) {
      /* get ibm access token */
      const {stt_api_key, stt_region} = credentials;
      const {access_token, servedFromCache} = await getIbmAccessToken(stt_api_key);
      this.logger.debug({stt_api_key}, `Gather:exec - got ibm access token ${servedFromCache ? 'from cache' : ''}`);
      credentials = {...credentials, access_token, stt_region};
    }
    return credentials;
  }
  async _startTranscribeForSpeech(cs, ep, vendor, language, credentials) {
    const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
    await this._initSpeech(cs, ep, vendor, credentials);
    if (this.killed) {
      this.logger.info(`Gather:exec - task was quickly killed so do not transcribe for vendor: ${vendor}`);
      return;
    }
    this.execVendor = vendor;
    this.execLanguage = language;
    this._startTranscribing(ep);
    return updateSpeechCredentialLastUsed(credentials.speech_credential_sid);
  }
  async exec(cs, {ep}) {
    this.logger.debug({options: this.data}, 'Gather:exec');
    await super.exec(cs);
    const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
    const {getNuanceAccessToken, getIbmAccessToken} = cs.srf.locals.dbHelpers;
    if (cs.hasGlobalSttHints && !this.maskGlobalSttHints) {
      const {hints, hintsBoost} = cs.globalSttHints;
@@ -184,58 +238,55 @@ class TaskGather extends Task {
      this.language = cs.speechRecognizerLanguage;
      if (this.data.recognizer) this.data.recognizer.language = this.language;
    }
    if ('default' === this.label || !this.label) {
      this.label = cs.speechRecognizerLabel;
      if (this.data.recognizer) this.data.recognizer.label = this.label;
    }
    // Fallback options
    if ('default' === this.fallbackVendor || !this.fallbackVendor) {
      this.fallbackVendor = cs.fallbackSpeechRecognizerVendor;
      if (this.data.recognizer) this.data.recognizer.fallbackVendor = this.fallbackVendor;
    }
    if ('default' === this.fallbackLanguage || !this.fallbackLanguage) {
      this.fallbackLanguage = cs.fallbackSpeechRecognizerLanguage;
      if (this.data.recognizer) this.data.recognizer.fallbackLanguage = this.fallbackLanguage;
    }
    if ('default' === this.fallbackLabel || !this.fallbackLabel) {
      this.fallbackLabel = cs.fallbackSpeechRecognizerLabel;
      if (this.data.recognizer) this.data.recognizer.fallbackLabel = this.fallbackLabel;
    }
    if (!this.data.recognizer.vendor) {
      this.data.recognizer.vendor = this.vendor;
    }
-    if (this.needsStt && !this.sttCredentials) this.sttCredentials =
+
      cs.getSpeechCredentials(this.vendor, 'stt', this.data.recognizer?.label || cs.speechRecognizerLabel);
    if (this.needsStt && !this.sttCredentials) {
-      const {writeAlerts, AlertType} = cs.srf.locals;
+      this.sttCredentials = await this._initSpeechCredentials(cs, this.vendor, this.label);
      this.logger.info(`TaskGather:exec - ERROR stt using ${this.vendor} requested but creds not supplied`);
      writeAlerts({
        account_sid: cs.accountSid,
        alert_type: AlertType.STT_NOT_PROVISIONED,
        vendor: this.vendor
      }).catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
      // Notify application that STT vender is wrong.
      this.notifyError({
        msg: 'ASR error',
        details: `No speech-to-text service credentials for ${this.vendor} have been configured`
      });
      this.notifyTaskDone();
      throw new Error(`No speech-to-text service credentials for ${this.vendor} have been configured`);
    }
-    if (this.vendor === 'nuance' && this.sttCredentials.client_id) {
+    // Fetch credential for fallback recognizer
-      /* get nuance access token */
+    if (this.needsStt && !this.fallbackSttCredentials && this.fallbackVendor) {
-      const {client_id, secret} = this.sttCredentials;
+      this.fallbackSttCredentials = await this._initSpeechCredentials(
-      const {access_token, servedFromCache} = await getNuanceAccessToken(client_id, secret, 'asr tts');
+        cs, this.fallbackSttCredentials, this.fallbackLabel);
      this.logger.debug({client_id}, `Gather:exec - got nuance access token ${servedFromCache ? 'from cache' : ''}`);
      this.sttCredentials = {...this.sttCredentials, access_token};
    }
-    else if (this.vendor == 'ibm' && this.sttCredentials.stt_api_key) {
+
-      /* get ibm access token */
+    const startListening = async(cs, ep) => {
      const {stt_api_key, stt_region} = this.sttCredentials;
      const {access_token, servedFromCache} = await getIbmAccessToken(stt_api_key);
      this.logger.debug({stt_api_key}, `Gather:exec - got ibm access token ${servedFromCache ? 'from cache' : ''}`);
      this.sttCredentials = {...this.sttCredentials, access_token, stt_region};
    }
    const startListening = (cs, ep) => {
      this._startTimer();
      if (this.isContinuousAsr && 0 === this.timeout) this._startAsrTimer();
      if (this.input.includes('speech') && !this.listenDuringPrompt) {
-        this._initSpeech(cs, ep)
+        try {
-          .then(() => {
+          return await this._startTranscribeForSpeech(cs, ep, this.vendor, this.language, this.sttCredentials);
-            if (this.killed) {
+        } catch (error) {
-              this.logger.info('Gather:exec - task was quickly killed so do not transcribe');
+          this.logger.error({error}, 'error in initSpeech');
-              return;
+          if (this.fallbackSttCredentials) {
            try {
              return await this._startTranscribeForSpeech(cs, ep, this.fallbackVendor,
                this.fallbackLanguage, this.fallbackSttCredentials);
            } catch (err) {
              this.logger.error({err}, `error in initSpeech for fallback STT provider ${this.fallbackVendor}`);
            }
-            this._startTranscribing(ep);
+          }
-            return updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
+        }
          })
          .catch((err) => {
            this.logger.error({err}, 'error in initSpeech');
          });
      }
    };
@@ -289,10 +340,19 @@ class TaskGather extends Task {
      }
      if (this.input.includes('speech') && this.listenDuringPrompt) {
-        await this._initSpeech(cs, ep);
+        try {
-        this._startTranscribing(ep);
+          await this._startTranscribeForSpeech(cs, ep, this.vendor, this.language, this.sttCredentials);
-        updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid)
+        } catch (error) {
-          .catch(() => {/*already logged error */});
+          this.logger.error({error}, 'error in initSpeech');
          if (this.fallbackSttCredentials) {
            try {
              await this._startTranscribeForSpeech(cs, ep, this.fallbackVendor,
                this.fallbackLanguage, this.fallbackSttCredentials);
            } catch (err) {
              this.logger.error({err}, `error in initSpeech for fallback STT provider ${this.fallbackVendor}`);
            }
          }
        }
      }
      if (this.input.includes('digits') || this.dtmfBargein || this.asrDtmfTerminationDigit) {
@@ -363,9 +423,9 @@ class TaskGather extends Task {
    }
  }
-  async _initSpeech(cs, ep) {
+  async _initSpeech(cs, ep, vendor, credentials) {
-    const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
+    const opts = this.setChannelVarsForStt(this, credentials, this.data.recognizer);
-    switch (this.vendor) {
+    switch (vendor) {
      case 'google':
        this.bugname = 'google_transcribe';
        ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
@@ -451,9 +511,9 @@ class TaskGather extends Task {
          break;
        }
        else {
-          this.notifyError({ msg: 'ASR error', details:`Invalid vendor ${this.vendor}`});
+          this.notifyError({ msg: 'ASR error', details:`Invalid vendor ${credentials.vendor}`});
          this.notifyTaskDone();
-          throw new Error(`Invalid vendor ${this.vendor}`);
+          throw new Error(`Invalid vendor ${credentials.vendor}`);
        }
    }
@@ -465,14 +525,14 @@ class TaskGather extends Task {
  _startTranscribing(ep) {
    this.logger.debug({
-      vendor: this.vendor,
+      vendor: this.execVendor,
-      locale: this.language,
+      locale: this.execLanguage,
      interim: this.interim,
      bugname: this.bugname
    }, 'Gather:_startTranscribing');
    ep.startTranscription({
-      vendor: this.vendor,
+      vendor: this.execVendor,
-      locale: this.language,
+      locale: this.execLanguage,
      interim: this.interim,
      bugname: this.bugname,
    }).catch((err) => {
@@ -481,7 +541,7 @@ class TaskGather extends Task {
      writeAlerts({
        account_sid: this.cs.accountSid,
        alert_type: AlertType.STT_FAILURE,
-        vendor: this.vendor,
+        vendor: this.execVendor,
        detail: err.message
      });
    }).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure'));
--- a/lib/tasks/lex.js
+++ b/lib/tasks/lex.js
@@ -26,6 +26,12 @@ class Lex extends Task {
      this.language = this.data.tts.language || 'default';
      this.voice = this.data.tts.voice || 'default';
      this.speechCredentialLabel = this.data.tts.label || null;
      // fallback tts: each option is taken from its own tts.fallback* property;
      // 'default' marks a value that was not supplied
      this.fallbackVendor = this.data.tts.fallbackVendor || 'default';
      this.fallbackLanguage = this.data.tts.fallbackLanguage || 'default';
      this.fallbackVoice = this.data.tts.fallbackVoice || 'default';
      this.fallbackLabel = this.data.tts.fallbackLabel || 'default';
    }
    this.botName = `${this.bot}:${this.alias}:${this.region}`;
@@ -103,9 +109,15 @@ class Lex extends Task {
        this.vendor = cs.speechSynthesisVendor;
        this.language = cs.speechSynthesisLanguage;
        this.voice = cs.speechSynthesisVoice;
        this.speechCredentialLabel = cs.speechSynthesisLabel;
      }
-      this.ttsCredentials = cs.getSpeechCredentials(this.vendor, 'tts',
+      if (this.fallbackVendor === 'default') {
-        this.speechCredentialLabel || cs.speechSynthesisVendor);
+        this.fallbackVendor = cs.fallbackSpeechSynthesisVendor;
        this.fallbackLanguage = cs.fallbackSpeechSynthesisLanguage;
        this.fallbackVoice = cs.fallbackSpeechSynthesisVoice;
        this.fallbackLabel = cs.fallbackSpeechSynthesisLabel;
      }
      this.ttsCredentials = cs.getSpeechCredentials(this.vendor, 'tts', this.speechCredentialLabel);
      this.ep.addCustomEventListener('lex::intent', this._onIntent.bind(this, ep, cs));
      this.ep.addCustomEventListener('lex::transcription', this._onTranscription.bind(this, ep, cs));
@@ -170,6 +182,41 @@ class Lex extends Task {
    }
  }
  async fallbackSynthAudio(cs, msg, stats, synthAudio) {
    try {
      const {filePath} = await synthAudio(stats, {
        account_sid: cs.accountSid,
        text: msg,
        vendor: this.vendor,
        language: this.language,
        voice: this.voice,
        salt: cs.callSid,
        credentials: this.ttsCredentials
      });
      return filePath;
    } catch (error) {
      this.logger.info({error}, 'failed to synth audio from primary vendor');
      if (this.fallbackVendor) {
        try {
          const credential = cs.getSpeechCredentials(this.fallbackVendor, 'tts', this.fallbackLabel);
          const {filePath} = await synthAudio(stats, {
            account_sid: cs.accountSid,
            text: msg,
            vendor: this.fallbackVendor,
            language: this.fallbackLanguage,
            voice: this.fallbackVoice,
            salt: cs.callSid,
            credentials: credential
          });
          return filePath;
        } catch (err) {
          this.logger.info({err}, 'failed to synth audio from fallback vendor');
        }
      }
    }
  }
  /**
   * @param {*} evt - event data
   */
@@ -189,16 +236,7 @@ class Lex extends Task {
        try {
          this.logger.debug(`tts with ${this.vendor} ${this.voice}`);
-          // eslint-disable-next-line no-unused-vars
+          const filePath = await this.fallbackSynthAudio(cs, msg, stats, synthAudio);
          const {filePath, servedFromCache} = await synthAudio(stats, {
            account_sid: cs.accountSid,
            text: msg,
            vendor: this.vendor,
            language: this.language,
            voice: this.voice,
            salt: cs.callSid,
            credentials: this.ttsCredentials
          });
          if (filePath) cs.trackTmpFile(filePath);
          if (this.events.includes('start-play')) {
--- a/lib/tasks/say.js
+++ b/lib/tasks/say.js
@@ -59,15 +59,30 @@ class TaskSay extends Task {
    const vendor = this.synthesizer.vendor && this.synthesizer.vendor !== 'default' ?
      this.synthesizer.vendor :
      cs.speechSynthesisVendor;
    const fallbackVendor = this.synthesizer.fallbackVendor && this.synthesizer.fallbackVendor !== 'default' ?
      this.synthesizer.fallbackVendor :
      cs.fallbackSpeechSynthesisVendor;
    const language = this.synthesizer.language && this.synthesizer.language !== 'default' ?
      this.synthesizer.language :
      cs.speechSynthesisLanguage ;
    const fallbackLanguage = this.synthesizer.fallbackLanguage && this.synthesizer.fallbackLanguage !== 'default' ?
      this.synthesizer.fallbackLanguage :
      cs.fallbackSpeechSynthesisLanguage ;
    let voice =  this.synthesizer.voice && this.synthesizer.voice !== 'default' ?
      this.synthesizer.voice :
      cs.speechSynthesisVoice;
    const fallbackVoice = this.synthesizer.fallbackVoice && this.synthesizer.fallbackVoice !== 'default' ?
      this.synthesizer.fallbackVoice :
      cs.fallbackSpeechSynthesisVoice;
    const label = this.synthesizer.label && this.synthesizer.label !== 'default' ?
      this.synthesizer.label :
      cs.speechSynthesisLabel;
    const fallbackLabel = this.synthesizer.fallbackLabel && this.synthesizer.fallbackLabel !== 'default' ?
      this.synthesizer.fallbackLabel :
      cs.fallbackSpeechSynthesisLabel;
    const engine = this.synthesizer.engine || 'standard';
    const salt = cs.callSid;
-    let credentials = cs.getSpeechCredentials(vendor, 'tts', this.data.synthesizer?.label || cs.speechSynthesisLabel);
+    let credentials = cs.getSpeechCredentials(vendor, 'tts', label);
    /* parse Nuance voices into name and model */
    let model;
@@ -118,6 +133,8 @@ class TaskSay extends Task {
          'tts.language': language,
          'tts.voice': voice
        });
        let filePathUrl, isFromCache, roundTripTime;
        let executedVendor, executedLanguage;
        try {
          const {filePath, servedFromCache, rtt} = await synthAudio(stats, {
            account_sid: cs.accountSid,
@@ -131,37 +148,101 @@ class TaskSay extends Task {
            credentials,
            disableTtsCache : this.disableTtsCache
          });
-          this.logger.debug(`file ${filePath}, served from cache ${servedFromCache}`);
+
-          if (filePath) cs.trackTmpFile(filePath);
+          span.setAttributes({'tts.cached': servedFromCache});
          span.end();
          if (!servedFromCache && !lastUpdated) {
            lastUpdated = true;
            updateSpeechCredentialLastUsed(credentials.speech_credential_sid)
              .catch(() => {/*already logged error */});
          }
-          span.setAttributes({'tts.cached': servedFromCache});
+
-          span.end();
+          filePathUrl = filePath;
-          if (!servedFromCache && rtt) {
+          isFromCache = servedFromCache;
-            this.notifyStatus({
+          roundTripTime = rtt;
-              event: 'synthesized-audio',
+          executedVendor = vendor;
-              vendor,
+          executedLanguage = language;
-              language,
+
-              characters: text.length,
+        } catch (error) {
-              elapsedTime: rtt
+          let isFallbackSuccess = false;
          if (fallbackVendor) {
            const fallbackcredentials = cs.getSpeechCredentials(fallbackVendor, 'tts', fallbackLabel);
            const {span: fallbackSpan} = this.startChildSpan('fallback-tts-generation', {
              'tts.vendor': fallbackVendor,
              'tts.language': fallbackLanguage,
              'tts.voice': fallbackVoice
            });
            try {
              const {filePath, servedFromCache, rtt} = await synthAudio(stats, {
                account_sid: cs.accountSid,
                text,
                vendor: fallbackVendor,
                language: fallbackLanguage,
                voice: fallbackVoice,
                engine,
                model,
                salt,
                credentials: fallbackcredentials,
                disableTtsCache : this.disableTtsCache
              });
              isFallbackSuccess = true;
              fallbackSpan.setAttributes({'tts.cached': servedFromCache});
              fallbackSpan.end();
              if (!servedFromCache && !lastUpdated) {
                lastUpdated = true;
                /* the audio came from the fallback vendor, so mark the fallback credential as used */
                updateSpeechCredentialLastUsed(fallbackcredentials.speech_credential_sid)
                  .catch(() => {/*already logged error */});
              }
              filePathUrl = filePath;
              isFromCache = servedFromCache;
              roundTripTime = rtt;
              executedVendor = fallbackVendor;
              executedLanguage = fallbackLanguage;
            } catch (err) {
              this.logger.info({err}, 'fallback Speech failed to synthesize audio');
              fallbackSpan.end();
              writeAlerts({
                account_sid: cs.accountSid,
                alert_type: AlertType.TTS_FAILURE,
                vendor: fallbackVendor,
                detail: err.message
              }).catch((err) => this.logger.info({err}, 'Error generating alert for fallback tts failure'));
            }
          }
          if (!isFallbackSuccess) {
            this.logger.info({error}, 'Error synthesizing tts');
            span.end();
            writeAlerts({
              account_sid: cs.accountSid,
              alert_type: AlertType.TTS_FAILURE,
              vendor,
              detail: error.message
            }).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure'));
            this.notifyError({msg: 'TTS error', details: error.message || error});
            return;
          }
          return filePath;
        } catch (err) {
          this.logger.info({err}, 'Error synthesizing tts');
          span.end();
          writeAlerts({
            account_sid: cs.accountSid,
            alert_type: AlertType.TTS_FAILURE,
            vendor,
            detail: err.message
          }).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure'));
          this.notifyError({msg: 'TTS error', details: err.message || err});
          return;
        }
        this.logger.debug(`file ${filePathUrl}, served from cache ${isFromCache}`);
        if (filePathUrl) cs.trackTmpFile(filePathUrl);
        if (!isFromCache && roundTripTime) {
          this.notifyStatus({
            event: 'synthesized-audio',
            vendor: executedVendor,
            language: executedLanguage,
            characters: text.length,
            elapsedTime: roundTripTime
          });
        }
        return filePathUrl;
      };
      const arr = this.text.map((t) => generateAudio(t));
--- a/lib/tasks/transcribe.js
+++ b/lib/tasks/transcribe.js
@@ -42,6 +42,11 @@ class TaskTranscribe extends Task {
      const recognizer = this.data.recognizer;
      this.vendor = recognizer.vendor;
      this.language = recognizer.language;
      this.label = recognizer.label;
      this.fallbackVendor = recognizer.fallbackVendor || 'default';
      this.fallbackLanguage = recognizer.fallbackLanguage || 'default';
      this.fallbackLabel = recognizer.fallbackLabel || 'default';
      /* let credentials be supplied in the recognizer object at runtime */
      this.sttCredentials = setSpeechCredentialsAtRuntime(recognizer);
@@ -67,10 +72,43 @@ class TaskTranscribe extends Task {
  get name() { return TaskName.Transcribe; }
  async _initSpeechCredential(cs, vendor, label) {
    const {getNuanceAccessToken, getIbmAccessToken} = cs.srf.locals.dbHelpers;
    let credentials = cs.getSpeechCredentials(vendor, 'stt', label);
    if (!credentials) {
      const {writeAlerts, AlertType} = cs.srf.locals;
      this.logger.info(`TaskTranscribe:exec - ERROR stt using ${vendor} requested but creds not supplied`);
      writeAlerts({
        account_sid: cs.accountSid,
        alert_type: AlertType.STT_NOT_PROVISIONED,
        vendor: vendor
      }).catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
      throw new Error('no provisioned speech credentials for TTS');
    }
    if (vendor === 'nuance' && credentials.client_id) {
      /* get nuance access token */
      const {client_id, secret} = credentials;
      const {access_token, servedFromCache} = await getNuanceAccessToken(client_id, secret, 'asr tts');
      this.logger.debug({client_id},
        `Transcribe:exec - got nuance access token ${servedFromCache ? 'from cache' : ''}`);
      credentials = {...credentials, access_token};
    }
    else if (vendor == 'ibm' && credentials.stt_api_key) {
      /* get ibm access token */
      const {stt_api_key, stt_region} = credentials;
      const {access_token, servedFromCache} = await getIbmAccessToken(stt_api_key);
      this.logger.debug({stt_api_key}, `Gather:exec - got ibm access token ${servedFromCache ? 'from cache' : ''}`);
      credentials = {...credentials, access_token, stt_region};
    }
    return credentials;
  }
  async exec(cs, {ep, ep2}) {
    super.exec(cs);
    const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
    const {getNuanceAccessToken, getIbmAccessToken} = cs.srf.locals.dbHelpers;
    if (cs.hasGlobalSttHints) {
      const {hints, hintsBoost} = cs.globalSttHints;
@@ -98,51 +136,60 @@ class TaskTranscribe extends Task {
      this.language = cs.speechRecognizerLanguage;
      if (this.data.recognizer) this.data.recognizer.language = this.language;
    }
    if ('default' === this.label || !this.label) {
      this.label = cs.speechRecognizerLabel;
      if (this.data.recognizer) this.data.recognizer.label = this.label;
    }
    // fallback options: any fallback setting left unset or at 'default' is taken
    // from the call session and mirrored into the recognizer data when present
    if ('default' === this.fallbackVendor || !this.fallbackVendor) {
      this.fallbackVendor = cs.fallbackSpeechRecognizerVendor;
      if (this.data.recognizer) this.data.recognizer.fallbackVendor = this.fallbackVendor;
    }
    if ('default' === this.fallbackLanguage || !this.fallbackLanguage) {
      this.fallbackLanguage = cs.fallbackSpeechRecognizerLanguage;
      if (this.data.recognizer) this.data.recognizer.fallbackLanguage = this.fallbackLanguage;
    }
    if ('default' === this.fallbackLabel || !this.fallbackLabel) {
      // must set fallbackLabel, not label: writing to this.label here would
      // overwrite the primary label and leave the fallback label unresolved
      this.fallbackLabel = cs.fallbackSpeechRecognizerLabel;
      if (this.data.recognizer) this.data.recognizer.fallbackLabel = this.fallbackLabel;
    }
    if (!this.data.recognizer.vendor) {
      this.data.recognizer.vendor = this.vendor;
    }
-    if (!this.sttCredentials) this.sttCredentials =
+    if (!this.sttCredentials) {
-      cs.getSpeechCredentials(this.vendor, 'stt', this.data.recognizer?.label || cs.speechRecognizerLabel);
+      this.sttCredentials = await this._initSpeechCredential(cs, this.vendor, this.label);
    }
    if (!this.fallbackSttCredentials) {
      this.fallbackSttCredentials = await this._initSpeechCredential(cs, this.fallbackVendor, this.fallbackLabel);
    }
    try {
-      if (!this.sttCredentials) {
+      await this._startTranscribing(cs, ep, 1, this.sttCredentials);
        const {writeAlerts, AlertType} = cs.srf.locals;
        this.logger.info(`TaskTranscribe:exec - ERROR stt using ${this.vendor} requested but creds not supplied`);
        writeAlerts({
          account_sid: cs.accountSid,
          alert_type: AlertType.STT_NOT_PROVISIONED,
          vendor: this.vendor
        }).catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
        throw new Error('no provisioned speech credentials for TTS');
      }
      if (this.vendor === 'nuance' && this.sttCredentials.client_id) {
        /* get nuance access token */
        const {client_id, secret} = this.sttCredentials;
        const {access_token, servedFromCache} = await getNuanceAccessToken(client_id, secret, 'asr tts');
        this.logger.debug({client_id},
          `Transcribe:exec - got nuance access token ${servedFromCache ? 'from cache' : ''}`);
        this.sttCredentials = {...this.sttCredentials, access_token};
      }
      else if (this.vendor == 'ibm' && this.sttCredentials.stt_api_key) {
        /* get ibm access token */
        const {stt_api_key, stt_region} = this.sttCredentials;
        const {access_token, servedFromCache} = await getIbmAccessToken(stt_api_key);
        this.logger.debug({stt_api_key}, `Gather:exec - got ibm access token ${servedFromCache ? 'from cache' : ''}`);
        this.sttCredentials = {...this.sttCredentials, access_token, stt_region};
      }
      await this._startTranscribing(cs, ep, 1);
      if (this.separateRecognitionPerChannel && ep2) {
-        await this._startTranscribing(cs, ep2, 2);
+        await this._startTranscribing(cs, ep2, 2, this.sttCredentials);
      }
-
+      updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
      updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid)
        .catch(() => {/*already logged error */});
      await this.awaitTaskDone();
    } catch (err) {
      this.logger.info(err, 'TaskTranscribe:exec - error');
-      this.parentTask && this.parentTask.emit('error', err);
+      let isFallbackSuccess = false;
      if (this.fallbackSttCredentials) {
        this.logger.info(err, 'TaskTranscribe:exec - fallback to 2nd speech provider');
        try {
          await this._startTranscribing(cs, ep, 1, this.fallbackSttCredentials);
          if (this.separateRecognitionPerChannel && ep2) {
            await this._startTranscribing(cs, ep2, 2, this.fallbackSttCredentials);
          }
          updateSpeechCredentialLastUsed(this.fallbackSttCredentials.speech_credential_sid);
          await this.awaitTaskDone();
          isFallbackSuccess = true;
        } catch (error) {
          this.logger.info(err, 'TaskTranscribe:exec - fallback error');
        }
      }
      if (!isFallbackSuccess) {
        this.parentTask && this.parentTask.emit('error', err);
      }
    }
    this.removeSpeechListeners(ep);
  }
@@ -167,8 +214,8 @@ class TaskTranscribe extends Task {
    await this.awaitTaskDone();
  }
-  async _startTranscribing(cs, ep, channel) {
+  async _startTranscribing(cs, ep, channel, credentials) {
-    const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
+    const opts = this.setChannelVarsForStt(this, credentials, this.data.recognizer);
    switch (this.vendor) {
      case 'google':
        this.bugname = 'google_transcribe';
--- a/package-lock.json
+++ b/package-lock.json
@@ -17,7 +17,7 @@
        "@jambonz/speech-utils": "^0.0.19",
        "@jambonz/stats-collector": "^0.1.9",
        "@jambonz/time-series": "^0.2.8",
-        "@jambonz/verb-specifications": "^0.0.27",
+        "@jambonz/verb-specifications": "^0.0.29",
        "@opentelemetry/api": "^1.4.0",
        "@opentelemetry/exporter-jaeger": "^1.9.0",
        "@opentelemetry/exporter-trace-otlp-http": "^0.35.0",
@@ -3019,9 +3019,9 @@
      }
    },
    "node_modules/@jambonz/verb-specifications": {
-      "version": "0.0.27",
+      "version": "0.0.29",
-      "resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.27.tgz",
+      "resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.29.tgz",
-      "integrity": "sha512-DIcxhCNrgr2RTE3YrGNP15RqKyV+P8f97SPBlKd2zTM5aN2oV5xv+pRDx5gLzmrUZ5TIEaBXQN3vTmM2Zx5Q6g==",
+      "integrity": "sha512-jeYI+GN7Y5nXhdFG3SXvXaBlhCjIC+l5AcBywDDGxxyuuKRTukPS0MSvCtWPZP6H3wYYGqfJ4DR/vgtBF3pvyQ==",
      "dependencies": {
        "debug": "^4.3.4",
        "pino": "^8.8.0"
@@ -12985,9 +12985,9 @@
      }
    },
    "@jambonz/verb-specifications": {
-      "version": "0.0.27",
+      "version": "0.0.29",
-      "resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.27.tgz",
+      "resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.29.tgz",
-      "integrity": "sha512-DIcxhCNrgr2RTE3YrGNP15RqKyV+P8f97SPBlKd2zTM5aN2oV5xv+pRDx5gLzmrUZ5TIEaBXQN3vTmM2Zx5Q6g==",
+      "integrity": "sha512-jeYI+GN7Y5nXhdFG3SXvXaBlhCjIC+l5AcBywDDGxxyuuKRTukPS0MSvCtWPZP6H3wYYGqfJ4DR/vgtBF3pvyQ==",
      "requires": {
        "debug": "^4.3.4",
        "pino": "^8.8.0"
--- a/package.json
+++ b/package.json
@@ -31,7 +31,7 @@
    "@jambonz/speech-utils": "^0.0.19",
    "@jambonz/stats-collector": "^0.1.9",
    "@jambonz/time-series": "^0.2.8",
-    "@jambonz/verb-specifications": "^0.0.27",
+    "@jambonz/verb-specifications": "^0.0.29",
    "@opentelemetry/api": "^1.4.0",
    "@opentelemetry/exporter-jaeger": "^1.9.0",
    "@opentelemetry/exporter-trace-otlp-http": "^0.35.0",
--- a/test/index.js
+++ b/test/index.js
@@ -7,7 +7,7 @@ require('./dial-tests');
 require('./webhooks-tests');
 require('./say-tests');
 require('./gather-tests');
-// require('./transcribe-tests');
+require('./transcribe-tests');
 require('./sip-request-tests');
 require('./create-call-test');
 require('./play-tests');
Author	SHA1	Message	Date
Quan HL	8ad047b605	fix	2023-08-18 14:06:46 +07:00
Quan HL	b6c307db70	fix	2023-08-18 13:38:23 +07:00
Quan HL	aa161290c7	fix	2023-08-18 12:17:30 +07:00
Quan HL	4322159a41	fix gather verb	2023-08-18 11:24:14 +07:00
Quan HL	848aa43dcb	fix gather verb	2023-08-18 11:18:01 +07:00
Quan HL	18d7ea3e37	fix transcribe	2023-08-18 10:38:27 +07:00
Quan HL	09961f564a	fix transcribe	2023-08-18 10:24:41 +07:00
Quan HL	e9f2837370	fix gather	2023-08-18 09:57:02 +07:00
Quan HL	a97d99650c	wip	2023-08-17 17:04:38 +07:00
Quan HL	541cb1458d	wip	2023-08-17 16:58:46 +07:00
Quan HL	5754c386d3	feat fallback speech	2023-08-17 14:28:50 +07:00
Quan HL	b1c0478051	feat fallback speech	2023-08-17 14:25:26 +07:00