Feat/868: Use global synthesizer config properties for say verb (#869)

* feat/868: Use the properties from global config in verb for TTS * feat/868: setting this.options to combination of cs.synthesizer.options and this.options * feat/868: Move the logic of copying cs properties to parent class tts-task.js * feat/868: add empty line that was removed, say.js restored to original version * feat/868: moved _synthesizeWithSpecificVendor to tts-task.js --------- Co-authored-by: Rammohan Yadavalli <rammohan.yadavalli@kore.com>
2026-02-13 09:49:30 +00:00 · 2024-08-20 18:01:44 +05:30
parent fc838512b6
commit f95d8639be
2 changed files with 33 additions and 165 deletions
--- a/lib/tasks/say.js
+++ b/lib/tasks/say.js
@@ -61,147 +61,6 @@ class TaskSay extends TtsTask {
    }
  }

-  async _synthesizeWithSpecificVendor(cs, ep, {vendor, language, voice, label, preCache = false}) {
-    const {srf, accountSid:account_sid} = cs;
-    const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, srf);
-    const {writeAlerts, AlertType, stats} = srf.locals;
-    const {synthAudio} = srf.locals.dbHelpers;
-    const engine = this.synthesizer.engine || cs.synthesizer?.engine || 'neural';
-    const salt = cs.callSid;
-
-    let credentials = cs.getSpeechCredentials(vendor, 'tts', label);
-    /* parse Nuance voices into name and model */
-    let model;
-    if (vendor === 'nuance' && voice) {
-      const arr = /([A-Za-z-]*)\s+-\s+(enhanced|standard)/.exec(voice);
-      if (arr) {
-        voice = arr[1];
-        model = arr[2];
-      }
-    } else if (vendor === 'deepgram') {
-      model = voice;
-    }
-
-    /* allow for microsoft custom region voice and api_key to be specified as an override */
-    if (vendor === 'microsoft' && this.options.deploymentId) {
-      credentials = credentials || {};
-      credentials.use_custom_tts = true;
-      credentials.custom_tts_endpoint = this.options.deploymentId;
-      credentials.api_key = this.options.apiKey || credentials.apiKey;
-      credentials.region = this.options.region || credentials.region;
-      voice = this.options.voice || voice;
-    } else if (vendor === 'elevenlabs') {
-      credentials = credentials || {};
-      credentials.model_id = this.options.model_id || credentials.model_id;
-      credentials.voice_settings = this.options.voice_settings || {};
-      credentials.optimize_streaming_latency = this.options.optimize_streaming_latency
-      || credentials.optimize_streaming_latency;
-      voice = this.options.voice_id || voice;
-    }
-
-    ep.set({
-      tts_engine: vendor.startsWith('custom:') ? 'custom' : vendor,
-      tts_voice: voice,
-      cache_speech_handles: !cs.currentTtsVendor || cs.currentTtsVendor === vendor ? 1 : 0,
-    }).catch((err) => this.logger.info({err}, 'Error setting tts_engine on endpoint'));
-    // set the current vendor on the call session
-    // If vendor is changed from the previous one, then reset the cache_speech_handles flag
-    cs.currentTtsVendor = vendor;
-
-    if (!preCache && !this._disableTracing) this.logger.info({vendor, language, voice, model}, 'TaskSay:exec');
-    try {
-      if (!credentials) {
-        writeAlerts({
-          account_sid,
-          alert_type: AlertType.TTS_NOT_PROVISIONED,
-          vendor,
-          target_sid: cs.callSid
-        }).catch((err) => this.logger.info({err}, 'Error generating alert for no tts'));
-        throw new Error('no provisioned speech credentials for TTS');
-      }
-      // synthesize all of the text elements
-      let lastUpdated = false;
-
-      /* produce an audio segment from the provided text */
-      const generateAudio = async(text) => {
-        if (this.killed) return;
-        if (text.startsWith('silence_stream://')) return text;
-
-        /* otel: trace time for tts */
-        if (!preCache && !this._disableTracing)  {
-          const {span} = this.startChildSpan('tts-generation', {
-            'tts.vendor': vendor,
-            'tts.language': language,
-            'tts.voice': voice
-          });
-          this.otelSpan = span;
-        }
-        try {
-          const {filePath, servedFromCache, rtt} = await synthAudio(stats, {
-            account_sid,
-            text,
-            vendor,
-            language,
-            voice,
-            engine,
-            model,
-            salt,
-            credentials,
-            options: this.options,
-            disableTtsCache : this.disableTtsCache,
-            renderForCaching: preCache
-          });
-          if (!filePath.startsWith('say:')) {
-            this.logger.debug(`Say: file ${filePath}, served from cache ${servedFromCache}`);
-            if (filePath) cs.trackTmpFile(filePath);
-            if (this.otelSpan) {
-              this.otelSpan.setAttributes({'tts.cached': servedFromCache});
-              this.otelSpan.end();
-              this.otelSpan = null;
-            }
-            if (!servedFromCache && !lastUpdated) {
-              lastUpdated = true;
-              updateSpeechCredentialLastUsed(credentials.speech_credential_sid).catch(() => {/* logged error */});
-            }
-            if (!servedFromCache && rtt && !preCache  && !this._disableTracing) {
-              this.notifyStatus({
-                event: 'synthesized-audio',
-                vendor,
-                language,
-                characters: text.length,
-                elapsedTime: rtt
-              });
-            }
-          }
-          else {
-            this.logger.debug('Say: a streaming tts api will be used');
-            const modifiedPath = filePath.replace('say:{', `say:{session-uuid=${ep.uuid},`);
-            return modifiedPath;
-          }
-          return filePath;
-        } catch (err) {
-          this.logger.info({err}, 'Error synthesizing tts');
-          if (this.otelSpan) this.otelSpan.end();
-          writeAlerts({
-            account_sid: cs.accountSid,
-            alert_type: AlertType.TTS_FAILURE,
-            vendor,
-            detail: err.message,
-            target_sid: cs.callSid
-          }).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure'));
-          throw err;
-        }
-      };
-
-      const arr = this.text.map((t) => (this._validateURL(t) ? t : generateAudio(t)));
-      return (await Promise.all(arr)).filter((fp) => fp && fp.length);
-    } catch (err) {
-      this.logger.info(err, 'TaskSay:exec error');
-      throw err;
-    }
-
-  }
-
  async exec(cs, {ep}) {
    const {srf, accountSid:account_sid, callSid:target_sid} = cs;
    const {writeAlerts, AlertType} = srf.locals;
--- a/lib/tasks/tts-task.js
+++ b/lib/tasks/tts-task.js
@@ -17,16 +17,26 @@ class TtsTask extends Task {

  async exec(cs) {
    super.exec(cs);
+    if (cs.synthesizer) {
+      this.options = {...cs.synthesizer.options, ...this.options};
+      this.data.synthesizer = this.data.synthesizer || {}
+      for (const k in cs.synthesizer) {
+        const newValue = this.data.synthesizer && this.data.synthesizer[k] !== undefined ?
+          this.data.synthesizer[k] :
+          cs.synthesizer[k];
+
+        if (Array.isArray(newValue)) {
+          this.data.synthesizer[k] = [...(this.data.synthesizer[k] || []), ...cs.synthesizer[k]];
+        } else if (typeof newValue === 'object' && newValue !== null) {
+          this.data.synthesizer[k] = { ...(this.data.synthesizer[k] || {}), ...cs.synthesizer[k] };
+        } else {
+          this.data.synthesizer[k] = newValue;
+        }
+      }
+    }
  }

-  async _synthesizeWithSpecificVendor(cs, ep, {
-    vendor,
-    language,
-    voice,
-    label,
-    disableTtsStreaming,
-    preCache
-  }) {
+  async _synthesizeWithSpecificVendor(cs, ep, {vendor, language, voice, label, preCache = false}) {
    const {srf, accountSid:account_sid} = cs;
    const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, srf);
    const {writeAlerts, AlertType, stats} = srf.locals;
@@ -65,23 +75,23 @@ class TtsTask extends Task {
    }

    ep.set({
-      tts_engine: vendor,
+      tts_engine: vendor.startsWith('custom:') ? 'custom' : vendor,
      tts_voice: voice,
-      cache_speech_handles: 1,
-    }).catch((err) => this.logger.info({err}, `${this.name}: Error setting tts_engine on endpoint`));
+      cache_speech_handles: !cs.currentTtsVendor || cs.currentTtsVendor === vendor ? 1 : 0,
+    }).catch((err) => this.logger.info({err}, 'Error setting tts_engine on endpoint'));
+    // set the current vendor on the call session
+    // If vendor is changed from the previous one, then reset the cache_speech_handles flag
+    cs.currentTtsVendor = vendor;

-    if (!preCache) this.logger.info({vendor, language, voice, model}, `${this.name}:exec`);
+    if (!preCache && !this._disableTracing) this.logger.info({vendor, language, voice, model}, 'TaskSay:exec');
    try {
      if (!credentials) {
        writeAlerts({
          account_sid,
          alert_type: AlertType.TTS_NOT_PROVISIONED,
-          vendor
+          vendor,
+          target_sid: cs.callSid
        }).catch((err) => this.logger.info({err}, 'Error generating alert for no tts'));
-        this.notifyError({
-          msg: 'TTS error',
-          details:`No speech credentials provisioned for selected vendor ${vendor}`
-        });
        throw new Error('no provisioned speech credentials for TTS');
      }
      // synthesize all of the text elements
@@ -93,7 +103,7 @@ class TtsTask extends Task {
        if (text.startsWith('silence_stream://')) return text;

        /* otel: trace time for tts */
-        if (!preCache && !this.parentTask)  {
+        if (!preCache && !this._disableTracing)  {
          const {span} = this.startChildSpan('tts-generation', {
            'tts.vendor': vendor,
            'tts.language': language,
@@ -114,11 +124,10 @@ class TtsTask extends Task {
            credentials,
            options: this.options,
            disableTtsCache : this.disableTtsCache,
-            disableTtsStreaming,
-            preCache
+            renderForCaching: preCache
          });
          if (!filePath.startsWith('say:')) {
-            this.logger.debug(`file ${filePath}, served from cache ${servedFromCache}`);
+            this.logger.debug(`Say: file ${filePath}, served from cache ${servedFromCache}`);
            if (filePath) cs.trackTmpFile(filePath);
            if (this.otelSpan) {
              this.otelSpan.setAttributes({'tts.cached': servedFromCache});
@@ -129,7 +138,7 @@ class TtsTask extends Task {
              lastUpdated = true;
              updateSpeechCredentialLastUsed(credentials.speech_credential_sid).catch(() => {/* logged error */});
            }
-            if (!servedFromCache && rtt && !preCache) {
+            if (!servedFromCache && rtt && !preCache  && !this._disableTracing) {
              this.notifyStatus({
                event: 'synthesized-audio',
                vendor,
@@ -140,7 +149,7 @@ class TtsTask extends Task {
            }
          }
          else {
-            this.logger.debug('a streaming tts api will be used');
+            this.logger.debug('Say: a streaming tts api will be used');
            const modifiedPath = filePath.replace('say:{', `say:{session-uuid=${ep.uuid},`);
            return modifiedPath;
          }
@@ -155,7 +164,6 @@ class TtsTask extends Task {
            detail: err.message,
            target_sid: cs.callSid
          }).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure'));
-          this.notifyError({msg: 'TTS error', details: err.message || err});
          throw err;
        }
      };
@@ -166,6 +174,7 @@ class TtsTask extends Task {
      this.logger.info(err, 'TaskSay:exec error');
      throw err;
    }
+
  }

  _validateURL(urlString) {