diff --git a/lib/tasks/say.js b/lib/tasks/say.js index 0e932599..3dc56d10 100644 --- a/lib/tasks/say.js +++ b/lib/tasks/say.js @@ -61,147 +61,6 @@ class TaskSay extends TtsTask { } } - async _synthesizeWithSpecificVendor(cs, ep, {vendor, language, voice, label, preCache = false}) { - const {srf, accountSid:account_sid} = cs; - const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, srf); - const {writeAlerts, AlertType, stats} = srf.locals; - const {synthAudio} = srf.locals.dbHelpers; - const engine = this.synthesizer.engine || cs.synthesizer?.engine || 'neural'; - const salt = cs.callSid; - - let credentials = cs.getSpeechCredentials(vendor, 'tts', label); - /* parse Nuance voices into name and model */ - let model; - if (vendor === 'nuance' && voice) { - const arr = /([A-Za-z-]*)\s+-\s+(enhanced|standard)/.exec(voice); - if (arr) { - voice = arr[1]; - model = arr[2]; - } - } else if (vendor === 'deepgram') { - model = voice; - } - - /* allow for microsoft custom region voice and api_key to be specified as an override */ - if (vendor === 'microsoft' && this.options.deploymentId) { - credentials = credentials || {}; - credentials.use_custom_tts = true; - credentials.custom_tts_endpoint = this.options.deploymentId; - credentials.api_key = this.options.apiKey || credentials.apiKey; - credentials.region = this.options.region || credentials.region; - voice = this.options.voice || voice; - } else if (vendor === 'elevenlabs') { - credentials = credentials || {}; - credentials.model_id = this.options.model_id || credentials.model_id; - credentials.voice_settings = this.options.voice_settings || {}; - credentials.optimize_streaming_latency = this.options.optimize_streaming_latency - || credentials.optimize_streaming_latency; - voice = this.options.voice_id || voice; - } - - ep.set({ - tts_engine: vendor.startsWith('custom:') ? 'custom' : vendor, - tts_voice: voice, - cache_speech_handles: !cs.currentTtsVendor || cs.currentTtsVendor === vendor ? 1 : 0, - }).catch((err) => this.logger.info({err}, 'Error setting tts_engine on endpoint')); - // set the current vendor on the call session - // If vendor is changed from the previous one, then reset the cache_speech_handles flag - cs.currentTtsVendor = vendor; - - if (!preCache && !this._disableTracing) this.logger.info({vendor, language, voice, model}, 'TaskSay:exec'); - try { - if (!credentials) { - writeAlerts({ - account_sid, - alert_type: AlertType.TTS_NOT_PROVISIONED, - vendor, - target_sid: cs.callSid - }).catch((err) => this.logger.info({err}, 'Error generating alert for no tts')); - throw new Error('no provisioned speech credentials for TTS'); - } - // synthesize all of the text elements - let lastUpdated = false; - - /* produce an audio segment from the provided text */ - const generateAudio = async(text) => { - if (this.killed) return; - if (text.startsWith('silence_stream://')) return text; - - /* otel: trace time for tts */ - if (!preCache && !this._disableTracing) { - const {span} = this.startChildSpan('tts-generation', { - 'tts.vendor': vendor, - 'tts.language': language, - 'tts.voice': voice - }); - this.otelSpan = span; - } - try { - const {filePath, servedFromCache, rtt} = await synthAudio(stats, { - account_sid, - text, - vendor, - language, - voice, - engine, - model, - salt, - credentials, - options: this.options, - disableTtsCache : this.disableTtsCache, - renderForCaching: preCache - }); - if (!filePath.startsWith('say:')) { - this.logger.debug(`Say: file ${filePath}, served from cache ${servedFromCache}`); - if (filePath) cs.trackTmpFile(filePath); - if (this.otelSpan) { - this.otelSpan.setAttributes({'tts.cached': servedFromCache}); - this.otelSpan.end(); - this.otelSpan = null; - } - if (!servedFromCache && !lastUpdated) { - lastUpdated = true; - updateSpeechCredentialLastUsed(credentials.speech_credential_sid).catch(() => {/* logged error */}); - } - if (!servedFromCache && rtt && !preCache && !this._disableTracing) { - this.notifyStatus({ - event: 'synthesized-audio', - vendor, - language, - characters: text.length, - elapsedTime: rtt - }); - } - } - else { - this.logger.debug('Say: a streaming tts api will be used'); - const modifiedPath = filePath.replace('say:{', `say:{session-uuid=${ep.uuid},`); - return modifiedPath; - } - return filePath; - } catch (err) { - this.logger.info({err}, 'Error synthesizing tts'); - if (this.otelSpan) this.otelSpan.end(); - writeAlerts({ - account_sid: cs.accountSid, - alert_type: AlertType.TTS_FAILURE, - vendor, - detail: err.message, - target_sid: cs.callSid - }).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure')); - throw err; - } - }; - - const arr = this.text.map((t) => (this._validateURL(t) ? t : generateAudio(t))); - return (await Promise.all(arr)).filter((fp) => fp && fp.length); - } catch (err) { - this.logger.info(err, 'TaskSay:exec error'); - throw err; - } - - } - async exec(cs, {ep}) { const {srf, accountSid:account_sid, callSid:target_sid} = cs; const {writeAlerts, AlertType} = srf.locals; diff --git a/lib/tasks/tts-task.js b/lib/tasks/tts-task.js index b680448a..4c96fded 100644 --- a/lib/tasks/tts-task.js +++ b/lib/tasks/tts-task.js @@ -17,16 +17,26 @@ class TtsTask extends Task { async exec(cs) { super.exec(cs); + if (cs.synthesizer) { + this.options = {...cs.synthesizer.options, ...this.options}; + this.data.synthesizer = this.data.synthesizer || {} + for (const k in cs.synthesizer) { + const newValue = this.data.synthesizer && this.data.synthesizer[k] !== undefined ? + this.data.synthesizer[k] : + cs.synthesizer[k]; + + if (Array.isArray(newValue)) { + this.data.synthesizer[k] = [...(this.data.synthesizer[k] || []), ...cs.synthesizer[k]]; + } else if (typeof newValue === 'object' && newValue !== null) { + this.data.synthesizer[k] = { ...(this.data.synthesizer[k] || {}), ...cs.synthesizer[k] }; + } else { + this.data.synthesizer[k] = newValue; + } + } + } } - async _synthesizeWithSpecificVendor(cs, ep, { - vendor, - language, - voice, - label, - disableTtsStreaming, - preCache - }) { + async _synthesizeWithSpecificVendor(cs, ep, {vendor, language, voice, label, preCache = false}) { const {srf, accountSid:account_sid} = cs; const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, srf); const {writeAlerts, AlertType, stats} = srf.locals; @@ -65,23 +75,23 @@ class TtsTask extends Task { } ep.set({ - tts_engine: vendor, + tts_engine: vendor.startsWith('custom:') ? 'custom' : vendor, tts_voice: voice, - cache_speech_handles: 1, - }).catch((err) => this.logger.info({err}, `${this.name}: Error setting tts_engine on endpoint`)); + cache_speech_handles: !cs.currentTtsVendor || cs.currentTtsVendor === vendor ? 1 : 0, + }).catch((err) => this.logger.info({err}, 'Error setting tts_engine on endpoint')); + // set the current vendor on the call session + // If vendor is changed from the previous one, then reset the cache_speech_handles flag + cs.currentTtsVendor = vendor; - if (!preCache) this.logger.info({vendor, language, voice, model}, `${this.name}:exec`); + if (!preCache && !this._disableTracing) this.logger.info({vendor, language, voice, model}, 'TaskSay:exec'); try { if (!credentials) { writeAlerts({ account_sid, alert_type: AlertType.TTS_NOT_PROVISIONED, - vendor + vendor, + target_sid: cs.callSid }).catch((err) => this.logger.info({err}, 'Error generating alert for no tts')); - this.notifyError({ - msg: 'TTS error', - details:`No speech credentials provisioned for selected vendor ${vendor}` - }); throw new Error('no provisioned speech credentials for TTS'); } // synthesize all of the text elements @@ -93,7 +103,7 @@ class TtsTask extends Task { if (text.startsWith('silence_stream://')) return text; /* otel: trace time for tts */ - if (!preCache && !this.parentTask) { + if (!preCache && !this._disableTracing) { const {span} = this.startChildSpan('tts-generation', { 'tts.vendor': vendor, 'tts.language': language, @@ -114,11 +124,10 @@ class TtsTask extends Task { credentials, options: this.options, disableTtsCache : this.disableTtsCache, - disableTtsStreaming, - preCache + renderForCaching: preCache }); if (!filePath.startsWith('say:')) { - this.logger.debug(`file ${filePath}, served from cache ${servedFromCache}`); + this.logger.debug(`Say: file ${filePath}, served from cache ${servedFromCache}`); if (filePath) cs.trackTmpFile(filePath); if (this.otelSpan) { this.otelSpan.setAttributes({'tts.cached': servedFromCache}); @@ -129,7 +138,7 @@ class TtsTask extends Task { lastUpdated = true; updateSpeechCredentialLastUsed(credentials.speech_credential_sid).catch(() => {/* logged error */}); } - if (!servedFromCache && rtt && !preCache) { + if (!servedFromCache && rtt && !preCache && !this._disableTracing) { this.notifyStatus({ event: 'synthesized-audio', vendor, @@ -140,7 +149,7 @@ class TtsTask extends Task { } } else { - this.logger.debug('a streaming tts api will be used'); + this.logger.debug('Say: a streaming tts api will be used'); const modifiedPath = filePath.replace('say:{', `say:{session-uuid=${ep.uuid},`); return modifiedPath; } @@ -155,7 +164,6 @@ class TtsTask extends Task { detail: err.message, target_sid: cs.callSid }).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure')); - this.notifyError({msg: 'TTS error', details: err.message || err}); throw err; } }; @@ -166,6 +174,7 @@ class TtsTask extends Task { this.logger.info(err, 'TaskSay:exec error'); throw err; } + } _validateURL(urlString) {