const Task = require('./task');
const { TaskPreconditions } = require('../utils/constants');

/**
 * Base task for verbs that need text-to-speech audio.
 *
 * Reads the `synthesizer`, `earlyMedia` and `disableTtsCache` settings from the
 * task data and exposes `_synthesizeWithSpecificVendor`, which converts the
 * entries of `this.text` into playable audio references for one vendor.
 * `this.text` is not assigned in this class; it is expected to be an array of
 * strings provided by a subclass.
 */
class TtsTask extends Task {
  /**
   * @param {object} logger - logger passed through to the base Task
   * @param {object} data - task data passed through to the base Task
   * @param {object} [parentTask] - enclosing task, if any; its `earlyMedia`
   *   setting is used when this task does not explicitly enable early media
   */
  constructor(logger, data, parentTask) {
    super(logger, data);
    this.parentTask = parentTask;

    this.preconditions = TaskPreconditions.Endpoint;
    this.earlyMedia = this.data.earlyMedia === true || (parentTask && parentTask.earlyMedia);
    this.synthesizer = this.data.synthesizer || {};
    this.disableTtsCache = this.data.disableTtsCache;
    this.options = this.synthesizer.options || {};
  }

  /**
   * Runs the base-class exec step.
   *
   * @param {object} cs - call session
   */
  async exec(cs) {
    // await so a rejection from the base class propagates to our caller
    // instead of becoming an unhandled rejection
    await super.exec(cs);
  }

  /**
   * Synthesizes every entry of `this.text` using the given vendor.
   *
   * Entries that parse as a URL are passed through untouched, and
   * `silence_stream://` entries are returned as-is. All other entries are sent
   * to `synthAudio`; the result is either a file path or a `say:` string
   * (streaming tts), the latter being tagged with the endpoint's session uuid.
   *
   * @param {object} cs - call session
   * @param {object} ep - media endpoint
   * @param {object} opts
   * @param {string} opts.vendor - tts vendor
   * @param {string} opts.language - language passed to the synthesizer
   * @param {string} opts.voice - voice passed to the synthesizer
   * @param {string} [opts.label] - label used to look up speech credentials
   * @param {boolean} [opts.disableTtsStreaming] - forwarded to `synthAudio`
   * @param {boolean} [opts.preCache] - when truthy, the exec log line, the otel
   *   span and the `synthesized-audio` status notification are skipped
   * @returns {Promise<string[]>} the non-empty audio references, in text order
   * @throws {Error} when no speech credentials are available for the vendor, or
   *   when synthesis of any entry fails
   */
  async _synthesizeWithSpecificVendor(cs, ep, {
    vendor, language, voice, label, disableTtsStreaming, preCache
  }) {
    const {srf, accountSid: account_sid} = cs;
    const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, srf);
    const {writeAlerts, AlertType, stats} = srf.locals;
    const {synthAudio} = srf.locals.dbHelpers;
    const engine = this.synthesizer.engine || cs.synthesizer?.engine || 'neural';
    const salt = cs.callSid;

    let credentials = cs.getSpeechCredentials(vendor, 'tts', label);

    /* parse Nuance voices into name and model */
    let model;
    if (vendor === 'nuance' && voice) {
      const arr = /([A-Za-z-]*)\s+-\s+(enhanced|standard)/.exec(voice);
      if (arr) {
        voice = arr[1];
        model = arr[2];
      }
    } else if (vendor === 'deepgram') {
      model = voice;
    }

    /* allow for microsoft custom region voice and api_key to be specified as an override */
    if (vendor === 'microsoft' && this.options.deploymentId) {
      credentials = credentials || {};
      credentials.use_custom_tts = true;
      credentials.custom_tts_endpoint = this.options.deploymentId;
      // fall back to the key already on the credentials (stored as `api_key`);
      // the legacy `apiKey` spelling is kept as a last resort
      credentials.api_key = this.options.apiKey || credentials.api_key || credentials.apiKey;
      credentials.region = this.options.region || credentials.region;
      voice = this.options.voice || voice;
    } else if (vendor === 'elevenlabs') {
      credentials = credentials || {};
      credentials.model_id = this.options.model_id || credentials.model_id;
      credentials.voice_settings = this.options.voice_settings || {};
      credentials.optimize_streaming_latency =
        this.options.optimize_streaming_latency || credentials.optimize_streaming_latency;
      voice = this.options.voice_id || voice;
    }

    /* not awaited: a failure here is logged only and does not abort synthesis */
    ep.set({
      tts_engine: vendor,
      tts_voice: voice,
      cache_speech_handles: 1,
    }).catch((err) => this.logger.info({err}, `${this.name}: Error setting tts_engine on endpoint`));

    if (!preCache) this.logger.info({vendor, language, voice, model}, `${this.name}:exec`);

    try {
      if (!credentials) {
        writeAlerts({
          account_sid,
          alert_type: AlertType.TTS_NOT_PROVISIONED,
          vendor
        }).catch((err) => this.logger.info({err}, 'Error generating alert for no tts'));
        this.notifyError({
          msg: 'TTS error',
          details:`No speech credentials provisioned for selected vendor ${vendor}`
        });
        throw new Error('no provisioned speech credentials for TTS');
      }

      // synthesize all of the text elements
      /* ensures the credential's last-used marker is updated at most once per call */
      let lastUpdated = false;

      /* produce an audio segment from the provided text */
      const generateAudio = async(text) => {
        if (this.killed) return;
        if (text.startsWith('silence_stream://')) return text;

        /* otel: trace time for tts */
        if (!preCache && !this.parentTask) {
          const {span} = this.startChildSpan('tts-generation', {
            'tts.vendor': vendor,
            'tts.language': language,
            'tts.voice': voice
          });
          this.otelSpan = span;
        }
        try {
          const {filePath, servedFromCache, rtt} = await synthAudio(stats, {
            account_sid,
            text,
            vendor,
            language,
            voice,
            engine,
            model,
            salt,
            credentials,
            options: this.options,
            disableTtsCache : this.disableTtsCache,
            disableTtsStreaming,
            preCache
          });
          if (!filePath.startsWith('say:')) {
            this.logger.debug(`file ${filePath}, served from cache ${servedFromCache}`);
            if (filePath) cs.trackTmpFile(filePath);
            if (this.otelSpan) {
              this.otelSpan.setAttributes({'tts.cached': servedFromCache});
              this.otelSpan.end();
              this.otelSpan = null;
            }
            if (!servedFromCache && !lastUpdated) {
              lastUpdated = true;
              updateSpeechCredentialLastUsed(credentials.speech_credential_sid).catch(() => {/* logged error */});
            }
            if (!servedFromCache && rtt && !preCache) {
              this.notifyStatus({
                event: 'synthesized-audio',
                vendor,
                language,
                characters: text.length,
                elapsedTime: rtt
              });
            }
          } else {
            /* NOTE(review): the otel span is left open on this path; presumably
               it is ended elsewhere once streaming playback starts - confirm */
            this.logger.debug('a streaming tts api will be used');
            const modifiedPath = filePath.replace('say:{', `say:{session-uuid=${ep.uuid},`);
            return modifiedPath;
          }
          return filePath;
        } catch (err) {
          this.logger.info({err}, 'Error synthesizing tts');
          if (this.otelSpan) {
            this.otelSpan.end();
            // clear the reference, as the success path does, so the span
            // cannot be ended a second time
            this.otelSpan = null;
          }
          writeAlerts({
            account_sid: cs.accountSid,
            alert_type: AlertType.TTS_FAILURE,
            vendor,
            detail: err.message,
            target_sid: cs.callSid
          }).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure'));
          this.notifyError({msg: 'TTS error', details: err.message || err});
          throw err;
        }
      };

      /* URLs are used as-is; everything else is synthesized concurrently */
      const arr = this.text.map((t) => (this._validateURL(t) ? t : generateAudio(t)));
      /* drop empty results, e.g. from entries skipped because the task was killed */
      return (await Promise.all(arr)).filter((fp) => fp && fp.length);
    } catch (err) {
      this.logger.info(err, 'TaskSay:exec error');
      throw err;
    }
  }

  /**
   * Reports whether the string can be parsed by the URL constructor.
   *
   * @param {string} urlString - candidate string
   * @returns {boolean} true if `new URL(urlString)` does not throw
   */
  _validateURL(urlString) {
    try {
      new URL(urlString);
      return true;
    } catch (e) {
      return false;
    }
  }
}

module.exports = TtsTask;