diff --git a/lib/config.js b/lib/config.js index 5a6ab44a..81e54c5c 100644 --- a/lib/config.js +++ b/lib/config.js @@ -130,6 +130,8 @@ const JAMBONZ_RECORD_WS_PASSWORD = process.env.JAMBONZ_RECORD_WS_PASSWORD || pro const JAMBONZ_DISABLE_DIAL_PAI_HEADER = process.env.JAMBONZ_DISABLE_DIAL_PAI_HEADER || false; const JAMBONES_DISABLE_DIRECT_P2P_CALL = process.env.JAMBONES_DISABLE_DIRECT_P2P_CALL || false; +const JAMBONES_EAGERLY_PRE_CACHE_AUDIO = process.env.JAMBONES_EAGERLY_PRE_CACHE_AUDIO; + module.exports = { JAMBONES_MYSQL_HOST, JAMBONES_MYSQL_USER, @@ -152,6 +154,7 @@ module.exports = { JAMBONES_API_BASE_URL, JAMBONES_TIME_SERIES_HOST, JAMBONES_INJECT_CONTENT, + JAMBONES_EAGERLY_PRE_CACHE_AUDIO, JAMBONES_ESL_LISTEN_ADDRESS, JAMBONES_SBCS, JAMBONES_OTEL_ENABLED, diff --git a/lib/session/call-session.js b/lib/session/call-session.js index 70d12838..e32dd4d0 100644 --- a/lib/session/call-session.js +++ b/lib/session/call-session.js @@ -19,6 +19,7 @@ const HttpRequestor = require('../utils/http-requestor'); const WsRequestor = require('../utils/ws-requestor'); const { JAMBONES_INJECT_CONTENT, + JAMBONES_EAGERLY_PRE_CACHE_AUDIO, AWS_REGION, } = require('../config'); const BackgroundTaskManager = require('../utils/background-task-manager'); @@ -1330,6 +1331,35 @@ Duration=${duration} ` this.taskIdx = 0; } + _preCacheAudio(newTasks) { + for (const task of newTasks) { + if (task.name === TaskName.Config && task.hasSynthesizer) { + /* if they change synthesizer settings don't try to precache */ + break; + } + if (task.name === TaskName.Say) { + /* identify vendor language, voice, and label */ + const vendor = task.synthesizer.vendor && task.synthesizer.vendor !== 'default' ? + task.synthesizer.vendor : + this.speechSynthesisVendor; + const language = task.synthesizer.language && task.synthesizer.language !== 'default' ? + task.synthesizer.language : + this.speechSynthesisLanguage ; + const voice = task.synthesizer.voice && task.synthesizer.voice !== 'default' ? + task.synthesizer.voice : + this.speechSynthesisVoice; + const label = task.synthesizer.label && task.synthesizer.label !== 'default' ? + task.synthesizer.label : + this.speechSynthesisLabel; + + this.logger.info({vendor, language, voice, label}, + 'CallSession:_preCacheAudio - precaching audio for future prompt'); + task._synthesizeWithSpecificVendor(this, this.ep, {vendor, language, voice, label, preCache: true}) + .catch((err) => this.logger.error(err, 'CallSession:_preCacheAudio - error precaching audio')); + } + } + } + /** * Append tasks to the current execution stack UNLESS there is a gather in the stack. * in that case, insert the tasks before the gather AND if the tasks include @@ -1387,10 +1417,12 @@ Duration=${duration} ` this.replaceApplication(t); } else if (JAMBONES_INJECT_CONTENT) { + if (JAMBONES_EAGERLY_PRE_CACHE_AUDIO) this._preCacheAudio(t); this._injectTasks(t); this.logger.info({tasks: listTaskNames(this.tasks)}, 'CallSession:_onCommand - updated task list'); } else { + if (JAMBONES_EAGERLY_PRE_CACHE_AUDIO) this._preCacheAudio(t); this.tasks.push(...t); this.logger.info({tasks: listTaskNames(this.tasks)}, 'CallSession:_onCommand - updated task list'); } diff --git a/lib/tasks/say.js b/lib/tasks/say.js index 8523b114..091cf13b 100644 --- a/lib/tasks/say.js +++ b/lib/tasks/say.js @@ -59,7 +59,7 @@ class TaskSay extends Task { } } - async _synthesizeWithSpecificVendor(cs, ep, {vendor, language, voice, label}) { + async _synthesizeWithSpecificVendor(cs, ep, {vendor, language, voice, label, preCache = false}) { const {srf} = cs; const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, srf); const {writeAlerts, AlertType, stats} = srf.locals; @@ -97,7 +97,7 @@ class TaskSay extends Task { voice = this.options.voice_id || voice; } - this.logger.info({vendor, language, voice, model}, 'TaskSay:exec'); + if (!preCache) this.logger.info({vendor, language, voice, model}, 'TaskSay:exec'); try { if (!credentials) { writeAlerts({ @@ -120,11 +120,15 @@ class TaskSay extends Task { if (text.startsWith('silence_stream://')) return text; /* otel: trace time for tts */ - const {span} = this.startChildSpan('tts-generation', { - 'tts.vendor': vendor, - 'tts.language': language, - 'tts.voice': voice - }); + let otelSpan; + if (!preCache) { + const {span} = this.startChildSpan('tts-generation', { + 'tts.vendor': vendor, + 'tts.language': language, + 'tts.voice': voice + }); + otelSpan = span; + } try { const {filePath, servedFromCache, rtt} = await synthAudio(stats, { account_sid: cs.accountSid, @@ -146,9 +150,9 @@ class TaskSay extends Task { updateSpeechCredentialLastUsed(credentials.speech_credential_sid) .catch(() => {/*already logged error */}); } - span.setAttributes({'tts.cached': servedFromCache}); - span.end(); - if (!servedFromCache && rtt) { + if (otelSpan) otelSpan.setAttributes({'tts.cached': servedFromCache}); + if (otelSpan) otelSpan.end(); + if (!servedFromCache && rtt && !preCache) { this.notifyStatus({ event: 'synthesized-audio', vendor, @@ -160,7 +164,7 @@ class TaskSay extends Task { return filePath; } catch (err) { this.logger.info({err}, 'Error synthesizing tts'); - span.end(); + if (otelSpan) otelSpan.end(); writeAlerts({ account_sid: cs.accountSid, alert_type: AlertType.TTS_FAILURE,