Feature/precache audio (#609)

* wip

* fix for establishing vendor etc

* more fixes

* avoid a pre-caching attempt if synth settings change
This commit is contained in:
Dave Horton
2024-01-13 12:51:25 -05:00
committed by GitHub
parent d3d494191f
commit 09a83e3a31
3 changed files with 50 additions and 11 deletions

View File

@@ -130,6 +130,8 @@ const JAMBONZ_RECORD_WS_PASSWORD = process.env.JAMBONZ_RECORD_WS_PASSWORD || pro
const JAMBONZ_DISABLE_DIAL_PAI_HEADER = process.env.JAMBONZ_DISABLE_DIAL_PAI_HEADER || false; const JAMBONZ_DISABLE_DIAL_PAI_HEADER = process.env.JAMBONZ_DISABLE_DIAL_PAI_HEADER || false;
const JAMBONES_DISABLE_DIRECT_P2P_CALL = process.env.JAMBONES_DISABLE_DIRECT_P2P_CALL || false; const JAMBONES_DISABLE_DIRECT_P2P_CALL = process.env.JAMBONES_DISABLE_DIRECT_P2P_CALL || false;
const JAMBONES_EAGERLY_PRE_CACHE_AUDIO = process.env.JAMBONES_EAGERLY_PRE_CACHE_AUDIO;
module.exports = { module.exports = {
JAMBONES_MYSQL_HOST, JAMBONES_MYSQL_HOST,
JAMBONES_MYSQL_USER, JAMBONES_MYSQL_USER,
@@ -152,6 +154,7 @@ module.exports = {
JAMBONES_API_BASE_URL, JAMBONES_API_BASE_URL,
JAMBONES_TIME_SERIES_HOST, JAMBONES_TIME_SERIES_HOST,
JAMBONES_INJECT_CONTENT, JAMBONES_INJECT_CONTENT,
JAMBONES_EAGERLY_PRE_CACHE_AUDIO,
JAMBONES_ESL_LISTEN_ADDRESS, JAMBONES_ESL_LISTEN_ADDRESS,
JAMBONES_SBCS, JAMBONES_SBCS,
JAMBONES_OTEL_ENABLED, JAMBONES_OTEL_ENABLED,

View File

@@ -19,6 +19,7 @@ const HttpRequestor = require('../utils/http-requestor');
const WsRequestor = require('../utils/ws-requestor'); const WsRequestor = require('../utils/ws-requestor');
const { const {
JAMBONES_INJECT_CONTENT, JAMBONES_INJECT_CONTENT,
JAMBONES_EAGERLY_PRE_CACHE_AUDIO,
AWS_REGION, AWS_REGION,
} = require('../config'); } = require('../config');
const BackgroundTaskManager = require('../utils/background-task-manager'); const BackgroundTaskManager = require('../utils/background-task-manager');
@@ -1330,6 +1331,35 @@ Duration=${duration} `
this.taskIdx = 0; this.taskIdx = 0;
} }
_preCacheAudio(newTasks) {
for (const task of newTasks) {
if (task.name === TaskName.Config && task.hasSynthesizer) {
/* if they change synthesizer settings don't try to precache */
break;
}
if (task.name === TaskName.Say) {
/* identify vendor language, voice, and label */
const vendor = task.synthesizer.vendor && task.synthesizer.vendor !== 'default' ?
task.synthesizer.vendor :
this.speechSynthesisVendor;
const language = task.synthesizer.language && task.synthesizer.language !== 'default' ?
task.synthesizer.language :
this.speechSynthesisLanguage ;
const voice = task.synthesizer.voice && task.synthesizer.voice !== 'default' ?
task.synthesizer.voice :
this.speechSynthesisVoice;
const label = task.synthesizer.label && task.synthesizer.label !== 'default' ?
task.synthesizer.label :
this.speechSynthesisLabel;
this.logger.info({vendor, language, voice, label},
'CallSession:_preCacheAudio - precaching audio for future prompt');
task._synthesizeWithSpecificVendor(this, this.ep, {vendor, language, voice, label, preCache: true})
.catch((err) => this.logger.error(err, 'CallSession:_preCacheAudio - error precaching audio'));
}
}
}
/** /**
* Append tasks to the current execution stack UNLESS there is a gather in the stack. * Append tasks to the current execution stack UNLESS there is a gather in the stack.
* in that case, insert the tasks before the gather AND if the tasks include * in that case, insert the tasks before the gather AND if the tasks include
@@ -1387,10 +1417,12 @@ Duration=${duration} `
this.replaceApplication(t); this.replaceApplication(t);
} }
else if (JAMBONES_INJECT_CONTENT) { else if (JAMBONES_INJECT_CONTENT) {
if (JAMBONES_EAGERLY_PRE_CACHE_AUDIO) this._preCacheAudio(t);
this._injectTasks(t); this._injectTasks(t);
this.logger.info({tasks: listTaskNames(this.tasks)}, 'CallSession:_onCommand - updated task list'); this.logger.info({tasks: listTaskNames(this.tasks)}, 'CallSession:_onCommand - updated task list');
} }
else { else {
if (JAMBONES_EAGERLY_PRE_CACHE_AUDIO) this._preCacheAudio(t);
this.tasks.push(...t); this.tasks.push(...t);
this.logger.info({tasks: listTaskNames(this.tasks)}, 'CallSession:_onCommand - updated task list'); this.logger.info({tasks: listTaskNames(this.tasks)}, 'CallSession:_onCommand - updated task list');
} }

View File

@@ -59,7 +59,7 @@ class TaskSay extends Task {
} }
} }
async _synthesizeWithSpecificVendor(cs, ep, {vendor, language, voice, label}) { async _synthesizeWithSpecificVendor(cs, ep, {vendor, language, voice, label, preCache = false}) {
const {srf} = cs; const {srf} = cs;
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, srf); const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, srf);
const {writeAlerts, AlertType, stats} = srf.locals; const {writeAlerts, AlertType, stats} = srf.locals;
@@ -97,7 +97,7 @@ class TaskSay extends Task {
voice = this.options.voice_id || voice; voice = this.options.voice_id || voice;
} }
this.logger.info({vendor, language, voice, model}, 'TaskSay:exec'); if (!preCache) this.logger.info({vendor, language, voice, model}, 'TaskSay:exec');
try { try {
if (!credentials) { if (!credentials) {
writeAlerts({ writeAlerts({
@@ -120,11 +120,15 @@ class TaskSay extends Task {
if (text.startsWith('silence_stream://')) return text; if (text.startsWith('silence_stream://')) return text;
/* otel: trace time for tts */ /* otel: trace time for tts */
const {span} = this.startChildSpan('tts-generation', { let otelSpan;
'tts.vendor': vendor, if (!preCache) {
'tts.language': language, const {span} = this.startChildSpan('tts-generation', {
'tts.voice': voice 'tts.vendor': vendor,
}); 'tts.language': language,
'tts.voice': voice
});
otelSpan = span;
}
try { try {
const {filePath, servedFromCache, rtt} = await synthAudio(stats, { const {filePath, servedFromCache, rtt} = await synthAudio(stats, {
account_sid: cs.accountSid, account_sid: cs.accountSid,
@@ -146,9 +150,9 @@ class TaskSay extends Task {
updateSpeechCredentialLastUsed(credentials.speech_credential_sid) updateSpeechCredentialLastUsed(credentials.speech_credential_sid)
.catch(() => {/*already logged error */}); .catch(() => {/*already logged error */});
} }
span.setAttributes({'tts.cached': servedFromCache}); if (otelSpan) otelSpan.setAttributes({'tts.cached': servedFromCache});
span.end(); if (otelSpan) otelSpan.end();
if (!servedFromCache && rtt) { if (!servedFromCache && rtt && !preCache) {
this.notifyStatus({ this.notifyStatus({
event: 'synthesized-audio', event: 'synthesized-audio',
vendor, vendor,
@@ -160,7 +164,7 @@ class TaskSay extends Task {
return filePath; return filePath;
} catch (err) { } catch (err) {
this.logger.info({err}, 'Error synthesizing tts'); this.logger.info({err}, 'Error synthesizing tts');
span.end(); if (otelSpan) otelSpan.end();
writeAlerts({ writeAlerts({
account_sid: cs.accountSid, account_sid: cs.accountSid,
alert_type: AlertType.TTS_FAILURE, alert_type: AlertType.TTS_FAILURE,