Feature/precache audio (#609)

* wip

* fix for establishing vendor etc

* more fixes

* avoid a pre-caching attempt if synth settings change
This commit is contained in:
Dave Horton
2024-01-13 12:51:25 -05:00
committed by GitHub
parent d3d494191f
commit 09a83e3a31
3 changed files with 50 additions and 11 deletions

View File

@@ -130,6 +130,8 @@ const JAMBONZ_RECORD_WS_PASSWORD = process.env.JAMBONZ_RECORD_WS_PASSWORD || pro
const JAMBONZ_DISABLE_DIAL_PAI_HEADER = process.env.JAMBONZ_DISABLE_DIAL_PAI_HEADER || false;
const JAMBONES_DISABLE_DIRECT_P2P_CALL = process.env.JAMBONES_DISABLE_DIRECT_P2P_CALL || false;
/* opt-in switch for eager TTS pre-caching, taken verbatim from the environment.
 * NOTE: consumers test it with a plain truthiness check, so ANY non-empty value
 * (including the strings "0" or "false") turns the feature on; leave it unset to disable. */
const JAMBONES_EAGERLY_PRE_CACHE_AUDIO = process.env.JAMBONES_EAGERLY_PRE_CACHE_AUDIO;
module.exports = {
JAMBONES_MYSQL_HOST,
JAMBONES_MYSQL_USER,
@@ -152,6 +154,7 @@ module.exports = {
JAMBONES_API_BASE_URL,
JAMBONES_TIME_SERIES_HOST,
JAMBONES_INJECT_CONTENT,
JAMBONES_EAGERLY_PRE_CACHE_AUDIO,
JAMBONES_ESL_LISTEN_ADDRESS,
JAMBONES_SBCS,
JAMBONES_OTEL_ENABLED,

View File

@@ -19,6 +19,7 @@ const HttpRequestor = require('../utils/http-requestor');
const WsRequestor = require('../utils/ws-requestor');
const {
JAMBONES_INJECT_CONTENT,
JAMBONES_EAGERLY_PRE_CACHE_AUDIO,
AWS_REGION,
} = require('../config');
const BackgroundTaskManager = require('../utils/background-task-manager');
@@ -1330,6 +1331,35 @@ Duration=${duration} `
this.taskIdx = 0;
}
_preCacheAudio(newTasks) {
for (const task of newTasks) {
if (task.name === TaskName.Config && task.hasSynthesizer) {
/* if they change synthesizer settings don't try to precache */
break;
}
if (task.name === TaskName.Say) {
/* identify vendor language, voice, and label */
const vendor = task.synthesizer.vendor && task.synthesizer.vendor !== 'default' ?
task.synthesizer.vendor :
this.speechSynthesisVendor;
const language = task.synthesizer.language && task.synthesizer.language !== 'default' ?
task.synthesizer.language :
this.speechSynthesisLanguage ;
const voice = task.synthesizer.voice && task.synthesizer.voice !== 'default' ?
task.synthesizer.voice :
this.speechSynthesisVoice;
const label = task.synthesizer.label && task.synthesizer.label !== 'default' ?
task.synthesizer.label :
this.speechSynthesisLabel;
this.logger.info({vendor, language, voice, label},
'CallSession:_preCacheAudio - precaching audio for future prompt');
task._synthesizeWithSpecificVendor(this, this.ep, {vendor, language, voice, label, preCache: true})
.catch((err) => this.logger.error(err, 'CallSession:_preCacheAudio - error precaching audio'));
}
}
}
/**
* Append tasks to the current execution stack UNLESS there is a gather in the stack.
* in that case, insert the tasks before the gather AND if the tasks include
@@ -1387,10 +1417,12 @@ Duration=${duration} `
this.replaceApplication(t);
}
else if (JAMBONES_INJECT_CONTENT) {
if (JAMBONES_EAGERLY_PRE_CACHE_AUDIO) this._preCacheAudio(t);
this._injectTasks(t);
this.logger.info({tasks: listTaskNames(this.tasks)}, 'CallSession:_onCommand - updated task list');
}
else {
if (JAMBONES_EAGERLY_PRE_CACHE_AUDIO) this._preCacheAudio(t);
this.tasks.push(...t);
this.logger.info({tasks: listTaskNames(this.tasks)}, 'CallSession:_onCommand - updated task list');
}

View File

@@ -59,7 +59,7 @@ class TaskSay extends Task {
}
}
async _synthesizeWithSpecificVendor(cs, ep, {vendor, language, voice, label}) {
async _synthesizeWithSpecificVendor(cs, ep, {vendor, language, voice, label, preCache = false}) {
const {srf} = cs;
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, srf);
const {writeAlerts, AlertType, stats} = srf.locals;
@@ -97,7 +97,7 @@ class TaskSay extends Task {
voice = this.options.voice_id || voice;
}
this.logger.info({vendor, language, voice, model}, 'TaskSay:exec');
if (!preCache) this.logger.info({vendor, language, voice, model}, 'TaskSay:exec');
try {
if (!credentials) {
writeAlerts({
@@ -120,11 +120,15 @@ class TaskSay extends Task {
if (text.startsWith('silence_stream://')) return text;
/* otel: trace time for tts */
const {span} = this.startChildSpan('tts-generation', {
'tts.vendor': vendor,
'tts.language': language,
'tts.voice': voice
});
let otelSpan;
if (!preCache) {
const {span} = this.startChildSpan('tts-generation', {
'tts.vendor': vendor,
'tts.language': language,
'tts.voice': voice
});
otelSpan = span;
}
try {
const {filePath, servedFromCache, rtt} = await synthAudio(stats, {
account_sid: cs.accountSid,
@@ -146,9 +150,9 @@ class TaskSay extends Task {
updateSpeechCredentialLastUsed(credentials.speech_credential_sid)
.catch(() => {/*already logged error */});
}
span.setAttributes({'tts.cached': servedFromCache});
span.end();
if (!servedFromCache && rtt) {
if (otelSpan) otelSpan.setAttributes({'tts.cached': servedFromCache});
if (otelSpan) otelSpan.end();
if (!servedFromCache && rtt && !preCache) {
this.notifyStatus({
event: 'synthesized-audio',
vendor,
@@ -160,7 +164,7 @@ class TaskSay extends Task {
return filePath;
} catch (err) {
this.logger.info({err}, 'Error synthesizing tts');
span.end();
if (otelSpan) otelSpan.end();
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.TTS_FAILURE,