mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-20 16:50:39 +00:00
Feature/precache audio (#609)
* wip * fix for establishing vendor etc * more fixes * avoid a pre-caching attempt if synth settings change
This commit is contained in:
@@ -130,6 +130,8 @@ const JAMBONZ_RECORD_WS_PASSWORD = process.env.JAMBONZ_RECORD_WS_PASSWORD || pro
|
||||
const JAMBONZ_DISABLE_DIAL_PAI_HEADER = process.env.JAMBONZ_DISABLE_DIAL_PAI_HEADER || false;
|
||||
const JAMBONES_DISABLE_DIRECT_P2P_CALL = process.env.JAMBONES_DISABLE_DIRECT_P2P_CALL || false;
|
||||
|
||||
/* Opt-in flag for eager pre-caching of audio, taken verbatim from the environment
 * (undefined when the variable is unset). Consumers test it for truthiness only,
 * and environment values are strings, so any non-empty value turns it on.
 */
const JAMBONES_EAGERLY_PRE_CACHE_AUDIO = process.env.JAMBONES_EAGERLY_PRE_CACHE_AUDIO;
|
||||
|
||||
module.exports = {
|
||||
JAMBONES_MYSQL_HOST,
|
||||
JAMBONES_MYSQL_USER,
|
||||
@@ -152,6 +154,7 @@ module.exports = {
|
||||
JAMBONES_API_BASE_URL,
|
||||
JAMBONES_TIME_SERIES_HOST,
|
||||
JAMBONES_INJECT_CONTENT,
|
||||
JAMBONES_EAGERLY_PRE_CACHE_AUDIO,
|
||||
JAMBONES_ESL_LISTEN_ADDRESS,
|
||||
JAMBONES_SBCS,
|
||||
JAMBONES_OTEL_ENABLED,
|
||||
|
||||
@@ -19,6 +19,7 @@ const HttpRequestor = require('../utils/http-requestor');
|
||||
const WsRequestor = require('../utils/ws-requestor');
|
||||
const {
|
||||
JAMBONES_INJECT_CONTENT,
|
||||
JAMBONES_EAGERLY_PRE_CACHE_AUDIO,
|
||||
AWS_REGION,
|
||||
} = require('../config');
|
||||
const BackgroundTaskManager = require('../utils/background-task-manager');
|
||||
@@ -1330,6 +1331,35 @@ Duration=${duration} `
|
||||
this.taskIdx = 0;
|
||||
}
|
||||
|
||||
_preCacheAudio(newTasks) {
|
||||
for (const task of newTasks) {
|
||||
if (task.name === TaskName.Config && task.hasSynthesizer) {
|
||||
/* if they change synthesizer settings don't try to precache */
|
||||
break;
|
||||
}
|
||||
if (task.name === TaskName.Say) {
|
||||
/* identify vendor language, voice, and label */
|
||||
const vendor = task.synthesizer.vendor && task.synthesizer.vendor !== 'default' ?
|
||||
task.synthesizer.vendor :
|
||||
this.speechSynthesisVendor;
|
||||
const language = task.synthesizer.language && task.synthesizer.language !== 'default' ?
|
||||
task.synthesizer.language :
|
||||
this.speechSynthesisLanguage ;
|
||||
const voice = task.synthesizer.voice && task.synthesizer.voice !== 'default' ?
|
||||
task.synthesizer.voice :
|
||||
this.speechSynthesisVoice;
|
||||
const label = task.synthesizer.label && task.synthesizer.label !== 'default' ?
|
||||
task.synthesizer.label :
|
||||
this.speechSynthesisLabel;
|
||||
|
||||
this.logger.info({vendor, language, voice, label},
|
||||
'CallSession:_preCacheAudio - precaching audio for future prompt');
|
||||
task._synthesizeWithSpecificVendor(this, this.ep, {vendor, language, voice, label, preCache: true})
|
||||
.catch((err) => this.logger.error(err, 'CallSession:_preCacheAudio - error precaching audio'));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Append tasks to the current execution stack UNLESS there is a gather in the stack.
|
||||
* in that case, insert the tasks before the gather AND if the tasks include
|
||||
@@ -1387,10 +1417,12 @@ Duration=${duration} `
|
||||
this.replaceApplication(t);
|
||||
}
|
||||
else if (JAMBONES_INJECT_CONTENT) {
|
||||
if (JAMBONES_EAGERLY_PRE_CACHE_AUDIO) this._preCacheAudio(t);
|
||||
this._injectTasks(t);
|
||||
this.logger.info({tasks: listTaskNames(this.tasks)}, 'CallSession:_onCommand - updated task list');
|
||||
}
|
||||
else {
|
||||
if (JAMBONES_EAGERLY_PRE_CACHE_AUDIO) this._preCacheAudio(t);
|
||||
this.tasks.push(...t);
|
||||
this.logger.info({tasks: listTaskNames(this.tasks)}, 'CallSession:_onCommand - updated task list');
|
||||
}
|
||||
|
||||
@@ -59,7 +59,7 @@ class TaskSay extends Task {
|
||||
}
|
||||
}
|
||||
|
||||
async _synthesizeWithSpecificVendor(cs, ep, {vendor, language, voice, label}) {
|
||||
async _synthesizeWithSpecificVendor(cs, ep, {vendor, language, voice, label, preCache = false}) {
|
||||
const {srf} = cs;
|
||||
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, srf);
|
||||
const {writeAlerts, AlertType, stats} = srf.locals;
|
||||
@@ -97,7 +97,7 @@ class TaskSay extends Task {
|
||||
voice = this.options.voice_id || voice;
|
||||
}
|
||||
|
||||
this.logger.info({vendor, language, voice, model}, 'TaskSay:exec');
|
||||
if (!preCache) this.logger.info({vendor, language, voice, model}, 'TaskSay:exec');
|
||||
try {
|
||||
if (!credentials) {
|
||||
writeAlerts({
|
||||
@@ -120,11 +120,15 @@ class TaskSay extends Task {
|
||||
if (text.startsWith('silence_stream://')) return text;
|
||||
|
||||
/* otel: trace time for tts */
|
||||
const {span} = this.startChildSpan('tts-generation', {
|
||||
'tts.vendor': vendor,
|
||||
'tts.language': language,
|
||||
'tts.voice': voice
|
||||
});
|
||||
let otelSpan;
|
||||
if (!preCache) {
|
||||
const {span} = this.startChildSpan('tts-generation', {
|
||||
'tts.vendor': vendor,
|
||||
'tts.language': language,
|
||||
'tts.voice': voice
|
||||
});
|
||||
otelSpan = span;
|
||||
}
|
||||
try {
|
||||
const {filePath, servedFromCache, rtt} = await synthAudio(stats, {
|
||||
account_sid: cs.accountSid,
|
||||
@@ -146,9 +150,9 @@ class TaskSay extends Task {
|
||||
updateSpeechCredentialLastUsed(credentials.speech_credential_sid)
|
||||
.catch(() => {/*already logged error */});
|
||||
}
|
||||
span.setAttributes({'tts.cached': servedFromCache});
|
||||
span.end();
|
||||
if (!servedFromCache && rtt) {
|
||||
if (otelSpan) otelSpan.setAttributes({'tts.cached': servedFromCache});
|
||||
if (otelSpan) otelSpan.end();
|
||||
if (!servedFromCache && rtt && !preCache) {
|
||||
this.notifyStatus({
|
||||
event: 'synthesized-audio',
|
||||
vendor,
|
||||
@@ -160,7 +164,7 @@ class TaskSay extends Task {
|
||||
return filePath;
|
||||
} catch (err) {
|
||||
this.logger.info({err}, 'Error synthesizing tts');
|
||||
span.end();
|
||||
if (otelSpan) otelSpan.end();
|
||||
writeAlerts({
|
||||
account_sid: cs.accountSid,
|
||||
alert_type: AlertType.TTS_FAILURE,
|
||||
|
||||
Reference in New Issue
Block a user