Feat/868: Use global synthesizer config properties for say verb (#869)

* feat/868: Use the properties from global config in verb for TTS

* feat/868: setting this.options to combination of cs.synthesizer.options and this.options

* feat/868: Move the logic of copying cs properties to parent class tts-task.js

* feat/868: add empty line that was removed, say.js restored to original version

* feat/868: moved _synthesizeWithSpecificVendor to tts-task.js

---------

Co-authored-by: Rammohan Yadavalli <rammohan.yadavalli@kore.com>
This commit is contained in:
rammohan-y
2024-08-20 18:01:44 +05:30
committed by GitHub
parent fc838512b6
commit f95d8639be
2 changed files with 33 additions and 165 deletions

View File

@@ -61,147 +61,6 @@ class TaskSay extends TtsTask {
} }
} }
async _synthesizeWithSpecificVendor(cs, ep, {vendor, language, voice, label, preCache = false}) {
const {srf, accountSid:account_sid} = cs;
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, srf);
const {writeAlerts, AlertType, stats} = srf.locals;
const {synthAudio} = srf.locals.dbHelpers;
const engine = this.synthesizer.engine || cs.synthesizer?.engine || 'neural';
const salt = cs.callSid;
let credentials = cs.getSpeechCredentials(vendor, 'tts', label);
/* parse Nuance voices into name and model */
let model;
if (vendor === 'nuance' && voice) {
const arr = /([A-Za-z-]*)\s+-\s+(enhanced|standard)/.exec(voice);
if (arr) {
voice = arr[1];
model = arr[2];
}
} else if (vendor === 'deepgram') {
model = voice;
}
/* allow for microsoft custom region voice and api_key to be specified as an override */
if (vendor === 'microsoft' && this.options.deploymentId) {
credentials = credentials || {};
credentials.use_custom_tts = true;
credentials.custom_tts_endpoint = this.options.deploymentId;
credentials.api_key = this.options.apiKey || credentials.apiKey;
credentials.region = this.options.region || credentials.region;
voice = this.options.voice || voice;
} else if (vendor === 'elevenlabs') {
credentials = credentials || {};
credentials.model_id = this.options.model_id || credentials.model_id;
credentials.voice_settings = this.options.voice_settings || {};
credentials.optimize_streaming_latency = this.options.optimize_streaming_latency
|| credentials.optimize_streaming_latency;
voice = this.options.voice_id || voice;
}
ep.set({
tts_engine: vendor.startsWith('custom:') ? 'custom' : vendor,
tts_voice: voice,
cache_speech_handles: !cs.currentTtsVendor || cs.currentTtsVendor === vendor ? 1 : 0,
}).catch((err) => this.logger.info({err}, 'Error setting tts_engine on endpoint'));
// set the current vendor on the call session
// If vendor is changed from the previous one, then reset the cache_speech_handles flag
cs.currentTtsVendor = vendor;
if (!preCache && !this._disableTracing) this.logger.info({vendor, language, voice, model}, 'TaskSay:exec');
try {
if (!credentials) {
writeAlerts({
account_sid,
alert_type: AlertType.TTS_NOT_PROVISIONED,
vendor,
target_sid: cs.callSid
}).catch((err) => this.logger.info({err}, 'Error generating alert for no tts'));
throw new Error('no provisioned speech credentials for TTS');
}
// synthesize all of the text elements
let lastUpdated = false;
/* produce an audio segment from the provided text */
const generateAudio = async(text) => {
if (this.killed) return;
if (text.startsWith('silence_stream://')) return text;
/* otel: trace time for tts */
if (!preCache && !this._disableTracing) {
const {span} = this.startChildSpan('tts-generation', {
'tts.vendor': vendor,
'tts.language': language,
'tts.voice': voice
});
this.otelSpan = span;
}
try {
const {filePath, servedFromCache, rtt} = await synthAudio(stats, {
account_sid,
text,
vendor,
language,
voice,
engine,
model,
salt,
credentials,
options: this.options,
disableTtsCache : this.disableTtsCache,
renderForCaching: preCache
});
if (!filePath.startsWith('say:')) {
this.logger.debug(`Say: file ${filePath}, served from cache ${servedFromCache}`);
if (filePath) cs.trackTmpFile(filePath);
if (this.otelSpan) {
this.otelSpan.setAttributes({'tts.cached': servedFromCache});
this.otelSpan.end();
this.otelSpan = null;
}
if (!servedFromCache && !lastUpdated) {
lastUpdated = true;
updateSpeechCredentialLastUsed(credentials.speech_credential_sid).catch(() => {/* logged error */});
}
if (!servedFromCache && rtt && !preCache && !this._disableTracing) {
this.notifyStatus({
event: 'synthesized-audio',
vendor,
language,
characters: text.length,
elapsedTime: rtt
});
}
}
else {
this.logger.debug('Say: a streaming tts api will be used');
const modifiedPath = filePath.replace('say:{', `say:{session-uuid=${ep.uuid},`);
return modifiedPath;
}
return filePath;
} catch (err) {
this.logger.info({err}, 'Error synthesizing tts');
if (this.otelSpan) this.otelSpan.end();
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.TTS_FAILURE,
vendor,
detail: err.message,
target_sid: cs.callSid
}).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure'));
throw err;
}
};
const arr = this.text.map((t) => (this._validateURL(t) ? t : generateAudio(t)));
return (await Promise.all(arr)).filter((fp) => fp && fp.length);
} catch (err) {
this.logger.info(err, 'TaskSay:exec error');
throw err;
}
}
async exec(cs, {ep}) { async exec(cs, {ep}) {
const {srf, accountSid:account_sid, callSid:target_sid} = cs; const {srf, accountSid:account_sid, callSid:target_sid} = cs;
const {writeAlerts, AlertType} = srf.locals; const {writeAlerts, AlertType} = srf.locals;

View File

@@ -17,16 +17,26 @@ class TtsTask extends Task {
async exec(cs) { async exec(cs) {
super.exec(cs); super.exec(cs);
if (cs.synthesizer) {
this.options = {...cs.synthesizer.options, ...this.options};
this.data.synthesizer = this.data.synthesizer || {}
for (const k in cs.synthesizer) {
const newValue = this.data.synthesizer && this.data.synthesizer[k] !== undefined ?
this.data.synthesizer[k] :
cs.synthesizer[k];
if (Array.isArray(newValue)) {
this.data.synthesizer[k] = [...(this.data.synthesizer[k] || []), ...cs.synthesizer[k]];
} else if (typeof newValue === 'object' && newValue !== null) {
this.data.synthesizer[k] = { ...(this.data.synthesizer[k] || {}), ...cs.synthesizer[k] };
} else {
this.data.synthesizer[k] = newValue;
}
}
}
} }
async _synthesizeWithSpecificVendor(cs, ep, { async _synthesizeWithSpecificVendor(cs, ep, {vendor, language, voice, label, preCache = false}) {
vendor,
language,
voice,
label,
disableTtsStreaming,
preCache
}) {
const {srf, accountSid:account_sid} = cs; const {srf, accountSid:account_sid} = cs;
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, srf); const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, srf);
const {writeAlerts, AlertType, stats} = srf.locals; const {writeAlerts, AlertType, stats} = srf.locals;
@@ -65,23 +75,23 @@ class TtsTask extends Task {
} }
ep.set({ ep.set({
tts_engine: vendor, tts_engine: vendor.startsWith('custom:') ? 'custom' : vendor,
tts_voice: voice, tts_voice: voice,
cache_speech_handles: 1, cache_speech_handles: !cs.currentTtsVendor || cs.currentTtsVendor === vendor ? 1 : 0,
}).catch((err) => this.logger.info({err}, `${this.name}: Error setting tts_engine on endpoint`)); }).catch((err) => this.logger.info({err}, 'Error setting tts_engine on endpoint'));
// set the current vendor on the call session
// If vendor is changed from the previous one, then reset the cache_speech_handles flag
cs.currentTtsVendor = vendor;
if (!preCache) this.logger.info({vendor, language, voice, model}, `${this.name}:exec`); if (!preCache && !this._disableTracing) this.logger.info({vendor, language, voice, model}, 'TaskSay:exec');
try { try {
if (!credentials) { if (!credentials) {
writeAlerts({ writeAlerts({
account_sid, account_sid,
alert_type: AlertType.TTS_NOT_PROVISIONED, alert_type: AlertType.TTS_NOT_PROVISIONED,
vendor vendor,
target_sid: cs.callSid
}).catch((err) => this.logger.info({err}, 'Error generating alert for no tts')); }).catch((err) => this.logger.info({err}, 'Error generating alert for no tts'));
this.notifyError({
msg: 'TTS error',
details:`No speech credentials provisioned for selected vendor ${vendor}`
});
throw new Error('no provisioned speech credentials for TTS'); throw new Error('no provisioned speech credentials for TTS');
} }
// synthesize all of the text elements // synthesize all of the text elements
@@ -93,7 +103,7 @@ class TtsTask extends Task {
if (text.startsWith('silence_stream://')) return text; if (text.startsWith('silence_stream://')) return text;
/* otel: trace time for tts */ /* otel: trace time for tts */
if (!preCache && !this.parentTask) { if (!preCache && !this._disableTracing) {
const {span} = this.startChildSpan('tts-generation', { const {span} = this.startChildSpan('tts-generation', {
'tts.vendor': vendor, 'tts.vendor': vendor,
'tts.language': language, 'tts.language': language,
@@ -114,11 +124,10 @@ class TtsTask extends Task {
credentials, credentials,
options: this.options, options: this.options,
disableTtsCache : this.disableTtsCache, disableTtsCache : this.disableTtsCache,
disableTtsStreaming, renderForCaching: preCache
preCache
}); });
if (!filePath.startsWith('say:')) { if (!filePath.startsWith('say:')) {
this.logger.debug(`file ${filePath}, served from cache ${servedFromCache}`); this.logger.debug(`Say: file ${filePath}, served from cache ${servedFromCache}`);
if (filePath) cs.trackTmpFile(filePath); if (filePath) cs.trackTmpFile(filePath);
if (this.otelSpan) { if (this.otelSpan) {
this.otelSpan.setAttributes({'tts.cached': servedFromCache}); this.otelSpan.setAttributes({'tts.cached': servedFromCache});
@@ -129,7 +138,7 @@ class TtsTask extends Task {
lastUpdated = true; lastUpdated = true;
updateSpeechCredentialLastUsed(credentials.speech_credential_sid).catch(() => {/* logged error */}); updateSpeechCredentialLastUsed(credentials.speech_credential_sid).catch(() => {/* logged error */});
} }
if (!servedFromCache && rtt && !preCache) { if (!servedFromCache && rtt && !preCache && !this._disableTracing) {
this.notifyStatus({ this.notifyStatus({
event: 'synthesized-audio', event: 'synthesized-audio',
vendor, vendor,
@@ -140,7 +149,7 @@ class TtsTask extends Task {
} }
} }
else { else {
this.logger.debug('a streaming tts api will be used'); this.logger.debug('Say: a streaming tts api will be used');
const modifiedPath = filePath.replace('say:{', `say:{session-uuid=${ep.uuid},`); const modifiedPath = filePath.replace('say:{', `say:{session-uuid=${ep.uuid},`);
return modifiedPath; return modifiedPath;
} }
@@ -155,7 +164,6 @@ class TtsTask extends Task {
detail: err.message, detail: err.message,
target_sid: cs.callSid target_sid: cs.callSid
}).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure')); }).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure'));
this.notifyError({msg: 'TTS error', details: err.message || err});
throw err; throw err;
} }
}; };
@@ -166,6 +174,7 @@ class TtsTask extends Task {
this.logger.info(err, 'TaskSay:exec error'); this.logger.info(err, 'TaskSay:exec error');
throw err; throw err;
} }
} }
_validateURL(urlString) { _validateURL(urlString) {