Fix/cached audio race (#1279)

* fix for race condition when play-stop event from earlier command received

* wip

* say verb should not cache if disableTtsCache = true

* logging

* modify race condition logic to validate playback id in playback-stopped matches that from playback-start

* logging

---------

Co-authored-by: Quan HL <quan.luuhoang8@gmail.com>
This commit is contained in:
Dave Horton
2025-07-13 10:56:41 -04:00
committed by GitHub
parent e6e039e0f2
commit e415420150

View File

@@ -247,6 +247,7 @@ class TaskSay extends TtsTask {
throw new SpeechCredentialError(error.message); throw new SpeechCredentialError(error.message);
} }
}; };
let filepath; let filepath;
try { try {
filepath = await this._synthesizeWithSpecificVendor(cs, ep, {vendor, language, voice, label}); filepath = await this._synthesizeWithSpecificVendor(cs, ep, {vendor, language, voice, label});
@@ -263,83 +264,113 @@ class TaskSay extends TtsTask {
await this.playToConfMember(ep, memberId, confName, confUuid, filepath[segment]); await this.playToConfMember(ep, memberId, confName, confUuid, filepath[segment]);
} }
else { else {
let playbackId;
const isStreaming = filepath[segment].startsWith('say:{'); const isStreaming = filepath[segment].startsWith('say:{');
if (isStreaming) { if (isStreaming) {
const arr = /^say:\{.*\}\s*(.*)$/.exec(filepath[segment]); const arr = /^say:\{.*\}\s*(.*)$/.exec(filepath[segment]);
if (arr) this.logger.debug(`Say:exec sending streaming tts request: ${arr[1].substring(0, 64)}..`); if (arr) this.logger.debug(`Say:exec sending streaming tts request: ${arr[1].substring(0, 64)}..`);
} }
else this.logger.debug(`Say:exec sending ${filepath[segment].substring(0, 64)}`); else {
ep.once('playback-start', (evt) => { this.logger.debug(`Say:exec sending ${filepath[segment].substring(0, 64)}`);
this.logger.debug({evt}, 'Say got playback-start'); }
if (this.otelSpan) {
this._addStreamingTtsAttributes(this.otelSpan, evt, vendor); const onPlaybackStop = (evt) => {
this.otelSpan.end(); try {
this.otelSpan = null; this.logger.debug({evt},
if (evt.variable_tts_cache_filename) { `Say got playback-stop ${evt.variable_tts_playback_id ? evt.variable_tts_playback_id : ''}`);
cs.trackTmpFile(evt.variable_tts_cache_filename); const unmatchedResponse = !!evt.variable_tts_playback_id && evt.variable_tts_playback_id !== playbackId;
if (unmatchedResponse) {
this.logger.info({currentPlaybackId: playbackId, stopPPlaybackId: evt.variable_tts_playback_id},
'Say:exec discarding playback-stop for earlier play');
ep.once('playback-stop', this._boundOnPlaybackStop);
return;
} }
}
}); this.notifyStatus({event: 'stop-playback'});
ep.once('playback-stop', (evt) => { this.notifiedPlayBackStop = true;
this.logger.debug({evt}, 'Say got playback-stop'); const tts_error = evt.variable_tts_error;
this.notifyStatus({event: 'stop-playback'}); // some tts vendor may not provide response code, so we default to 200
this.notifiedPlayBackStop = true; let response_code = 200;
const tts_error = evt.variable_tts_error; // Check if any property ends with _response_code
// some tts vendor may not provide response code, so we default to 200 for (const [key, value] of Object.entries(evt)) {
let response_code = 200; if (key.endsWith('_response_code')) {
// Check if any property ends with _response_code response_code = parseInt(value, 10);
for (const [key, value] of Object.entries(evt)) { if (isNaN(response_code)) {
if (key.endsWith('_response_code')) { this.logger.info(`Say:exec playback-stop - Invalid response code: ${value}`);
response_code = parseInt(value, 10); response_code = 0;
if (isNaN(response_code)) { }
this.logger.info(`Say:exec playback-stop - Invalid response code: ${value}`); break;
response_code = 0;
} }
break;
} }
}
if (tts_error || if (tts_error ||
// error response codes indicate failure
response_code <= 199 || response_code >= 300) {
writeAlerts({
account_sid,
alert_type: AlertType.TTS_FAILURE,
vendor,
detail: evt.variable_tts_error || `TTS playback failed with response code ${response_code}`,
target_sid
}).catch((err) => this.logger.info({err}, 'Error generating alert for no tts'));
}
if (
!tts_error &&
//2xx response codes indicate success
199 < response_code && response_code < 300 &&
evt.variable_tts_cache_filename &&
!this.killed
) {
const text = parseTextFromSayString(this.text[segment]);
addFileToCache(evt.variable_tts_cache_filename, {
account_sid,
vendor,
language,
voice,
engine,
model: this.model || this.model_id,
text,
instructions: this.instructions
}).catch((err) => this.logger.info({err}, 'Error adding file to cache'));
}
if (this._playResolve) {
(tts_error ||
// error response codes indicate failure // error response codes indicate failure
response_code <= 199 || response_code >= 300 response_code <= 199 || response_code >= 300) {
) ? writeAlerts({
this._playReject( account_sid,
new Error(evt.variable_tts_error || `TTS playback failed with response code ${response_code}`) alert_type: AlertType.TTS_FAILURE,
) : this._playResolve(); vendor,
detail: evt.variable_tts_error || `TTS playback failed with response code ${response_code}`,
target_sid
}).catch((err) => this.logger.info({err}, 'Error generating alert for no tts'));
}
if (
!tts_error &&
//2xx response codes indicate success
199 < response_code && response_code < 300 &&
evt.variable_tts_cache_filename &&
!this.killed &&
// if tts cache is not disabled, add the file to cache
!this.disableTtsCache
) {
const text = parseTextFromSayString(this.text[segment]);
addFileToCache(evt.variable_tts_cache_filename, {
account_sid,
vendor,
language,
voice,
engine,
model: this.model || this.model_id,
text,
instructions: this.instructions
}).catch((err) => this.logger.info({err}, 'Error adding file to cache'));
}
if (this._playResolve) {
(tts_error ||
// error response codes indicate failure
response_code <= 199 || response_code >= 300
) ?
this._playReject(
new Error(evt.variable_tts_error || `TTS playback failed with response code ${response_code}`)
) : this._playResolve();
}
} catch (err) {
this.logger.info({err}, 'Error handling playback-stop event');
}
};
this._boundOnPlaybackStop = onPlaybackStop.bind(this);
ep.once('playback-start', (evt) => {
try {
playbackId = evt.variable_tts_playback_id;
this.logger.debug({evt},
`Say got playback-start ${evt.variable_tts_playback_id ? evt.variable_tts_playback_id : ''}`);
if (this.otelSpan) {
this._addStreamingTtsAttributes(this.otelSpan, evt, vendor);
this.otelSpan.end();
this.otelSpan = null;
if (evt.variable_tts_cache_filename) {
cs.trackTmpFile(evt.variable_tts_cache_filename);
}
}
} catch (err) {
this.logger.info({err}, 'Error handling playback-start event');
} }
}); });
ep.once('playback-stop', this._boundOnPlaybackStop);
// wait for playback-stop event received to confirm if the playback is successful // wait for playback-stop event received to confirm if the playback is successful
this._playPromise = new Promise((resolve, reject) => { this._playPromise = new Promise((resolve, reject) => {
this._playResolve = resolve; this._playResolve = resolve;