From 769b66f57e567cdcc00d5d84a62d9e24d718178e Mon Sep 17 00:00:00 2001 From: Hoan Luu Huu <110280845+xquanluu@users.noreply.github.com> Date: Thu, 20 Nov 2025 07:00:44 +0700 Subject: [PATCH] fix playbackIds not being in the same order as the say.text array (#1439) * fix playbackIds not being in the same order as the say.text array * wip * wip --- lib/tasks/tts-task.js | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/lib/tasks/tts-task.js b/lib/tasks/tts-task.js index 3dc6e716..2663005c 100644 --- a/lib/tasks/tts-task.js +++ b/lib/tasks/tts-task.js @@ -279,9 +279,9 @@ class TtsTask extends Task { } /* produce an audio segment from the provided text */ - const generateAudio = async(text) => { - if (this.killed) return; - if (text.startsWith('silence_stream://')) return text; + const generateAudio = async(text, index) => { + if (this.killed) return {index, filePath: null}; + if (text.startsWith('silence_stream://')) return {index, filePath: text}; /* otel: trace time for tts */ if (!preCache && !this._disableTracing) { @@ -310,7 +310,6 @@ class TtsTask extends Task { renderForCaching: preCache }); if (!filePath.startsWith('say:')) { - this.playbackIds.push(null); this.logger.debug(`Say: file ${filePath}, served from cache ${servedFromCache}`); if (filePath) cs.trackTmpFile(filePath); if (this.otelSpan) { @@ -338,10 +337,11 @@ class TtsTask extends Task { 'id': this.id }); } + return {index, filePath, playbackId: null}; } else { - this.playbackIds.push(extractPlaybackId(filePath)); - this.logger.debug({playbackIds: this.playbackIds}, 'Say: a streaming tts api will be used'); + const playbackId = extractPlaybackId(filePath); + this.logger.debug('Say: a streaming tts api will be used'); const modifiedPath = filePath.replace('say:{', `say:{session-uuid=${ep.uuid},`); this.notifyStatus({ event: 'synthesized-audio', @@ -350,9 +350,8 @@ class TtsTask extends Task { servedFromCache, 'id': this.id }); - return 
modifiedPath; + return {index, filePath: modifiedPath, playbackId}; } - return filePath; } catch (err) { this.logger.info({err}, 'Error synthesizing tts'); if (this.otelSpan) this.otelSpan.end(); @@ -367,8 +366,20 @@ class TtsTask extends Task { } }; - const arr = this.text.map((t) => (this._validateURL(t) ? t : generateAudio(t))); - return (await Promise.all(arr)).filter((fp) => fp && fp.length); + // text segments are synthesized in parallel and may finish in any order, + // so each result carries its index and is sorted by it before playbackIds is filled + + const arr = this.text.map((t, index) => (this._validateURL(t) ? + Promise.resolve({index, filePath: t, playbackId: null}) : generateAudio(t, index))); + const results = await Promise.all(arr); + const sorted = results.sort((a, b) => a.index - b.index); + + return sorted + .filter((fp) => fp.filePath && fp.filePath.length) + .map((r) => { + this.playbackIds.push(r.playbackId); + return r.filePath; + }); } catch (err) { this.logger.info(err, 'TaskSay:exec error'); throw err;