diff --git a/lib/session/call-session.js b/lib/session/call-session.js index 11f7413a..2cfa35b1 100644 --- a/lib/session/call-session.js +++ b/lib/session/call-session.js @@ -1598,7 +1598,23 @@ Duration=${duration} ` } _preCacheAudio(newTasks) { - for (const task of newTasks) { + /** + * only precache audio for a queued say if we have one or more non-Config verbs + * ahead of it in the queue. This is because the Config verb returns immediately + * and would not give us enough time to generate the audio. The point of precaching + * is to take advantage of getting the audio in advance of being needed, so we need + * to be confident we have some time before the say verb is executed, and the Config verb + * does not give us that confidence since it returns immediately. + */ + const allTasks = (this.tasks || []).concat(newTasks); + const skipFirst = allTasks.slice(1); + const idxFirstNotConfig = skipFirst.findIndex((t) => t.name !== TaskName.Config); + if (-1 === idxFirstNotConfig) return; + + const allTasksAfterFirstNotConfig = skipFirst.slice(idxFirstNotConfig); + + + for (const task of allTasksAfterFirstNotConfig) { if (task.name === TaskName.Config && task.hasSynthesizer) { /* if they change synthesizer settings don't try to precache */ break;