Merge pull request #84 from jambonz/feat/precache_audio_with_tts_stream

support precache audio with tts stream enabled
2026-01-25 02:08:26 +00:00 · 2024-08-12 09:26:00 -04:00
parent 73feadc4c4 ef23b0807a
commit b0d6272974
4 changed files with 42 additions and 10 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -13,6 +13,11 @@ jobs:
        with:
          node-version: lts/*
      - run: npm install
+      - name: Install Docker Compose
+        run: |
+          sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
+          sudo chmod +x /usr/local/bin/docker-compose
+          docker-compose --version
      - run: npm run jslint
      - run: sudo apt update && sudo apt install -y squid
      - run: sudo cp test/squid.conf /etc/squid/squid.conf
--- a/lib/config.js
+++ b/lib/config.js
@@ -1,6 +1,7 @@
 const JAMBONES_TTS_TRIM_SILENCE = process.env.JAMBONES_TTS_TRIM_SILENCE;
 const JAMBONES_DISABLE_TTS_STREAMING = process.env.JAMBONES_DISABLE_TTS_STREAMING;
 const JAMBONES_DISABLE_AZURE_TTS_STREAMING = process.env.JAMBONES_DISABLE_AZURE_TTS_STREAMING;
+const JAMBONES_EAGERLY_PRE_CACHE_AUDIO = process.env.JAMBONES_EAGERLY_PRE_CACHE_AUDIO;

 const JAMBONES_HTTP_PROXY_IP = process.env.JAMBONES_HTTP_PROXY_IP;
 const JAMBONES_HTTP_PROXY_PORT = process.env.JAMBONES_HTTP_PROXY_PORT;
@@ -18,6 +19,7 @@ module.exports = {
  JAMBONES_HTTP_PROXY_IP,
  JAMBONES_HTTP_PROXY_PORT,
  JAMBONES_TTS_CACHE_DURATION_MINS,
+  JAMBONES_EAGERLY_PRE_CACHE_AUDIO,
  TMP_FOLDER,
  HTTP_TIMEOUT
 };
--- a/lib/synth-audio.js
+++ b/lib/synth-audio.js
@@ -44,6 +44,7 @@ const {
  JAMBONES_HTTP_PROXY_IP,
  JAMBONES_HTTP_PROXY_PORT,
  JAMBONES_TTS_CACHE_DURATION_MINS,
+  JAMBONES_EAGERLY_PRE_CACHE_AUDIO,
 } = require('./config');
 const EXPIRES = JAMBONES_TTS_CACHE_DURATION_MINS;
 const OpenAI = require('openai');
@@ -86,7 +87,7 @@ const trimTrailingSilence = (buffer) => {
 */
 async function synthAudio(client, createHash, retrieveHash, logger, stats, { account_sid,
  vendor, language, voice, gender, text, engine, salt, model, credentials, deploymentId,
-  disableTtsCache, renderForCaching, disableTtsStreaming, options
+  disableTtsCache, renderForCaching = false, disableTtsStreaming, options
 }) {
  let audioBuffer;
  let servedFromCache = false;
@@ -157,20 +158,42 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
    }

  }
+
  const key = makeSynthKey({
    account_sid,
    vendor,
    language: language || '',
    voice: voice || deploymentId,
    engine,
-    text
+    text,
+    renderForCaching
  });
  let filePath;
-  filePath = makeFilePath(vendor, key, salt);
+  filePath = makeFilePath({vendor, key, salt, renderForCaching});
  debug(`synth key is ${key}`);
  let cached;
  if (!disableTtsCache) {
    cached = await client.get(key);
+    /**
+    * If tts streaming and audio precaching are both in use, the audio may have been cached either by streaming (r8)
+    * or here in speech-utils by precaching (mp3), so we need to check both keys.
+    */
+    if (!cached && JAMBONES_EAGERLY_PRE_CACHE_AUDIO) {
+      const preCachekey = makeSynthKey({
+        account_sid,
+        vendor,
+        language: language || '',
+        voice: voice || deploymentId,
+        engine,
+        text,
+        renderForCaching: true
+      });
+      cached = await client.get(preCachekey);
+      if (cached) {
+        // Precached audio is available; update filePath to use the precache file extension.
+        filePath = makeFilePath({vendor, key, salt, renderForCaching: true});
+      }
+    }
  }
  if (cached) {
    // found in cache - extend the expiry and use it
--- a/lib/utils.js
+++ b/lib/utils.js
@@ -16,29 +16,31 @@ const debug = require('debug')('jambonz:realtimedb-helpers');
 */
 //const nuanceClientMap = new Map();

-function makeSynthKey({account_sid = '', vendor, language, voice, engine = '', text}) {
+function makeSynthKey({
+  account_sid = '', vendor, language, voice, engine = '', text,
+  renderForCaching = false}) {
  const hash = crypto.createHash('sha1');
  hash.update(`${language}:${vendor}:${voice}:${engine}:${text}`);
  const hexHashKey = hash.digest('hex');
  const accountKey = account_sid ? `:${account_sid}` : '';
-  const namespace = vendor.startsWith('custom') ? vendor : getFileExtension(vendor);
+  const namespace = vendor.startsWith('custom') ? vendor : getFileExtension({vendor, renderForCaching});
  const key = `tts${accountKey}:${namespace}:${hexHashKey}`;
  return key;
 }

-function makeFilePath(vendor, key, salt = '') {
-  const extension = getFileExtension(vendor);
+function makeFilePath({vendor, key, salt = '', renderForCaching = false}) {
+  const extension = getFileExtension({vendor, renderForCaching});
  return `${TMP_FOLDER}/${key.replace('tts:', `tts-${salt}`)}.${extension}`;
 }

-function getFileExtension(vendor) {
+function getFileExtension({vendor, renderForCaching = false}) {
  const mp3Extension = 'mp3';
  const r8Extension = 'r8';

  switch (vendor) {
    case 'azure':
    case 'microsoft':
-      if (!JAMBONES_DISABLE_TTS_STREAMING || JAMBONES_TTS_TRIM_SILENCE) {
+      if (!renderForCaching && !JAMBONES_DISABLE_TTS_STREAMING || JAMBONES_TTS_TRIM_SILENCE) {
        return r8Extension;
      } else {
        return mp3Extension;
@@ -46,7 +48,7 @@ function getFileExtension(vendor) {
    case 'deepgram':
    case 'elevenlabs':
    case 'rimlabs':
-      if (!JAMBONES_DISABLE_TTS_STREAMING) {
+      if (!renderForCaching && !JAMBONES_DISABLE_TTS_STREAMING) {
        return r8Extension;
      } else {
        return mp3Extension;