add param to synthAudio to indicate whether audio is being generated specifically for caching purposes

2026-07-04 19:31:49 +00:00 · 2024-01-22 08:10:00 -05:00
parent 7e21e0b666
commit cb479f04d5
3 changed files with 1637 additions and 2136 deletions
@@ -76,7 +76,8 @@ const trimTrailingSilence = (buffer) => {
 * the synthesized audio, and a variable indicating whether it was served from cache
 */
 async function synthAudio(client, logger, stats, { account_sid,
-  vendor, language, voice, gender, text, engine, salt, model, credentials, deploymentId, disableTtsCache, options
+  vendor, language, voice, gender, text, engine, salt, model, credentials, deploymentId,
+  disableTtsCache, renderForCaching, options
 }) {
  let audioBuffer;
  let servedFromCache = false;
@@ -194,7 +195,9 @@ async function synthAudio(client, logger, stats, { account_sid,
        audioBuffer = await synthWellSaid(logger, {credentials, stats, language, voice, text, filePath});
        break;
      case 'elevenlabs':
-        audioBuffer = await synthElevenlabs(logger, {credentials, options, stats, language, voice, text, filePath});
+        audioBuffer = await synthElevenlabs(logger, {
+          credentials, options, stats, language, voice, text, renderForCaching, filePath
+        });
        if (typeof audioBuffer === 'object' && audioBuffer.filePath) {
          return audioBuffer;
        }
@@ -600,11 +603,12 @@ const synthCustomVendor = async(logger, {credentials, stats, language, voice, te
  }
 };

-const synthElevenlabs = async(logger, {credentials, options, stats, language, voice, text}) => {
+const synthElevenlabs = async(logger, {credentials, options, stats, language, voice, text, renderForCaching}) => {
  const {api_key, model_id, options: credOpts} = credentials;
  const opts = !!options && Object.keys(options).length !== 0 ? options : JSON.parse(credOpts || '{}');

-  if (process.env.JAMBONES_ELEVENLABS_STREAMING) {
+  /* if the env is set to stream then bail out, unless we are specifically rendering to generate a cache file */
+  if (process.env.JAMBONES_ELEVENLABS_STREAMING && !renderForCaching) {
    let params = '';
    params += `{api_key=${api_key}`;
    params += `,model_id=${model_id}`;
@@ -24,26 +24,26 @@
  },
  "homepage": "https://github.com/jambonz/speech-utils#readme",
  "dependencies": {
-    "@aws-sdk/client-polly": "^3.359.0",
-    "@aws-sdk/client-sts": "^3.458.0",
-    "@google-cloud/text-to-speech": "^4.2.1",
-    "@grpc/grpc-js": "^1.8.13",
+    "@aws-sdk/client-polly": "^3.496.0",
+    "@aws-sdk/client-sts": "^3.496.0",
+    "@google-cloud/text-to-speech": "^5.0.2",
+    "@grpc/grpc-js": "^1.9.14",
    "@jambonz/realtimedb-helpers": "^0.8.7",
    "bent": "^7.3.12",
    "debug": "^4.3.4",
-    "form-urlencoded": "^6.1.0",
+    "form-urlencoded": "^6.1.4",
    "google-protobuf": "^3.21.2",
    "ibm-watson": "^8.0.0",
-    "microsoft-cognitiveservices-speech-sdk": "1.32.0",
-    "openai": "^4.16.2",
-    "undici": "^5.21.0"
+    "microsoft-cognitiveservices-speech-sdk": "1.34.0",
+    "openai": "^4.25.0",
+    "undici": "^6.4.0"
  },
  "devDependencies": {
-    "config": "^3.3.9",
-    "eslint": "^8.33.0",
+    "config": "^3.3.10",
+    "eslint": "^8.56.0",
    "eslint-plugin-promise": "^6.1.1",
    "nyc": "^15.1.0",
-    "pino": "^7.2.0",
-    "tape": "^5.1.1"
+    "pino": "^8.17.0",
+    "tape": "^5.7.3"
  }
 }