Merge pull request #47 from jambonz/feat/update_elevenlabs

support elevenlabs options
2026-01-25 02:08:26 +00:00 · 2023-11-30 08:53:19 -05:00
parent 9827f7405d a517f37473
commit 4ead5ee417
2 changed files with 20 additions and 7 deletions
--- a/lib/synth-audio.js
+++ b/lib/synth-audio.js
@@ -76,7 +76,7 @@ const trimTrailingSilence = (buffer) => {
 * the synthesized audio, and a variable indicating whether it was served from cache
 */
 async function synthAudio(client, logger, stats, { account_sid,
-  vendor, language, voice, gender, text, engine, salt, model, credentials, deploymentId, disableTtsCache
+  vendor, language, voice, gender, text, engine, salt, model, credentials, deploymentId, disableTtsCache, options
 }) {
  let audioBuffer;
  let servedFromCache = false;
@@ -194,7 +194,7 @@ async function synthAudio(client, logger, stats, { account_sid,
        audioBuffer = await synthWellSaid(logger, {credentials, stats, language, voice, text, filePath});
        break;
      case 'elevenlabs':
-        audioBuffer = await synthElevenlabs(logger, {credentials, stats, language, voice, text, filePath});
+        audioBuffer = await synthElevenlabs(logger, {credentials, options, stats, language, voice, text, filePath});
        break;
      case 'whisper':
        audioBuffer = await synthWhisper(logger, {credentials, stats, voice, text});
@@ -585,21 +585,25 @@ const synthCustomVendor = async(logger, {credentials, stats, language, voice, te
  }
 };

-const synthElevenlabs = async(logger, {credentials, stats, language, voice, text}) => {
-  const {api_key, model_id} = credentials;
+const synthElevenlabs = async(logger, {credentials, options, stats, language, voice, text}) => {
+  const {api_key, model_id, options: credOpts} = credentials;
+  const opts = !!options && Object.keys(options).length !== 0 ? options : JSON.parse(credOpts || '{}');
+  const optimize_streaming_latency = opts.optimize_streaming_latency ?
+    `?optimize_streaming_latency=${opts.optimize_streaming_latency}` : '';
  try {
    const post = bent('https://api.elevenlabs.io', 'POST', 'buffer', {
      'xi-api-key': api_key,
      'Accept': 'audio/mpeg',
      'Content-Type': 'application/json'
    });
-    const mp3 = await post(`/v1/text-to-speech/${voice}`, {
+    const mp3 = await post(`/v1/text-to-speech/${voice}${optimize_streaming_latency}`, {
      text,
      model_id,
      voice_settings: {
        stability: 0.5,
        similarity_boost: 0.5
-      }
+      },
+      ...opts
    });
    return mp3;
  } catch (err) {
--- a/test/synth.js
+++ b/test/synth.js
@@ -459,7 +459,16 @@ test('Elevenlabs speech synth tests', async(t) => {
      vendor: 'elevenlabs',
      credentials: {
        api_key: process.env.ELEVENLABS_API_KEY,
-        model_id: process.env.ELEVENLABS_MODEL_ID
+        model_id: process.env.ELEVENLABS_MODEL_ID,
+        options: JSON.stringify({
+          optimize_streaming_latency: 1,
+          voice_settings: {
+            similarity_boost: 1,
+            stability: 0.8,
+            style: 1,
+            use_speaker_boost: true
+          }
+        })
      },
      language: 'en-US',
      voice: process.env.ELEVENLABS_VOICE_ID,