add elevenlabs

2025-12-19 03:37:49 +00:00 · 2023-10-12 14:22:15 +07:00
parent b5daeff047
commit ea153e9833
2 changed files with 57 additions and 1 deletions
--- a/lib/synth-audio.js
+++ b/lib/synth-audio.js
@@ -82,7 +82,8 @@ async function synthAudio(client, logger, stats, { account_sid,
  let rtt;
  logger = logger || noopLogger;

-  assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'ibm'].includes(vendor) ||
+  assert.ok(['google', 'aws', 'polly', 'microsoft',
+    'wellsaid', 'nuance', 'nvidia', 'ibm', 'elevenlabs'].includes(vendor) ||
  vendor.startsWith('custom'),
  `synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid, not ${vendor}`);
  if ('google' === vendor) {
@@ -183,6 +184,9 @@ async function synthAudio(client, logger, stats, { account_sid,
      case 'wellsaid':
        audioBuffer = await synthWellSaid(logger, {credentials, stats, language, voice, text, filePath});
        break;
+      case 'elevenlabs':
+        audioBuffer = await synthElevenlabs(logger, {credentials, stats, language, voice, text, filePath});
+        break;
      case vendor.startsWith('custom') ? vendor : 'cant_match_value':
        ({ audioBuffer, filePath } = await synthCustomVendor(logger,
          {credentials, stats, language, voice, text, filePath}));
@@ -568,6 +572,29 @@ const synthCustomVendor = async(logger, {credentials, stats, language, voice, te
  }
 };

+// Synthesize `text` through the Elevenlabs text-to-speech REST endpoint using voice id `voice`.
+// Resolves to the body of the POST as a buffer (audio/mpeg is requested); logs and rethrows any error.
+// `stats` and `language` are accepted because the call site passes them, but they are not used here.
+const synthElevenlabs = async(logger, {credentials, stats, language, voice, text}) => {
+  const {api_key, model_id} = credentials;
+  try {
+    const post = bent('https://api.elevenlabs.io', 'POST', 'buffer', {
+      'xi-api-key': api_key,
+      'Accept': 'audio/mpeg',
+      'Content-Type': 'application/json'
+    });
+    // await inside the try block so that a rejected request is caught and logged below
+    return await post(`/v1/text-to-speech/${voice}`, {
+      text,
+      model_id,
+      voice_settings: {stability: 0.5, similarity_boost: 0.5}
+    });
+  } catch (err) {
+    logger.info({err}, 'synthElevenlabs returned error');
+    throw err;
+  }
+};
+
 const getFileExtFromMime = (mime) => {
  switch (mime) {
    case 'audio/wav':
--- a/test/synth.js
+++ b/test/synth.js
@@ -411,6 +411,35 @@ test('Custom Vendor speech synth tests', async(t) => {
  client.quit();
 });

+// Live Elevenlabs synth test: skipped unless the API key, voice id and model id env vars are all set.
+test('Elevenlabs speech synth tests', async(t) => {
+  const fn = require('..');
+  const {synthAudio, client} = fn(opts, logger);
+  if (!process.env.ELEVENLABS_API_KEY || !process.env.ELEVENLABS_VOICE_ID || !process.env.ELEVENLABS_MODEL_ID) {
+    t.pass('skipping Elevenlabs speech synth tests since ELEVENLABS_API_KEY, ELEVENLABS_VOICE_ID or ELEVENLABS_MODEL_ID not provided');
+    client.quit();
+    return t.end();
+  }
+  const text = `<speak> Hi there and welcome to jambones! jambones is the <sub alias="seapass">CPaaS</sub> designed with the needs of communication service providers in mind. This is an example of simple text-to-speech, but there is so much more you can do. Try us out!</speak>`;
+  try {
+    const result = await synthAudio(stats, {
+      vendor: 'elevenlabs',
+      credentials: {
+        api_key: process.env.ELEVENLABS_API_KEY,
+        model_id: process.env.ELEVENLABS_MODEL_ID
+      },
+      language: 'en-US',
+      voice: process.env.ELEVENLABS_VOICE_ID,
+      text,
+    });
+    t.ok(!result.servedFromCache, `successfully synthesized eleven audio to ${result.filePath}`);
+  } catch (err) {
+    console.error(JSON.stringify(err));
+    t.end(err);
+  }
+  client.quit();
+});
+
 test('TTS Cache tests', async(t) => {
  const fn = require('..');
  const {purgeTtsCache, getTtsSize, client} = fn(opts, logger);