support mod_rimelabs_tts

2025-12-19 03:37:49 +00:00 · 2024-04-12 15:57:02 +07:00
parent 51db63f992
commit 410b99ef24
2 changed files with 96 additions and 6 deletions
--- a/lib/synth-audio.js
+++ b/lib/synth-audio.js
@@ -84,10 +84,10 @@ async function synthAudio(client, logger, stats, { account_sid,
  let rtt;
  logger = logger || noopLogger;

-  assert.ok(['google', 'aws', 'polly', 'microsoft',
-    'wellsaid', 'nuance', 'nvidia', 'ibm', 'elevenlabs', 'whisper', 'deepgram', 'playht'].includes(vendor) ||
+  assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'ibm', 'elevenlabs',
+    'whisper', 'deepgram', 'playht', 'rimelabs'].includes(vendor) ||
  vendor.startsWith('custom'),
-  `synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid, not ${vendor}`);
+  `synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid ..etc, not ${vendor}`);
  if ('google' === vendor) {
    assert.ok(language, 'synthAudio requires language when google is used');
  }
@@ -123,11 +123,15 @@ async function synthAudio(client, logger, stats, { account_sid,
    assert.ok(voice, 'synthAudio requires voice when elevenlabs is used');
    assert.ok(credentials.api_key, 'synthAudio requires api_key when elevenlabs is used');
    assert.ok(credentials.model_id, 'synthAudio requires model_id when elevenlabs is used');
-  }  else if ('playht' === vendor) {
+  } else if ('playht' === vendor) {
    assert.ok(voice, 'synthAudio requires voice when playht is used');
    assert.ok(credentials.api_key, 'synthAudio requires api_key when playht is used');
    assert.ok(credentials.user_id, 'synthAudio requires user_id when playht is used');
    assert.ok(credentials.voice_engine, 'synthAudio requires voice_engine when playht is used');
+  } else if ('rimelabs' === vendor) {
+    assert.ok(voice, 'synthAudio requires voice when rimelabs is used');
+    assert.ok(credentials.api_key, 'synthAudio requires api_key when rimelabs is used');
+    assert.ok(credentials.model_id, 'synthAudio requires model_id when rimelabs is used');
  } else if ('whisper' === vendor) {
    assert.ok(voice, 'synthAudio requires voice when whisper is used');
    assert.ok(credentials.model_id, 'synthAudio requires model when whisper is used');
@@ -217,6 +221,12 @@ async function synthAudio(client, logger, stats, { account_sid,
        });
        if (audioBuffer?.filePath) return audioBuffer;
        break;
+      case 'rimelabs':
+        audioBuffer = await synthRimelabs(logger, {
+          credentials, options, stats, language, voice, text, renderForCaching, disableTtsStreaming, filePath
+        });
+        if (audioBuffer?.filePath) return audioBuffer;
+        break;
      case 'whisper':
        audioBuffer = await synthWhisper(logger, {
          credentials, stats, voice, text, renderForCaching, disableTtsStreaming});
@@ -748,6 +758,52 @@ const synthPlayHT = async(logger, {
  }
 };

+/**
+ * Synthesize speech with Rime Labs.
+ * Streaming (the default): returns a `say:` string carrying the vendor parameters and the text.
+ * Otherwise (streaming disabled by env var or flag, or rendering for cache): POSTs the text to
+ * the Rime REST API and returns the response buffer (mp3 is requested via the Accept header).
+ * A failed REST call is logged, counted in stats as not accepted, and rethrown.
+ */
+const synthRimelabs = async(logger, {
+  credentials, options, stats, voice, text, renderForCaching, disableTtsStreaming
+}) => {
+  const {api_key, model_id, options: credOpts} = credentials;
+  /* per-request options win; otherwise fall back to the JSON options stored with the credentials */
+  const hasOptions = !!options && Object.keys(options).length !== 0;
+  const opts = hasOptions ? options : JSON.parse(credOpts || '{}');
+  const streaming = !process.env.JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming;
+
+  if (streaming) {
+    const pairs = [
+      `api_key=${api_key}`,
+      `model_id=${model_id}`,
+      'vendor=rimelabs',
+      `voice=${voice}`,
+      'write_cache_file=1'
+    ];
+    if (opts.speedAlpha) pairs.push(`speed_alpha=${opts.speedAlpha}`);
+    if (opts.reduceLatency) pairs.push(`reduce_latency=${opts.reduceLatency}`);
+    /* line breaks in the text are replaced with spaces */
+    const flatText = text.replace(/\n/g, ' ').replace(/\r/g, ' ');
+    return {filePath: `say:{${pairs.join(',')}}${flatText}`, servedFromCache: false, rtt: 0};
+  }
+
+  try {
+    const post = bent('https://users.rime.ai', 'POST', 'buffer', {
+      'Authorization': `Bearer ${api_key}`,
+      'Accept': 'audio/mp3',
+      'Content-Type': 'application/json'
+    });
+    const payload = {speaker: voice, text, modelId: model_id, samplingRate: 8000, ...opts};
+    return await post('/v1/rime-tts', payload);
+  } catch (err) {
+    logger.info({err}, 'synth rimelabs returned error');
+    stats.increment('tts.count', ['vendor:rimelabs', 'accepted:no']);
+    throw err;
+  }
+};
+
 const synthWhisper = async(logger, {credentials, stats, voice, text, renderForCaching, disableTtsStreaming}) => {
  const {api_key, model_id, baseURL, timeout, speed} = credentials;
  /* if the env is set to stream then bag out, unless we are specifically rendering to generate a cache file */
--- a/test/synth.js
+++ b/test/synth.js
@@ -581,14 +581,48 @@ test('PlayHT speech synth tests', async(t) => {
      text,
      renderForCaching: true
    });
-    t.ok(!opts.servedFromCache, `successfully synthesized eleven audio to ${opts.filePath}`);
+    t.ok(!opts.servedFromCache, `successfully playht eleven audio to ${opts.filePath}`);

  } catch (err) {
    console.error(JSON.stringify(err));
    t.end(err);
  }
  client.quit();
-})
+});
+
+/* Live Rime Labs synthesis check; passes trivially when RIMELABS_API_KEY is not set. */
+test('rimelabs speech synth tests', async(t) => {
+  const fn = require('..');
+  const {synthAudio, client} = fn(opts, logger);
+  const apiKey = process.env.RIMELABS_API_KEY;
+
+  if (!apiKey) {
+    t.pass('skipping rimelabs speech synth tests since RIMELABS_API_KEY is not provided');
+    return t.end();
+  }
+  /* vendor tuning options travel as a JSON string inside the credentials */
+  const credentials = {
+    api_key: apiKey,
+    model_id: 'mist',
+    options: JSON.stringify({speedAlpha: 1.0, reduceLatency: false})
+  };
+  const text = 'Hi there and welcome to jambones!';
+  try {
+    const result = await synthAudio(stats, {
+      vendor: 'rimelabs',
+      credentials,
+      language: 'en-US',
+      voice: 'amber',
+      text,
+      renderForCaching: true
+    });
+    t.ok(!result.servedFromCache, `successfully synthesized rimelabs audio to ${result.filePath}`);
+  } catch (err) {
+    console.error(JSON.stringify(err));
+    t.end(err);
+  }
+  client.quit();
+});

 test('whisper speech synth tests', async(t) => {
  const fn = require('..');