support mod_rimelabs_tts (#310)

* support mod_rimelabs_tts * update speech utils 0.0.51
2026-01-25 02:08:24 +00:00 · 2024-04-12 18:25:04 +07:00
parent 80418aa7e5
commit 39fcb17dec
7 changed files with 815 additions and 7 deletions
--- a/lib/utils/speech-utils.js
+++ b/lib/utils/speech-utils.js
@@ -17,11 +17,13 @@ const TtsIbmLanguagesVoices = require('./speech-data/tts-ibm');
 const TtsNvidiaLanguagesVoices = require('./speech-data/tts-nvidia');
 const TtsElevenlabsLanguagesVoices = require('./speech-data/tts-elevenlabs');
 const TtsWhisperLanguagesVoices = require('./speech-data/tts-whisper');
+const TtsPlayHtLanguagesVoices = require('./speech-data/tts-playht');

 const TtsModelDeepgram = require('./speech-data/tts-model-deepgram');
 const TtsModelElevenLabs = require('./speech-data/tts-model-elevenlabs');
 const TtsModelWhisper = require('./speech-data/tts-model-whisper');
 const TtsModelPlayHT = require('./speech-data/tts-model-playht');
+const TtsModelRimelabs = require('./speech-data/tts-model-rimelabs');

 const SttGoogleLanguagesVoices = require('./speech-data/stt-google');
 const SttAwsLanguagesVoices = require('./speech-data/stt-aws');
@@ -264,6 +266,27 @@ const testPlayHT = async(logger, synthAudio, credentials) => {
  }
 };

+const testRimelabs = async(logger, synthAudio, credentials) => {
+  try {
+    await synthAudio(
+      {
+        increment: () => {},
+        histogram: () => {}
+      },
+      {
+        vendor: 'rimelabs',
+        credentials,
+        language: 'en-US',
+        voice: 'amber',
+        text: 'Hi there and welcome to jambones!'
+      }
+    );
+  } catch (err) {
+    logger.info({err}, 'synth Playht returned error');
+    throw err;
+  }
+};
+
 const testWhisper = async(logger, synthAudio, credentials) => {
  try {
    await synthAudio({increment: () => {}, histogram: () => {}},
@@ -458,6 +481,11 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
    obj.user_id = o.user_id;
    obj.voice_engine = o.voice_engine;
    obj.options = o.options;
+  } else if ('rimelabs' === obj.vendor) {
+    const o = JSON.parse(decrypt(credential));
+    obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
+    obj.model_id = o.model_id;
+    obj.options = o.options;
  } else if (obj.vendor.startsWith('custom:')) {
    const o = JSON.parse(decrypt(credential));
    obj.auth_token = isObscureKey ? obscureKey(o.auth_token) : o.auth_token;
@@ -520,6 +548,8 @@ async function getLanguagesAndVoicesForVendor(logger, vendor, credential, getTts
      return await getLanguagesVoicesForElevenlabs(credential, getTtsVoices, logger);
    case 'playht':
      return await getLanguagesVoicesForPlayHT(credential, getTtsVoices, logger);
+    case 'rimelabs':
+      return await getLanguagesVoicesForRimelabs(credential, getTtsVoices, logger);
    case 'assemblyai':
      return await getLanguagesVoicesForAssemblyAI(credential, getTtsVoices, logger);
    case 'whisper':
@@ -739,7 +769,28 @@ async function getLanguagesVoicesForPlayHT(credential) {
    }, []);
    return tranform(ttsVoices, undefined, TtsModelPlayHT);
  }
-  return tranform(undefined, undefined, TtsModelPlayHT);
+  return tranform(TtsPlayHtLanguagesVoices, undefined, TtsModelPlayHT);
+}
+
+async function getLanguagesVoicesForRimelabs(credential) {
+  const model_id = credential ? credential.model_id : null;
+  const get = bent('https://users.rime.ai', 'GET', 'json', {
+    'Accept': 'application/json'
+  });
+  const voices = await get('/data/voices/all.json');
+  let selectedVoices = model_id ? voices[model_id] : Object.values(voices).reduce((acc, val) => [...acc, ...val], []);
+  selectedVoices = selectedVoices.map((v) => ({
+    name: v.charAt(0).toUpperCase() + v.slice(1),
+    value: v
+  }));
+  const ttsVoices = [
+    {
+      value: 'en-US',
+      name: 'English (US)',
+      voices: selectedVoices
+    }
+  ];
+  return tranform(ttsVoices, undefined, TtsModelRimelabs);
 }

 async function getLanguagesVoicesForAssemblyAI(credential) {
@@ -894,6 +945,7 @@ module.exports = {
  testSonioxStt,
  testElevenlabs,
  testPlayHT,
+  testRimelabs,
  testAssemblyStt,
  testDeepgramTTS,
  getSpeechCredential,