support voxist stt (#384)

2025-12-19 05:47:46 +00:00 · 2025-02-05 20:32:36 +07:00
parent 86d50d94cb
commit 73e35c84c5
4 changed files with 83 additions and 2 deletions
--- a/lib/routes/api/speech-credentials.js
+++ b/lib/routes/api/speech-credentials.js
@@ -12,7 +12,8 @@ const {decryptCredential, testWhisper, testDeepgramTTS,
  testVerbioTts,
  testVerbioStt,
  testSpeechmaticsStt,
-  testCartesia} = require('../../utils/speech-utils');
+  testCartesia,
+  testVoxistStt} = require('../../utils/speech-utils');
 const {DbErrorUnprocessableRequest, DbErrorForbidden, DbErrorBadRequest} = require('../../utils/errors');
 const {
  testGoogleTts,
@@ -270,6 +271,11 @@ const encryptCredential = (obj) => {
      const assemblyaiData = JSON.stringify({api_key});
      return encrypt(assemblyaiData);

+    case 'voxist':
+      assert(api_key, 'invalid voxist speech credential: api_key is required');
+      const voxistData = JSON.stringify({api_key});
+      return encrypt(voxistData);
+
    case 'whisper':
      assert(api_key, 'invalid whisper speech credential: api_key is required');
      assert(model_id, 'invalid whisper speech credential: model_id is required');
@@ -853,6 +859,18 @@ router.get('/:sid/test', async(req, res) => {
          SpeechCredential.sttTestResult(sid, false);
        }
      }
+    } else if (cred.vendor === 'voxist') {
+      const {api_key} = credential;
+      if (cred.use_for_stt) {
+        try {
+          await testVoxistStt(logger, {api_key});
+          results.stt.status = 'ok';
+          SpeechCredential.sttTestResult(sid, true);
+        } catch (err) {
+          results.stt = {status: 'fail', reason: err.message};
+          SpeechCredential.sttTestResult(sid, false);
+        }
+      }
    } else if (cred.vendor === 'whisper') {
      if (cred.use_for_tts) {
        try {
--- a/lib/utils/speech-data/stt-voxist.js
+++ b/lib/utils/speech-data/stt-voxist.js
@@ -0,0 +1,8 @@
+module.exports = [ // static speech-to-text language list for the voxist vendor: display name + language code
+  { name: 'English', value: 'en' },
+  { name: 'French', value: 'fr' },
+  { name: 'German', value: 'de' },
+  { name: 'Dutch', value: 'nl' },
+  { name: 'Italian', value: 'it' },
+  { name: 'Spanish', value: 'sp' }, // NOTE(review): ISO 639-1 for Spanish is 'es'; confirm Voxist expects 'sp'
+];
--- a/lib/utils/speech-utils.js
+++ b/lib/utils/speech-utils.js
@@ -41,6 +41,7 @@ const SttCobaltLanguagesVoices = require('./speech-data/stt-cobalt');
 const SttSonioxLanguagesVoices = require('./speech-data/stt-soniox');
 const SttSpeechmaticsLanguagesVoices = require('./speech-data/stt-speechmatics');
 const SttAssemblyaiLanguagesVoices = require('./speech-data/stt-assemblyai');
+const SttVoxistLanguagesVoices = require('./speech-data/stt-voxist');
 const SttVerbioLanguagesVoices = require('./speech-data/stt-verbio');
 const ttsCartesia = require('./speech-data/tts-cartesia');
 const ttsModelCartesia = require('./speech-data/tts-model-cartesia');
@@ -502,6 +503,20 @@ const testAssemblyStt = async(logger, credentials) => {
  });
 };

+const testVoxistStt = async(logger, credentials) => { // checks a Voxist api_key with one authenticated GET
+  const {api_key} = credentials; // outside the try: a missing credentials object throws without being logged
+  try {
+    const get = bent('https://api-asr.voxist.com', 'GET', 'json', { // presumably the bent HTTP client - confirm
+      'Accept': 'application/json',
+      'x-lvl-key': api_key // the api_key under test is sent in this request header
+    });
+    await get('/clients'); // result is discarded; only whether the request succeeds matters
+  } catch (err) {
+    logger.info({err}, 'failed to get clients from Voxist');
+    throw err; // rethrown unchanged after logging, so the caller still sees the original error
+  }
+};
+
 const getSpeechCredential = (credential, logger) => {
  const {vendor} = credential;
  logger.info(
@@ -629,6 +644,9 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
  } else if ('assemblyai' === obj.vendor) {
    const o = JSON.parse(decrypt(credential));
    obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
+  } else if ('voxist' === obj.vendor) {
+    const o = JSON.parse(decrypt(credential));
+    obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
  } else if ('whisper' === obj.vendor) {
    const o = JSON.parse(decrypt(credential));
    obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
@@ -692,6 +710,8 @@ async function getLanguagesAndVoicesForVendor(logger, vendor, credential, getTts
      return await getLanguagesVoicesForRimelabs(credential, getTtsVoices, logger);
    case 'assemblyai':
      return await getLanguagesVoicesForAssemblyAI(credential, getTtsVoices, logger);
+    case 'voxist':
+      return await getLanguagesVoicesForVoxist(credential, getTtsVoices, logger);
    case 'whisper':
      return await getLanguagesVoicesForWhisper(credential, getTtsVoices, logger);
    case 'verbio':
@@ -988,6 +1008,10 @@ async function getLanguagesVoicesForAssemblyAI(credential) {
  return tranform(undefined, SttAssemblyaiLanguagesVoices);
 }

+async function getLanguagesVoicesForVoxist(credential) { // credential is not read; the list is static
+  return tranform(undefined, SttVoxistLanguagesVoices); // first argument left undefined, static STT list second
+}
+
 async function getLanguagesVoicesForWhisper(credential) { // credential is not read; the data is static
  return tranform(TtsWhisperLanguagesVoices, undefined, TtsModelWhisper); // second argument left undefined
 }
@@ -1278,5 +1302,6 @@ module.exports = {
  testVerbioStt,
  getLanguagesAndVoicesForVendor,
  testSpeechmaticsStt,
-  testCartesia
+  testCartesia,
+  testVoxistStt
 };
--- a/test/speech-credentials.js
+++ b/test/speech-credentials.js
@@ -780,6 +780,27 @@ test('speech credentials tests', async(t) => {
    });
    t.ok(result.statusCode === 204, 'successfully deleted speech credential');

+    /* add a credential for Voxist */
+    result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
+      resolveWithFullResponse: true,
+      auth: authUser,
+      json: true,
+      body: {
+        vendor: 'voxist',
+        use_for_stt: true,
+        api_key: "APIKEY"
+      }
+    });
+    t.ok(result.statusCode === 201, 'successfully added speech credential for Voxist');
+    const voxistSid = result.body.sid;
+
+    /* delete the credential */
+    result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${voxistSid}`, {
+      auth: authUser,
+      resolveWithFullResponse: true,
+    });
+    t.ok(result.statusCode === 204, 'successfully deleted speech credential');
+
    /* add a credential for aws polly by roleArn */
    result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
      resolveWithFullResponse: true,
@@ -972,6 +993,15 @@ test('speech credentials tests', async(t) => {
    });
    t.ok(result.body.stt.length !== 0, 'successfully get assemblyai supported languages and voices');

+    /* Check voxist supportedLanguagesAndVoices */
+    result = await request.get(`/Accounts/${account_sid}/SpeechCredentials/speech/supportedLanguagesAndVoices?vendor=voxist`, {
+      resolveWithFullResponse: true,
+      simple: false,
+      auth: authAdmin,
+      json: true,
+    });
+    t.ok(result.body.stt.length !== 0, 'successfully get voxist supported languages and voices');
+
    /* Check whisper supportedLanguagesAndVoices */
    result = await request.get(`/Accounts/${account_sid}/SpeechCredentials/speech/supportedLanguagesAndVoices?vendor=whisper`, {
      resolveWithFullResponse: true,