support houndify stt (#498)

* support houndify stt * wip * test houndify stt credential * wip * wip * update verb specification
2026-07-04 19:21:53 +00:00 · 2025-10-14 11:52:49 +07:00
parent 8267ddaffd
commit bcff9b35a6
6 changed files with 474 additions and 42 deletions
@@ -16,7 +16,8 @@ const {decryptCredential, testWhisper, testDeepgramTTS,
  testVoxistStt,
  testOpenAiStt,
  testInworld,
-  testResembleTTS} = require('../../utils/speech-utils');
+  testResembleTTS,
+  testHoundifyStt} = require('../../utils/speech-utils');
 const {DbErrorUnprocessableRequest, DbErrorForbidden, DbErrorBadRequest} = require('../../utils/errors');
 const {
  testGoogleTts,
@@ -124,6 +125,7 @@ const encryptCredential = (obj) => {
    role_arn,
    region,
    client_id,
+    client_key,
    client_secret,
    secret,
    nuance_tts_uri,
@@ -318,6 +320,13 @@ const encryptCredential = (obj) => {
      const assemblyaiData = JSON.stringify({api_key, service_version});
      return encrypt(assemblyaiData);

+    case 'houndify':
+      assert(client_id, 'invalid houndify speech credential: client_id is required');
+      assert(client_key, 'invalid houndify speech credential: client_key is required');
+      assert(user_id, 'invalid houndify speech credential: user_id is required');
+      const houndifyData = JSON.stringify({client_id, client_key, user_id});
+      return encrypt(houndifyData);
+
    case 'voxist':
      assert(api_key, 'invalid voxist speech credential: api_key is required');
      const voxistData = JSON.stringify({api_key});
@@ -970,6 +979,17 @@ router.get('/:sid/test', async(req, res) => {
          SpeechCredential.sttTestResult(sid, false);
        }
      }
+    } else if (cred.vendor === 'houndify') {
+      if (cred.use_for_stt) {
+        try {
+          await testHoundifyStt(logger, credential);
+          results.stt.status = 'ok';
+          SpeechCredential.sttTestResult(sid, true);
+        } catch (err) {
+          results.stt = {status: 'fail', reason: err.message};
+          SpeechCredential.sttTestResult(sid, false);
+        }
+      }
    } else if (cred.vendor === 'voxist') {
      const {api_key} = credential;
      if (cred.use_for_stt) {
@@ -0,0 +1,19 @@
+module.exports = [ // display name / language code pairs (presumably the Houndify STT language list — confirm)
+  { name: 'English', value: 'en' },
+  { name: 'Spanish', value: 'es' },
+  { name: 'Portuguese', value: 'pt' },
+  { name: 'French', value: 'fr' },
+  { name: 'Indian-accented English', value: 'en-IN' },
+  { name: 'German', value: 'de' },
+  { name: 'Dutch', value: 'nl' },
+  { name: 'Italian', value: 'it' },
+  { name: 'Korean', value: 'ko' },
+  { name: 'Japanese', value: 'ja' },
+  { name: 'Mandarin', value: 'zh-CN' },
+  { name: 'Russian', value: 'ru' },
+  { name: 'Polish', value: 'pl' },
+  { name: 'Swedish', value: 'sv' },
+  { name: 'Arabic', value: 'ar' },
+  { name: 'Turkish', value: 'tr' },
+  { name: 'Hebrew', value: 'he' },
+];
@@ -5,6 +5,7 @@ const sdk = require('microsoft-cognitiveservices-speech-sdk');
 const { SpeechClient } = require('@soniox/soniox-node');
 const fs = require('fs');
 const { AssemblyAI } = require('assemblyai');
+const Houndify = require('houndify');
 const {decrypt, obscureKey} = require('./encrypt-decrypt');
 const { RealtimeSession } = require('speechmatics');

@@ -45,6 +46,7 @@ const SttCobaltLanguagesVoices = require('./speech-data/stt-cobalt');
 const SttSonioxLanguagesVoices = require('./speech-data/stt-soniox');
 const SttSpeechmaticsLanguagesVoices = require('./speech-data/stt-speechmatics');
 const SttAssemblyaiLanguagesVoices = require('./speech-data/stt-assemblyai');
+const SttHoundifyLanguagesVoices = require('./speech-data/stt-houndify');
 const SttVoxistLanguagesVoices = require('./speech-data/stt-voxist');
 const SttVerbioLanguagesVoices = require('./speech-data/stt-verbio');
 const SttOpenaiLanguagesVoices = require('./speech-data/stt-openai');
@@ -595,6 +597,72 @@ const testAssemblyStt = async(logger, credentials) => {
  });
 };

+const testHoundifyStt = async(logger, credentials) => {
+  const {client_id, client_key, user_id} = credentials;
+
+  return new Promise((resolve, reject) => {
+    try {
+      // Read the test audio file
+      const audioBuffer = fs.readFileSync(`${__dirname}/../../data/test_audio.wav`);
+
+      // Create VoiceRequest for speech-to-text testing
+      const voiceRequest = new Houndify.VoiceRequest({
+        // Your Houndify Client ID and Key
+        clientId: client_id,
+        clientKey: client_key,
+
+        // Request info
+        requestInfo: {
+          UserID: user_id || 'test_user',
+          Latitude: 37.388309,
+          Longitude: -121.973968
+        },
+
+        // Audio format configuration
+        sampleRate: 16000,
+        enableVAD: true,
+
+        // Response and error handlers
+        onResponse: function(response, info) {
+          logger.debug({response, info}, 'Houndify STT response received');
+          if (response && response.AllResults && response.AllResults.length > 0) {
+            resolve(response);
+          } else {
+            reject(new Error('No transcription results received'));
+          }
+        },
+
+        onError: function(err, info) {
+          logger.error({err, info}, 'Houndify STT error');
+          reject(err);
+        },
+
+        onRecordingStarted: function() {
+          logger.debug('Houndify recording started');
+        },
+
+        onRecordingStopped: function() {
+          logger.debug('Houndify recording stopped');
+        }
+      });
+
+      // Send audio in chunks (VoiceRequest automatically starts when you write data)
+      const chunkSize = 1024;
+      for (let i = 0; i < audioBuffer.length; i += chunkSize) {
+        const chunk = audioBuffer.slice(i, i + chunkSize);
+        voiceRequest.write(chunk);
+      }
+
+      // End the request
+      voiceRequest.end();
+
+    } catch (error) {
+      logger.error({error}, 'Failed to create Houndify VoiceRequest');
+      reject(error);
+    }
+  });
+};
+
 const testVoxistStt = async(logger, credentials) => {
  const {api_key} = credentials;
  const response = await fetch('https://api-asr.voxist.com/clients', {
@@ -749,7 +817,12 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
    const o = JSON.parse(decrypt(credential));
    obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
    obj.service_version = o.service_version;
-  }  else if ('resemble' === obj.vendor) {
+  } else if ('houndify' === obj.vendor) {
+    const o = JSON.parse(decrypt(credential));
+    obj.client_key = isObscureKey ? obscureKey(o.client_key) : o.client_key;
+    obj.client_id = o.client_id;
+    obj.user_id = o.user_id;
+  } else if ('resemble' === obj.vendor) {
    const o = JSON.parse(decrypt(credential));
    obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
    obj.resemble_tts_uri = o.resemble_tts_uri;
@@ -828,6 +901,8 @@ async function getLanguagesAndVoicesForVendor(logger, vendor, credential, getTts
      return await getLanguagesAndVoicesForResemble(credential, getTtsVoices, logger);
    case 'assemblyai':
      return await getLanguagesVoicesForAssemblyAI(credential, getTtsVoices, logger);
+    case 'houndify':
+      return await getLanguagesVoicesForHoundify(credential, getTtsVoices, logger);
    case 'voxist':
      return await getLanguagesVoicesForVoxist(credential, getTtsVoices, logger);
    case 'whisper':
@@ -1249,6 +1324,10 @@ async function getLanguagesVoicesForAssemblyAI(credential) {
  return tranform(undefined, SttAssemblyaiLanguagesVoices);
 }

+async function getLanguagesVoicesForHoundify(credential) {
+  return tranform(undefined, SttHoundifyLanguagesVoices);
+}
+
 async function getLanguagesVoicesForVoxist(credential) {
  return tranform(undefined, SttVoxistLanguagesVoices);
 }
@@ -1646,5 +1725,6 @@ module.exports = {
  testCartesia,
  testVoxistStt,
  testOpenAiStt,
-  testResembleTTS
+  testResembleTTS,
+  testHoundifyStt
 };