support houndify stt (#498)

* support houndify stt

* wip

* test houndify stt credential

* wip

* wip

* update verb specification
This commit is contained in:
Hoan Luu Huu
2025-10-14 11:52:49 +07:00
committed by GitHub
parent 8267ddaffd
commit bcff9b35a6
6 changed files with 474 additions and 42 deletions
+21 -1
View File
@@ -16,7 +16,8 @@ const {decryptCredential, testWhisper, testDeepgramTTS,
testVoxistStt,
testOpenAiStt,
testInworld,
testResembleTTS} = require('../../utils/speech-utils');
testResembleTTS,
testHoundifyStt} = require('../../utils/speech-utils');
const {DbErrorUnprocessableRequest, DbErrorForbidden, DbErrorBadRequest} = require('../../utils/errors');
const {
testGoogleTts,
@@ -124,6 +125,7 @@ const encryptCredential = (obj) => {
role_arn,
region,
client_id,
client_key,
client_secret,
secret,
nuance_tts_uri,
@@ -318,6 +320,13 @@ const encryptCredential = (obj) => {
const assemblyaiData = JSON.stringify({api_key, service_version});
return encrypt(assemblyaiData);
case 'houndify':
assert(client_id, 'invalid houndify speech credential: client_id is required');
assert(client_key, 'invalid houndify speech credential: client_key is required');
assert(user_id, 'invalid houndify speech credential: user_id is required');
const houndifyData = JSON.stringify({client_id, client_key, user_id});
return encrypt(houndifyData);
case 'voxist':
assert(api_key, 'invalid voxist speech credential: api_key is required');
const voxistData = JSON.stringify({api_key});
@@ -970,6 +979,17 @@ router.get('/:sid/test', async(req, res) => {
SpeechCredential.sttTestResult(sid, false);
}
}
} else if (cred.vendor === 'houndify') {
if (cred.use_for_stt) {
try {
await testHoundifyStt(logger, credential);
results.stt.status = 'ok';
SpeechCredential.sttTestResult(sid, true);
} catch (err) {
results.stt = {status: 'fail', reason: err.message};
SpeechCredential.sttTestResult(sid, false);
}
}
} else if (cred.vendor === 'voxist') {
const {api_key} = credential;
if (cred.use_for_stt) {
+19
View File
@@ -0,0 +1,19 @@
// Language options kept as compact [display name, language code] pairs and
// expanded below into the {name, value} objects this module exports.
// NOTE(review): presumably the Houndify STT language list — confirm against
// the module that requires this file.
const languagePairs = [
  ['English', 'en'],
  ['Spanish', 'es'],
  ['Portuguese', 'pt'],
  ['French', 'fr'],
  ['Indian-accented English', 'en-IN'],
  ['German', 'de'],
  ['Dutch', 'nl'],
  ['Italian', 'it'],
  ['Korean', 'ko'],
  ['Japanese', 'ja'],
  ['Mandarin', 'zh-CN'],
  ['Russian', 'ru'],
  ['Polish', 'pl'],
  ['Swedish', 'sv'],
  ['Arabic', 'ar'],
  ['Turkish', 'tr'],
  ['Hebrew', 'he'],
];

module.exports = languagePairs.map(([name, value]) => ({ name, value }));
+82 -2
View File
@@ -5,6 +5,7 @@ const sdk = require('microsoft-cognitiveservices-speech-sdk');
const { SpeechClient } = require('@soniox/soniox-node');
const fs = require('fs');
const { AssemblyAI } = require('assemblyai');
const Houndify = require('houndify');
const {decrypt, obscureKey} = require('./encrypt-decrypt');
const { RealtimeSession } = require('speechmatics');
@@ -45,6 +46,7 @@ const SttCobaltLanguagesVoices = require('./speech-data/stt-cobalt');
const SttSonioxLanguagesVoices = require('./speech-data/stt-soniox');
const SttSpeechmaticsLanguagesVoices = require('./speech-data/stt-speechmatics');
const SttAssemblyaiLanguagesVoices = require('./speech-data/stt-assemblyai');
const SttHoundifyLanguagesVoices = require('./speech-data/stt-houndify');
const SttVoxistLanguagesVoices = require('./speech-data/stt-voxist');
const SttVerbioLanguagesVoices = require('./speech-data/stt-verbio');
const SttOpenaiLanguagesVoices = require('./speech-data/stt-openai');
@@ -595,6 +597,72 @@ const testAssemblyStt = async(logger, credentials) => {
});
};
/**
 * Verify Houndify speech-to-text credentials by streaming the bundled test
 * audio file through a Houndify VoiceRequest.
 *
 * The returned promise settles exactly once: on the first response, on the
 * first error, or when the watchdog timeout expires, whichever comes first.
 *
 * @param {object} logger - logger exposing debug() and error()
 * @param {object} credentials - credential holding client_id, client_key and user_id
 * @returns {Promise<object>} resolves with the Houndify response when its
 *   AllResults array contains at least one entry
 * @throws {Error} (as a rejection) when the response has no results, when
 *   Houndify reports an error, when the request cannot be created or the
 *   audio file cannot be read, or when no callback fires before the timeout
 */
const testHoundifyStt = async(logger, credentials) => {
  const {client_id, client_key, user_id} = credentials;
  // Without an upper bound the caller would wait forever if the SDK never
  // invoked onResponse or onError.
  const responseTimeoutMs = 15000;

  return new Promise((resolve, reject) => {
    let settled = false;
    let timer = null;

    // Settle at most once and always release the watchdog timer so it cannot
    // keep the process alive or fire after a result has been delivered.
    const settle = (fn, value) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      fn(value);
    };

    // Callers read err.message, so make sure every rejection is a real Error
    // even when the SDK reports a plain string or object.
    const toError = (err) => {
      if (err instanceof Error) return err;
      const message = typeof err === 'string' ?
        err :
        (err && err.message) || 'Houndify STT request failed';
      return new Error(message);
    };

    timer = setTimeout(() => {
      settle(reject, new Error(`Houndify STT test timed out after ${responseTimeoutMs}ms`));
    }, responseTimeoutMs);

    try {
      // Read the test audio file
      const audioBuffer = fs.readFileSync(`${__dirname}/../../data/test_audio.wav`);

      // Create VoiceRequest for speech-to-text testing
      const voiceRequest = new Houndify.VoiceRequest({
        // Your Houndify Client ID and Key
        clientId: client_id,
        clientKey: client_key,

        // Request info; the coordinates are fixed values sent with every test
        requestInfo: {
          UserID: user_id || 'test_user',
          Latitude: 37.388309,
          Longitude: -121.973968
        },

        // Audio format configuration
        sampleRate: 16000,
        enableVAD: true,

        // Response and error handlers
        onResponse: function(response, info) {
          logger.debug({response, info}, 'Houndify STT response received');
          if (response && response.AllResults && response.AllResults.length > 0) {
            settle(resolve, response);
          } else {
            settle(reject, new Error('No transcription results received'));
          }
        },

        onError: function(err, info) {
          logger.error({err, info}, 'Houndify STT error');
          settle(reject, toError(err));
        },

        onRecordingStarted: function() {
          logger.debug('Houndify recording started');
        },

        onRecordingStopped: function() {
          logger.debug('Houndify recording stopped');
        }
      });

      // Send audio in chunks (VoiceRequest automatically starts when you write data)
      // NOTE(review): the whole file, WAV header included, is streamed as raw
      // Buffer chunks — confirm this is a format VoiceRequest.write accepts.
      const chunkSize = 1024;
      for (let i = 0; i < audioBuffer.length; i += chunkSize) {
        // subarray() returns the same zero-copy view as the deprecated Buffer#slice()
        voiceRequest.write(audioBuffer.subarray(i, i + chunkSize));
      }

      // End the request
      voiceRequest.end();
    } catch (error) {
      logger.error({error}, 'Failed to create Houndify VoiceRequest');
      settle(reject, toError(error));
    }
  });
};
const testVoxistStt = async(logger, credentials) => {
const {api_key} = credentials;
const response = await fetch('https://api-asr.voxist.com/clients', {
@@ -749,7 +817,12 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
const o = JSON.parse(decrypt(credential));
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
obj.service_version = o.service_version;
} else if ('resemble' === obj.vendor) {
} else if ('houndify' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.client_key = isObscureKey ? obscureKey(o.client_key) : o.client_key;
obj.client_id = o.client_id;
obj.user_id = o.user_id;
} else if ('resemble' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
obj.resemble_tts_uri = o.resemble_tts_uri;
@@ -828,6 +901,8 @@ async function getLanguagesAndVoicesForVendor(logger, vendor, credential, getTts
return await getLanguagesAndVoicesForResemble(credential, getTtsVoices, logger);
case 'assemblyai':
return await getLanguagesVoicesForAssemblyAI(credential, getTtsVoices, logger);
case 'houndify':
return await getLanguagesVoicesForHoundify(credential, getTtsVoices, logger);
case 'voxist':
return await getLanguagesVoicesForVoxist(credential, getTtsVoices, logger);
case 'whisper':
@@ -1249,6 +1324,10 @@ async function getLanguagesVoicesForAssemblyAI(credential) {
return tranform(undefined, SttAssemblyaiLanguagesVoices);
}
/**
 * Return the Houndify language/voice listing.
 *
 * Hands the static Houndify STT language table to tranform() with undefined
 * as the first argument.
 *
 * @param {object} credential - not used by this function
 * @returns {Promise<*>} whatever tranform() returns for the STT table
 */
async function getLanguagesVoicesForHoundify(credential) {
  const sttLanguages = SttHoundifyLanguagesVoices;
  const ttsVoices = undefined;
  return tranform(ttsVoices, sttLanguages);
}
/**
 * Return the Voxist language/voice listing.
 *
 * Hands the static Voxist STT language table to tranform() with undefined
 * as the first argument.
 *
 * @param {object} credential - not used by this function
 * @returns {Promise<*>} whatever tranform() returns for the STT table
 */
async function getLanguagesVoicesForVoxist(credential) {
  const sttLanguages = SttVoxistLanguagesVoices;
  const ttsVoices = undefined;
  return tranform(ttsVoices, sttLanguages);
}
@@ -1646,5 +1725,6 @@ module.exports = {
testCartesia,
testVoxistStt,
testOpenAiStt,
testResembleTTS
testResembleTTS,
testHoundifyStt
};