mirror of
https://github.com/jambonz/jambonz-api-server.git
synced 2026-01-25 02:08:24 +00:00
support gladia stt (#503)
* support gladia stt * wip * update verb specification
This commit is contained in:
103
lib/utils/speech-data/stt-gladia.js
Normal file
103
lib/utils/speech-data/stt-gladia.js
Normal file
@@ -0,0 +1,103 @@
|
||||
// Language table for the Gladia speech-to-text vendor.
// Each row is [display name, code]; the code is what ends up in `value`
// (presumably the language identifier the Gladia API expects - verify against
// the vendor documentation before adding or changing entries).
const GLADIA_STT_LANGUAGES = [
  ['Afrikaans', 'af'],
  ['Albanian', 'sq'],
  ['Amharic', 'am'],
  ['Arabic', 'ar'],
  ['Armenian', 'hy'],
  ['Assamese', 'as'],
  ['Azerbaijani', 'az'],
  ['Bashkir', 'ba'],
  ['Basque', 'eu'],
  ['Belarusian', 'be'],
  ['Bengali', 'bn'],
  ['Bosnian', 'bs'],
  ['Breton', 'br'],
  ['Bulgarian', 'bg'],
  ['Cantonese', 'yue'],
  ['Catalan', 'ca'],
  ['Chinese', 'zh'],
  ['Croatian', 'hr'],
  ['Czech', 'cs'],
  ['Danish', 'da'],
  ['Dutch', 'nl'],
  ['English', 'en'],
  ['Estonian', 'et'],
  ['Faroese', 'fo'],
  ['Finnish', 'fi'],
  ['French', 'fr'],
  ['Galician', 'gl'],
  ['Georgian', 'ka'],
  ['German', 'de'],
  ['Greek', 'el'],
  ['Gujarati', 'gu'],
  ['Haitian Creole', 'ht'],
  ['Hausa', 'ha'],
  ['Hawaiian', 'haw'],
  ['Hebrew', 'he'],
  ['Hindi', 'hi'],
  ['Hungarian', 'hu'],
  ['Icelandic', 'is'],
  ['Indonesian', 'id'],
  ['Italian', 'it'],
  ['Japanese', 'ja'],
  ['Javanese', 'jw'],
  ['Kannada', 'kn'],
  ['Kazakh', 'kk'],
  ['Khmer', 'km'],
  ['Korean', 'ko'],
  ['Lao', 'lo'],
  ['Latin', 'la'],
  ['Latvian', 'lv'],
  ['Lingala', 'ln'],
  ['Lithuanian', 'lt'],
  ['Luxembourgish', 'lb'],
  ['Macedonian', 'mk'],
  ['Malagasy', 'mg'],
  ['Malay', 'ms'],
  ['Malayalam', 'ml'],
  ['Maltese', 'mt'],
  ['Maori', 'mi'],
  ['Marathi', 'mr'],
  ['Mongolian', 'mn'],
  ['Myanmar', 'my'],
  ['Nepali', 'ne'],
  ['Norwegian', 'no'],
  ['Nynorsk', 'nn'],
  ['Occitan', 'oc'],
  ['Pashto', 'ps'],
  ['Persian', 'fa'],
  ['Polish', 'pl'],
  ['Portuguese', 'pt'],
  ['Punjabi', 'pa'],
  ['Romanian', 'ro'],
  ['Russian', 'ru'],
  ['Sanskrit', 'sa'],
  ['Serbian', 'sr'],
  ['Shona', 'sn'],
  ['Sindhi', 'sd'],
  ['Sinhala', 'si'],
  ['Slovak', 'sk'],
  ['Slovenian', 'sl'],
  ['Somali', 'so'],
  ['Spanish', 'es'],
  ['Sundanese', 'su'],
  ['Swahili', 'sw'],
  ['Swedish', 'sv'],
  ['Tagalog', 'tl'],
  ['Tajik', 'tg'],
  ['Tamil', 'ta'],
  ['Tatar', 'tt'],
  ['Telugu', 'te'],
  ['Thai', 'th'],
  ['Tibetan', 'bo'],
  ['Turkish', 'tr'],
  ['Turkmen', 'tk'],
  ['Ukrainian', 'uk'],
  ['Urdu', 'ur'],
  ['Uzbek', 'uz'],
  ['Vietnamese', 'vi'],
  ['Welsh', 'cy'],
  ['Wolof', 'wo'],
  ['Yiddish', 'yi'],
  ['Yoruba', 'yo'],
];

// Export plain { name, value } records, in the order listed above.
module.exports = GLADIA_STT_LANGUAGES.map(([name, value]) => ({ name, value }));
|
||||
@@ -6,6 +6,7 @@ const { SpeechClient } = require('@soniox/soniox-node');
|
||||
const fs = require('fs');
|
||||
const { AssemblyAI } = require('assemblyai');
|
||||
const Houndify = require('houndify');
|
||||
const { GladiaClient } = require('@gladiaio/sdk');
|
||||
const {decrypt, obscureKey} = require('./encrypt-decrypt');
|
||||
const { RealtimeSession } = require('speechmatics');
|
||||
|
||||
@@ -50,6 +51,7 @@ const SttHoundifyLanguagesVoices = require('./speech-data/stt-houndify');
|
||||
const SttVoxistLanguagesVoices = require('./speech-data/stt-voxist');
|
||||
const SttVerbioLanguagesVoices = require('./speech-data/stt-verbio');
|
||||
const SttOpenaiLanguagesVoices = require('./speech-data/stt-openai');
|
||||
const SttGladiaLanguagesVoices = require('./speech-data/stt-gladia');
|
||||
|
||||
|
||||
const SttModelOpenai = require('./speech-data/stt-model-openai');
|
||||
@@ -170,6 +172,65 @@ const testGoogleStt = async(logger, credentials) => {
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Exercise a set of Gladia STT credentials by streaming the bundled test
 * recording through a live (v2) session and waiting for a final transcript.
 *
 * @param {object} logger - logger exposing `debug()` and `error()`
 * @param {object} credentials - credential object; only `api_key` is read
 * @returns {Promise<string>} text of the first final utterance reported by the session
 * @throws rejects with the error emitted by the session, or with
 *   `Error('Gladia STT test timeout')` if no final transcript arrives within
 *   30 seconds; synchronous setup failures are logged and rethrown.
 */
const testGladiaStt = async(logger, credentials) => {
  const {api_key} = credentials;

  try {
    const gladiaClient = new GladiaClient({
      apiKey: api_key,
    });
    const gladiaConfig = {
      model: 'solaria-1',
      encoding: 'wav/pcm',
      sample_rate: 16000,
      bit_depth: 16,
      channels: 1,
      language_config: {
        languages: ['en'],
        code_switching: false,
      },
    };
    // Start the live session
    const liveSession = gladiaClient.liveV2().startSession(gladiaConfig);
    // Read the test audio file
    const audioBuffer = fs.readFileSync(`${__dirname}/../../data/test_audio.wav`);

    // Wait for final transcript
    return new Promise((resolve, reject) => {
      let timer = null;
      let settled = false;

      // Settle the promise exactly once and always release the watchdog timer,
      // so a finished test does not leave a 30s timer holding the event loop.
      const settle = (done, value) => {
        if (settled) return;
        settled = true;
        clearTimeout(timer);
        done(value);
      };

      liveSession.on('message', (message) => {
        // Ignore anything that arrives after the outcome has been decided.
        if (settled) return;
        if (message.type === 'transcript' && message.data.is_final) {
          logger.debug(`${message.data.id}: ${message.data.utterance.text}`);
          liveSession.stopRecording();
          settle(resolve, message.data.utterance.text);
        }
      });

      liveSession.on('error', (error) => {
        logger.error({error}, 'Gladia Live STT error');
        settle(reject, error);
      });

      // Send audio in chunks
      const chunkSize = 1024;
      for (let i = 0; i < audioBuffer.length; i += chunkSize) {
        // subarray() is the non-deprecated equivalent of Buffer#slice (a view, no copy)
        liveSession.sendAudio(audioBuffer.subarray(i, i + chunkSize));
      }
      // Stop recording after sending all audio
      liveSession.stopRecording();

      // Set a timeout to prevent hanging; skipped if the session already
      // reported an outcome while the audio was being sent.
      if (!settled) {
        timer = setTimeout(() => {
          settle(reject, new Error('Gladia STT test timeout'));
        }, 30000); // 30 second timeout
      }
    });

  } catch (error) {
    logger.error({error}, 'Failed to create Gladia Live STT session');
    throw error;
  }
};
|
||||
|
||||
const testDeepgramStt = async(logger, credentials) => {
|
||||
const {api_key, deepgram_stt_uri, deepgram_stt_use_tls} = credentials;
|
||||
const deepgram = new Deepgram(api_key, deepgram_stt_uri, deepgram_stt_uri && deepgram_stt_use_tls);
|
||||
@@ -758,6 +819,10 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
|
||||
}
|
||||
else if ('gladia' === obj.vendor) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
|
||||
}
|
||||
else if ('ibm' === obj.vendor) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.tts_api_key = isObscureKey ? obscureKey(o.tts_api_key) : o.tts_api_key;
|
||||
@@ -881,6 +946,8 @@ async function getLanguagesAndVoicesForVendor(logger, vendor, credential, getTts
|
||||
return await getLanguagesVoicesForNuane(credential, getTtsVoices, logger);
|
||||
case 'deepgram':
|
||||
return await getLanguagesVoicesForDeepgram(credential, getTtsVoices, logger);
|
||||
case 'gladia':
|
||||
return await getLanguagesVoicesForGladia(credential, getTtsVoices, logger);
|
||||
case 'ibm':
|
||||
return await getLanguagesVoicesForIbm(credential, getTtsVoices, logger);
|
||||
case 'nvidia':
|
||||
@@ -1052,6 +1119,11 @@ async function getLanguagesVoicesForDeepgram(credential, getTtsVoices, logger) {
|
||||
TtsModelDeepgram, sttModelDeepgram.sort((a, b) => a.name.localeCompare(b.name)));
|
||||
}
|
||||
|
||||
/**
 * Return the language list for the Gladia vendor.
 *
 * Only the second argument of tranform() is populated, with the Gladia STT
 * language table sorted by display name; the other three are passed as
 * undefined. The parameters are unused here and kept so the signature
 * matches the other getLanguagesVoicesFor* helpers.
 *
 * @param {object} credential - unused
 * @param {boolean} getTtsVoices - unused
 * @param {object} logger - unused
 * @returns {Promise<*>} whatever tranform() returns for the sorted table
 */
async function getLanguagesVoicesForGladia(credential, getTtsVoices, logger) {
  // Array.prototype.sort sorts in place; work on a copy so the shared
  // module-level table is not mutated on every call.
  const sttLanguages = [...SttGladiaLanguagesVoices]
    .sort((a, b) => a.name.localeCompare(b.name));
  return tranform(undefined, sttLanguages, undefined, undefined);
}
|
||||
|
||||
async function getLanguagesVoicesForIbm(credential, getTtsVoices, logger) {
|
||||
if (credential) {
|
||||
try {
|
||||
@@ -1706,6 +1778,7 @@ module.exports = {
|
||||
testNuanceTts,
|
||||
testNuanceStt,
|
||||
testDeepgramStt,
|
||||
testGladiaStt,
|
||||
testIbmTts,
|
||||
testIbmStt,
|
||||
testSonioxStt,
|
||||
|
||||
Reference in New Issue
Block a user