mirror of
https://github.com/jambonz/jambonz-api-server.git
synced 2026-01-25 02:08:24 +00:00
support stt speechmatics (#353)
* support stt speechmatics
* support speechmatics region authentication
* update testcase for speechmatics_stt_uri
This commit is contained in:
@@ -10,7 +10,8 @@ const {decryptCredential, testWhisper, testDeepgramTTS,
|
||||
testPlayHT,
|
||||
testRimelabs,
|
||||
testVerbioTts,
|
||||
testVerbioStt} = require('../../utils/speech-utils');
|
||||
testVerbioStt,
|
||||
testSpeechmaticsStt} = require('../../utils/speech-utils');
|
||||
const {DbErrorUnprocessableRequest, DbErrorForbidden, DbErrorBadRequest} = require('../../utils/errors');
|
||||
const {
|
||||
testGoogleTts,
|
||||
@@ -122,6 +123,7 @@ const encryptCredential = (obj) => {
|
||||
secret,
|
||||
nuance_tts_uri,
|
||||
nuance_stt_uri,
|
||||
speechmatics_stt_uri,
|
||||
deepgram_stt_uri,
|
||||
deepgram_stt_use_tls,
|
||||
deepgram_tts_uri,
|
||||
@@ -236,6 +238,12 @@ const encryptCredential = (obj) => {
|
||||
const elevenlabsData = JSON.stringify({api_key, model_id, options});
|
||||
return encrypt(elevenlabsData);
|
||||
|
||||
case 'speechmatics':
|
||||
assert(api_key, 'invalid speechmatics speech credential: api_key is required');
|
||||
assert(speechmatics_stt_uri, 'invalid speechmatics speech credential: speechmatics_stt_uri is required');
|
||||
const speechmaticsData = JSON.stringify({api_key, speechmatics_stt_uri, options});
|
||||
return encrypt(speechmaticsData);
|
||||
|
||||
case 'playht':
|
||||
assert(api_key, 'invalid playht speech credential: api_key is required');
|
||||
assert(user_id, 'invalid playht speech credential: user_id is required');
|
||||
@@ -768,6 +776,18 @@ router.get('/:sid/test', async(req, res) => {
|
||||
SpeechCredential.ttsTestResult(sid, false);
|
||||
}
|
||||
}
|
||||
} else if (cred.vendor === 'speechmatics') {
|
||||
const {api_key} = credential;
|
||||
if (cred.use_for_stt) {
|
||||
try {
|
||||
await testSpeechmaticsStt(logger, {api_key});
|
||||
results.stt.status = 'ok';
|
||||
SpeechCredential.ttsTestResult(sid, true);
|
||||
} catch (err) {
|
||||
results.stt = {status: 'fail', reason: err.message};
|
||||
SpeechCredential.ttsTestResult(sid, false);
|
||||
}
|
||||
}
|
||||
} else if (cred.vendor === 'playht') {
|
||||
if (cred.use_for_tts) {
|
||||
try {
|
||||
|
||||
@@ -7,6 +7,7 @@ const bent = require('bent');
|
||||
const fs = require('fs');
|
||||
const { AssemblyAI } = require('assemblyai');
|
||||
const {decrypt, obscureKey} = require('./encrypt-decrypt');
|
||||
const { RealtimeSession } = require('speechmatics');
|
||||
|
||||
const TtsGoogleLanguagesVoices = require('./speech-data/tts-google');
|
||||
const TtsAwsLanguagesVoices = require('./speech-data/tts-aws');
|
||||
@@ -54,6 +55,61 @@ const testSonioxStt = async(logger, credentials) => {
|
||||
});
|
||||
};
|
||||
|
||||
/**
 * Exercise a Speechmatics STT credential by streaming the bundled test audio
 * file through a realtime session.
 *
 * @param {object} logger - logger exposing `info(obj, msg)`; used to record failures
 * @param {object} credentials
 * @param {string} credentials.api_key - passed to RealtimeSession as `apiKey`
 * @param {string} [credentials.speechmatics_stt_uri] - passed to RealtimeSession as `realtimeUrl`
 * @returns {Promise<string>} resolves with the text accumulated from
 *   `AddTranscript` messages once `EndOfTranscript` is received
 * @throws rejects with the session error, the `start()` failure, or the
 *   file-stream error, whichever occurs first
 */
const testSpeechmaticsStt = async(logger, credentials) => {
  const {api_key, speechmatics_stt_uri} = credentials;

  return new Promise((resolve, reject) => {
    let settled = false;
    let session = null;
    let fileStream = null;
    let transcription = '';

    // Single failure path: log once, stop feeding audio, reject once.
    const fail = (error) => {
      if (settled) return;
      settled = true;
      logger.info({error}, 'failed to get speechmatics transcript');
      if (fileStream) fileStream.destroy();
      reject(error);
    };

    // stop() may throw or return a rejected promise; route both through
    // fail() so nothing is left as an unhandled rejection. Once the outer
    // promise has settled, fail() is a no-op.
    const stopSession = async() => {
      try {
        await session.stop();
      } catch (error) {
        fail(error);
      }
    };

    try {
      session = new RealtimeSession({ apiKey: api_key, realtimeUrl: speechmatics_stt_uri });

      session.addListener('Error', (error) => {
        fail(error);
      });

      session.addListener('AddTranscript', (message) => {
        transcription += message.metadata.transcript;
      });

      session.addListener('EndOfTranscript', () => {
        if (settled) return;
        settled = true;
        resolve(transcription);
      });

      session
        .start({
          transcription_config: {
            language: 'en',
            operating_point: 'enhanced',
            enable_partials: true,
            max_delay: 2,
          },
          audio_format: { type: 'file' },
        })
        .then(() => {
          // The session may already have reported an error while starting.
          if (settled) return;

          fileStream = fs.createReadStream(`${__dirname}/../../data/test_audio.wav`);

          fileStream.on('data', (sample) => {
            try {
              session.sendAudio(sample);
            } catch (error) {
              // A throw inside an event handler would otherwise be uncaught.
              fail(error);
              stopSession();
            }
          });

          // Without this handler a missing/unreadable test file raises an
          // uncaught 'error' event instead of failing the credential test.
          fileStream.on('error', (error) => {
            fail(error);
            stopSession();
          });

          // All audio sent: end the session so EndOfTranscript is delivered.
          fileStream.on('end', () => {
            stopSession();
          });
        })
        .catch(fail);
    } catch (error) {
      fail(error);
    }
  });
};
|
||||
|
||||
const testNuanceTts = async(logger, getTtsVoices, credentials) => {
|
||||
const voices = await getTtsVoices({vendor: 'nuance', credentials});
|
||||
return voices;
|
||||
@@ -532,6 +588,10 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
|
||||
} else if ('soniox' === obj.vendor) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
|
||||
} else if ('speechmatics' === obj.vendor) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
|
||||
obj.speechmatics_stt_uri = o.speechmatics_stt_uri;
|
||||
} else if ('elevenlabs' === obj.vendor) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
|
||||
@@ -1066,5 +1126,6 @@ module.exports = {
|
||||
testWhisper,
|
||||
testVerbioTts,
|
||||
testVerbioStt,
|
||||
getLanguagesAndVoicesForVendor
|
||||
getLanguagesAndVoicesForVendor,
|
||||
testSpeechmaticsStt
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user