support stt speechmatics (#353)

* support stt speechmatics

* support speechmatics region authentication

* update testcase for speechmatics_stt_uri
This commit is contained in:
Hoan Luu Huu
2024-10-11 20:17:40 +07:00
committed by GitHub
parent 77b9ca4cba
commit 6e779f6744
5 changed files with 156 additions and 2 deletions

View File

@@ -10,7 +10,8 @@ const {decryptCredential, testWhisper, testDeepgramTTS,
testPlayHT,
testRimelabs,
testVerbioTts,
testVerbioStt} = require('../../utils/speech-utils');
testVerbioStt,
testSpeechmaticsStt} = require('../../utils/speech-utils');
const {DbErrorUnprocessableRequest, DbErrorForbidden, DbErrorBadRequest} = require('../../utils/errors');
const {
testGoogleTts,
@@ -122,6 +123,7 @@ const encryptCredential = (obj) => {
secret,
nuance_tts_uri,
nuance_stt_uri,
speechmatics_stt_uri,
deepgram_stt_uri,
deepgram_stt_use_tls,
deepgram_tts_uri,
@@ -236,6 +238,12 @@ const encryptCredential = (obj) => {
const elevenlabsData = JSON.stringify({api_key, model_id, options});
return encrypt(elevenlabsData);
case 'speechmatics':
assert(api_key, 'invalid speechmatics speech credential: api_key is required');
assert(speechmatics_stt_uri, 'invalid speechmatics speech credential: speechmatics_stt_uri is required');
const speechmaticsData = JSON.stringify({api_key, speechmatics_stt_uri, options});
return encrypt(speechmaticsData);
case 'playht':
assert(api_key, 'invalid playht speech credential: api_key is required');
assert(user_id, 'invalid playht speech credential: user_id is required');
@@ -768,6 +776,18 @@ router.get('/:sid/test', async(req, res) => {
SpeechCredential.ttsTestResult(sid, false);
}
}
} else if (cred.vendor === 'speechmatics') {
const {api_key} = credential;
if (cred.use_for_stt) {
try {
await testSpeechmaticsStt(logger, {api_key});
results.stt.status = 'ok';
SpeechCredential.ttsTestResult(sid, true);
} catch (err) {
results.stt = {status: 'fail', reason: err.message};
SpeechCredential.ttsTestResult(sid, false);
}
}
} else if (cred.vendor === 'playht') {
if (cred.use_for_tts) {
try {

View File

@@ -7,6 +7,7 @@ const bent = require('bent');
const fs = require('fs');
const { AssemblyAI } = require('assemblyai');
const {decrypt, obscureKey} = require('./encrypt-decrypt');
const { RealtimeSession } = require('speechmatics');
const TtsGoogleLanguagesVoices = require('./speech-data/tts-google');
const TtsAwsLanguagesVoices = require('./speech-data/tts-aws');
@@ -54,6 +55,61 @@ const testSonioxStt = async(logger, credentials) => {
});
};
/**
 * Exercise a Speechmatics STT credential by streaming the bundled test audio
 * file through a realtime session and collecting the final transcript.
 *
 * @param {object} logger - logger exposing an `info(obj, msg)` method
 * @param {object} credentials
 * @param {string} credentials.api_key - passed to the session as `apiKey`
 * @param {string} [credentials.speechmatics_stt_uri] - passed to the session as `realtimeUrl`
 * @returns {Promise<string>} resolves with the concatenated transcript once
 *   the session reports `EndOfTranscript`; rejects on a session error, a
 *   failure to start the session, or a failure to read the test audio file.
 */
const testSpeechmaticsStt = async(logger, credentials) => {
  const {api_key, speechmatics_stt_uri} = credentials;
  // The executor is intentionally synchronous: nothing is awaited inside it, and
  // an async executor would turn a thrown error into an orphaned rejection.
  return new Promise((resolve, reject) => {
    try {
      const session = new RealtimeSession({ apiKey: api_key, realtimeUrl: speechmatics_stt_uri });
      let transcription = '';

      session.addListener('Error', (error) => {
        reject(error);
      });

      session.addListener('AddTranscript', (message) => {
        transcription += message.metadata.transcript;
      });

      session.addListener('EndOfTranscript', () => {
        resolve(transcription);
      });

      session
        .start({
          transcription_config: {
            language: 'en',
            operating_point: 'enhanced',
            enable_partials: true,
            max_delay: 2,
          },
          audio_format: { type: 'file' },
        })
        .then(() => {
          // Stream the test audio to the session chunk by chunk.
          const fileStream = fs.createReadStream(`${__dirname}/../../data/test_audio.wav`);

          fileStream.on('data', (sample) => {
            session.sendAudio(sample);
          });

          // End of file: ask the session to finish; 'EndOfTranscript' resolves us.
          fileStream.on('end', () => {
            session.stop();
          });

          // Without this listener a missing/unreadable file raises an unhandled
          // 'error' event (crashing the process) and the promise never settles.
          fileStream.on('error', (error) => {
            logger.info({error}, 'failed to read speechmatics test audio');
            reject(error);
            // Best-effort close of the session so it is not left open; any
            // failure while stopping is irrelevant once we have rejected.
            try {
              Promise.resolve(session.stop()).catch(() => {});
            } catch (stopError) {
              /* ignore */
            }
          });
          return;
        })
        .catch((error) => {
          reject(error);
        });
    } catch (error) {
      logger.info({error}, 'failed to get speechmatics transcript');
      reject(error);
    }
  });
};
const testNuanceTts = async(logger, getTtsVoices, credentials) => {
const voices = await getTtsVoices({vendor: 'nuance', credentials});
return voices;
@@ -532,6 +588,10 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
} else if ('soniox' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
} else if ('speechmatics' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
obj.speechmatics_stt_uri = o.speechmatics_stt_uri;
} else if ('elevenlabs' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
@@ -1066,5 +1126,6 @@ module.exports = {
testWhisper,
testVerbioTts,
testVerbioStt,
getLanguagesAndVoicesForVendor
getLanguagesAndVoicesForVendor,
testSpeechmaticsStt
};