support stt speechmatics (#353)

* support stt speechmatics

* support speechmatics region authentication

* update testcase for speechmatics_stt_uri
This commit is contained in:
Hoan Luu Huu
2024-10-11 20:17:40 +07:00
committed by GitHub
parent 77b9ca4cba
commit 6e779f6744
5 changed files with 156 additions and 2 deletions

View File

@@ -10,7 +10,8 @@ const {decryptCredential, testWhisper, testDeepgramTTS,
testPlayHT,
testRimelabs,
testVerbioTts,
testVerbioStt} = require('../../utils/speech-utils');
testVerbioStt,
testSpeechmaticsStt} = require('../../utils/speech-utils');
const {DbErrorUnprocessableRequest, DbErrorForbidden, DbErrorBadRequest} = require('../../utils/errors');
const {
testGoogleTts,
@@ -122,6 +123,7 @@ const encryptCredential = (obj) => {
secret,
nuance_tts_uri,
nuance_stt_uri,
speechmatics_stt_uri,
deepgram_stt_uri,
deepgram_stt_use_tls,
deepgram_tts_uri,
@@ -236,6 +238,12 @@ const encryptCredential = (obj) => {
const elevenlabsData = JSON.stringify({api_key, model_id, options});
return encrypt(elevenlabsData);
case 'speechmatics':
assert(api_key, 'invalid speechmatics speech credential: api_key is required');
assert(speechmatics_stt_uri, 'invalid speechmatics speech credential: speechmatics_stt_uri is required');
const speechmaticsData = JSON.stringify({api_key, speechmatics_stt_uri, options});
return encrypt(speechmaticsData);
case 'playht':
assert(api_key, 'invalid playht speech credential: api_key is required');
assert(user_id, 'invalid playht speech credential: user_id is required');
@@ -768,6 +776,18 @@ router.get('/:sid/test', async(req, res) => {
SpeechCredential.ttsTestResult(sid, false);
}
}
} else if (cred.vendor === 'speechmatics') {
const {api_key} = credential;
if (cred.use_for_stt) {
try {
await testSpeechmaticsStt(logger, {api_key});
results.stt.status = 'ok';
SpeechCredential.ttsTestResult(sid, true);
} catch (err) {
results.stt = {status: 'fail', reason: err.message};
SpeechCredential.ttsTestResult(sid, false);
}
}
} else if (cred.vendor === 'playht') {
if (cred.use_for_tts) {
try {

View File

@@ -7,6 +7,7 @@ const bent = require('bent');
const fs = require('fs');
const { AssemblyAI } = require('assemblyai');
const {decrypt, obscureKey} = require('./encrypt-decrypt');
const { RealtimeSession } = require('speechmatics');
const TtsGoogleLanguagesVoices = require('./speech-data/tts-google');
const TtsAwsLanguagesVoices = require('./speech-data/tts-aws');
@@ -54,6 +55,61 @@ const testSonioxStt = async(logger, credentials) => {
});
};
/**
 * Verify a Speechmatics credential by streaming the bundled test audio file
 * through a realtime session and collecting the transcript.
 *
 * @param {object} logger - logger exposing an `info` method
 * @param {object} credentials
 * @param {string} credentials.api_key - passed to the session as `apiKey`
 * @param {string} [credentials.speechmatics_stt_uri] - passed to the session as `realtimeUrl`
 *   (NOTE(review): presumably must be a URL form the SDK accepts - confirm)
 * @returns {Promise<string>} resolves on 'EndOfTranscript' with the concatenation of every
 *   'AddTranscript' message's `metadata.transcript`; rejects when the session reports an
 *   'Error', when starting or stopping the session fails, or when the audio file cannot be read
 */
const testSpeechmaticsStt = async(logger, credentials) => {
  const {api_key, speechmatics_stt_uri} = credentials;
  // plain (non-async) executor: every failure is routed through fail() explicitly
  return new Promise((resolve, reject) => {
    let fileStream;

    const fail = (error) => {
      logger.info({error}, 'failed to get speechmatics transcript');
      // stop feeding audio once the test has failed
      if (fileStream) fileStream.destroy();
      reject(error);
    };

    // normalise stop() so that neither a synchronous throw nor a rejection goes unhandled
    const stopSession = (session) => {
      try {
        return Promise.resolve(session.stop());
      } catch (error) {
        return Promise.reject(error);
      }
    };

    try {
      const session = new RealtimeSession({ apiKey: api_key, realtimeUrl: speechmatics_stt_uri });
      let transcription = '';

      session.addListener('Error', fail);

      session.addListener('AddTranscript', (message) => {
        transcription += message.metadata.transcript;
      });

      session.addListener('EndOfTranscript', () => {
        resolve(transcription);
      });

      session
        .start({
          transcription_config: {
            language: 'en',
            operating_point: 'enhanced',
            enable_partials: true,
            max_delay: 2,
          },
          audio_format: { type: 'file' },
        })
        .then(() => {
          //prepare file stream
          fileStream = fs.createReadStream(`${__dirname}/../../data/test_audio.wav`);

          //send it
          fileStream.on('data', (sample) => {
            session.sendAudio(sample);
          });

          //end the session
          fileStream.on('end', () => {
            stopSession(session).catch(fail);
          });

          //a missing/unreadable file must fail the test, not raise an unhandled 'error' event
          fileStream.on('error', (error) => {
            // best-effort close; the read error reported below is the one that matters
            stopSession(session).catch(() => {});
            fail(error);
          });
          return;
        })
        .catch(fail);
    } catch (error) {
      fail(error);
    }
  });
};
const testNuanceTts = async(logger, getTtsVoices, credentials) => {
const voices = await getTtsVoices({vendor: 'nuance', credentials});
return voices;
@@ -532,6 +588,10 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
} else if ('soniox' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
} else if ('speechmatics' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
obj.speechmatics_stt_uri = o.speechmatics_stt_uri;
} else if ('elevenlabs' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
@@ -1066,5 +1126,6 @@ module.exports = {
testWhisper,
testVerbioTts,
testVerbioStt,
getLanguagesAndVoicesForVendor
getLanguagesAndVoicesForVendor,
testSpeechmaticsStt
};

39
package-lock.json generated
View File

@@ -42,6 +42,7 @@
"passport-http-bearer": "^1.0.1",
"pino": "^8.20.0",
"short-uuid": "^4.2.2",
"speechmatics": "^4.0.0",
"stream-buffers": "^3.0.2",
"stripe": "^14.24.0",
"swagger-ui-express": "^5.0.0",
@@ -9046,6 +9047,35 @@
"node": ">=8"
}
},
"node_modules/speechmatics": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/speechmatics/-/speechmatics-4.0.0.tgz",
"integrity": "sha512-k4za7lTBA8Y3rXXqRBxHWQcwmZ2j4bewSFqKek/FpPNj+fgChtKH4UU1uDUHb9RbZ6R4zTwfZQoaNqHU3dUmEw==",
"license": "MIT",
"dependencies": {
"bufferutil": "^4.0.7",
"events": "^3.3.0",
"utf-8-validate": "^6.0.3",
"ws": "^8.13.0",
"zod": "^3.21.4"
},
"engines": {
"node": "^14.15.0 || ^16.10.0 || >=18.0.0"
}
},
"node_modules/speechmatics/node_modules/utf-8-validate": {
"version": "6.0.4",
"resolved": "https://registry.npmjs.org/utf-8-validate/-/utf-8-validate-6.0.4.tgz",
"integrity": "sha512-xu9GQDeFp+eZ6LnCywXN/zBancWvOpUMzgjLPSjy4BRHSmTelvn2E0DG0o1sTiw5hkCKBHo8rwSKncfRfv2EEQ==",
"hasInstallScript": true,
"license": "MIT",
"dependencies": {
"node-gyp-build": "^4.3.0"
},
"engines": {
"node": ">=6.14.2"
}
},
"node_modules/split2": {
"version": "4.2.0",
"resolved": "https://registry.npmjs.org/split2/-/split2-4.2.0.tgz",
@@ -10157,6 +10187,15 @@
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/zod": {
"version": "3.23.8",
"resolved": "https://registry.npmjs.org/zod/-/zod-3.23.8.tgz",
"integrity": "sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==",
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/colinhacks"
}
}
}
}

View File

@@ -52,6 +52,7 @@
"passport-http-bearer": "^1.0.1",
"pino": "^8.20.0",
"short-uuid": "^4.2.2",
"speechmatics": "^4.0.0",
"stream-buffers": "^3.0.2",
"stripe": "^14.24.0",
"swagger-ui-express": "^5.0.0",

View File

@@ -532,6 +532,39 @@ test('speech credentials tests', async(t) => {
t.ok(result.statusCode === 204, 'successfully deleted speech credential');
}
/* add a credential for Speechmatics (only runs when SPEECHMATICS_API_KEY is set) */
if (process.env.SPEECHMATICS_API_KEY) {
  result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
    resolveWithFullResponse: true,
    auth: authUser,
    json: true,
    body: {
      vendor: 'speechmatics',
      use_for_stt: true,
      api_key: process.env.SPEECHMATICS_API_KEY,
      speechmatics_stt_uri: 'eu2.rt.speechmatics.com'
    }
  });
  t.ok(result.statusCode === 201, 'successfully added speech credential for speechmatics');
  const speechmatics_sid = result.body.sid;

  /* test the speech credential */
  result = await request.get(`/Accounts/${account_sid}/SpeechCredentials/${speechmatics_sid}/test`, {
    resolveWithFullResponse: true,
    auth: authUser,
    json: true,
  });
  /* the full response is deliberately not dumped to the console: it only adds noise to test output */
  t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for speechmatics');

  /* delete the credential */
  result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${speechmatics_sid}`, {
    auth: authUser,
    resolveWithFullResponse: true,
  });
  t.ok(result.statusCode === 204, 'successfully deleted speech credential');
}
/* add a credential for nvidia */
result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
resolveWithFullResponse: true,