mirror of
https://github.com/jambonz/jambonz-api-server.git
synced 2025-12-19 05:47:46 +00:00
support stt speechmatics (#353)
* support stt speechmatics * support speechmatics region authentication * update testcase for speechmatics_stt_uri
This commit is contained in:
@@ -10,7 +10,8 @@ const {decryptCredential, testWhisper, testDeepgramTTS,
|
||||
testPlayHT,
|
||||
testRimelabs,
|
||||
testVerbioTts,
|
||||
testVerbioStt} = require('../../utils/speech-utils');
|
||||
testVerbioStt,
|
||||
testSpeechmaticsStt} = require('../../utils/speech-utils');
|
||||
const {DbErrorUnprocessableRequest, DbErrorForbidden, DbErrorBadRequest} = require('../../utils/errors');
|
||||
const {
|
||||
testGoogleTts,
|
||||
@@ -122,6 +123,7 @@ const encryptCredential = (obj) => {
|
||||
secret,
|
||||
nuance_tts_uri,
|
||||
nuance_stt_uri,
|
||||
speechmatics_stt_uri,
|
||||
deepgram_stt_uri,
|
||||
deepgram_stt_use_tls,
|
||||
deepgram_tts_uri,
|
||||
@@ -236,6 +238,12 @@ const encryptCredential = (obj) => {
|
||||
const elevenlabsData = JSON.stringify({api_key, model_id, options});
|
||||
return encrypt(elevenlabsData);
|
||||
|
||||
case 'speechmatics':
|
||||
assert(api_key, 'invalid speechmatics speech credential: api_key is required');
|
||||
assert(speechmatics_stt_uri, 'invalid speechmatics speech credential: speechmatics_stt_uri is required');
|
||||
const speechmaticsData = JSON.stringify({api_key, speechmatics_stt_uri, options});
|
||||
return encrypt(speechmaticsData);
|
||||
|
||||
case 'playht':
|
||||
assert(api_key, 'invalid playht speech credential: api_key is required');
|
||||
assert(user_id, 'invalid playht speech credential: user_id is required');
|
||||
@@ -768,6 +776,18 @@ router.get('/:sid/test', async(req, res) => {
|
||||
SpeechCredential.ttsTestResult(sid, false);
|
||||
}
|
||||
}
|
||||
} else if (cred.vendor === 'speechmatics') {
|
||||
const {api_key} = credential;
|
||||
if (cred.use_for_stt) {
|
||||
try {
|
||||
await testSpeechmaticsStt(logger, {api_key});
|
||||
results.stt.status = 'ok';
|
||||
SpeechCredential.ttsTestResult(sid, true);
|
||||
} catch (err) {
|
||||
results.stt = {status: 'fail', reason: err.message};
|
||||
SpeechCredential.ttsTestResult(sid, false);
|
||||
}
|
||||
}
|
||||
} else if (cred.vendor === 'playht') {
|
||||
if (cred.use_for_tts) {
|
||||
try {
|
||||
|
||||
@@ -7,6 +7,7 @@ const bent = require('bent');
|
||||
const fs = require('fs');
|
||||
const { AssemblyAI } = require('assemblyai');
|
||||
const {decrypt, obscureKey} = require('./encrypt-decrypt');
|
||||
const { RealtimeSession } = require('speechmatics');
|
||||
|
||||
const TtsGoogleLanguagesVoices = require('./speech-data/tts-google');
|
||||
const TtsAwsLanguagesVoices = require('./speech-data/tts-aws');
|
||||
@@ -54,6 +55,61 @@ const testSonioxStt = async(logger, credentials) => {
|
||||
});
|
||||
};
|
||||
|
||||
const testSpeechmaticsStt = async(logger, credentials) => {
|
||||
const {api_key, speechmatics_stt_uri} = credentials;
|
||||
return new Promise(async(resolve, reject) => {
|
||||
try {
|
||||
const session = new RealtimeSession({ apiKey: api_key, realtimeUrl: speechmatics_stt_uri });
|
||||
let transcription = '';
|
||||
session.addListener('Error', (error) => {
|
||||
reject(error);
|
||||
});
|
||||
|
||||
session.addListener('AddTranscript', (message) => {
|
||||
transcription += message.metadata.transcript;
|
||||
});
|
||||
|
||||
session.addListener('EndOfTranscript', () => {
|
||||
resolve(transcription);
|
||||
});
|
||||
|
||||
session
|
||||
.start({
|
||||
transcription_config: {
|
||||
language: 'en',
|
||||
operating_point: 'enhanced',
|
||||
enable_partials: true,
|
||||
max_delay: 2,
|
||||
},
|
||||
audio_format: { type: 'file' },
|
||||
})
|
||||
.then(() => {
|
||||
//prepare file stream
|
||||
const fileStream = fs.createReadStream(`${__dirname}/../../data/test_audio.wav`);
|
||||
|
||||
//send it
|
||||
fileStream.on('data', (sample) => {
|
||||
session.sendAudio(sample);
|
||||
});
|
||||
|
||||
//end the session
|
||||
fileStream.on('end', () => {
|
||||
session.stop();
|
||||
});
|
||||
|
||||
return;
|
||||
|
||||
})
|
||||
.catch((error) => {
|
||||
reject(error);
|
||||
});
|
||||
} catch (error) {
|
||||
logger.info({error}, 'failed to get speechmatics transcript');
|
||||
reject(error);
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
const testNuanceTts = async(logger, getTtsVoices, credentials) => {
|
||||
const voices = await getTtsVoices({vendor: 'nuance', credentials});
|
||||
return voices;
|
||||
@@ -532,6 +588,10 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
|
||||
} else if ('soniox' === obj.vendor) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
|
||||
} else if ('speechmatics' === obj.vendor) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
|
||||
obj.speechmatics_stt_uri = o.speechmatics_stt_uri;
|
||||
} else if ('elevenlabs' === obj.vendor) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
|
||||
@@ -1066,5 +1126,6 @@ module.exports = {
|
||||
testWhisper,
|
||||
testVerbioTts,
|
||||
testVerbioStt,
|
||||
getLanguagesAndVoicesForVendor
|
||||
getLanguagesAndVoicesForVendor,
|
||||
testSpeechmaticsStt
|
||||
};
|
||||
|
||||
39
package-lock.json
generated
39
package-lock.json
generated
@@ -42,6 +42,7 @@
|
||||
"passport-http-bearer": "^1.0.1",
|
||||
"pino": "^8.20.0",
|
||||
"short-uuid": "^4.2.2",
|
||||
"speechmatics": "^4.0.0",
|
||||
"stream-buffers": "^3.0.2",
|
||||
"stripe": "^14.24.0",
|
||||
"swagger-ui-express": "^5.0.0",
|
||||
@@ -9046,6 +9047,35 @@
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/speechmatics": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/speechmatics/-/speechmatics-4.0.0.tgz",
|
||||
"integrity": "sha512-k4za7lTBA8Y3rXXqRBxHWQcwmZ2j4bewSFqKek/FpPNj+fgChtKH4UU1uDUHb9RbZ6R4zTwfZQoaNqHU3dUmEw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"bufferutil": "^4.0.7",
|
||||
"events": "^3.3.0",
|
||||
"utf-8-validate": "^6.0.3",
|
||||
"ws": "^8.13.0",
|
||||
"zod": "^3.21.4"
|
||||
},
|
||||
"engines": {
|
||||
"node": "^14.15.0 || ^16.10.0 || >=18.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/speechmatics/node_modules/utf-8-validate": {
|
||||
"version": "6.0.4",
|
||||
"resolved": "https://registry.npmjs.org/utf-8-validate/-/utf-8-validate-6.0.4.tgz",
|
||||
"integrity": "sha512-xu9GQDeFp+eZ6LnCywXN/zBancWvOpUMzgjLPSjy4BRHSmTelvn2E0DG0o1sTiw5hkCKBHo8rwSKncfRfv2EEQ==",
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"node-gyp-build": "^4.3.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.14.2"
|
||||
}
|
||||
},
|
||||
"node_modules/split2": {
|
||||
"version": "4.2.0",
|
||||
"resolved": "https://registry.npmjs.org/split2/-/split2-4.2.0.tgz",
|
||||
@@ -10157,6 +10187,15 @@
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/zod": {
|
||||
"version": "3.23.8",
|
||||
"resolved": "https://registry.npmjs.org/zod/-/zod-3.23.8.tgz",
|
||||
"integrity": "sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==",
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/colinhacks"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -52,6 +52,7 @@
|
||||
"passport-http-bearer": "^1.0.1",
|
||||
"pino": "^8.20.0",
|
||||
"short-uuid": "^4.2.2",
|
||||
"speechmatics": "^4.0.0",
|
||||
"stream-buffers": "^3.0.2",
|
||||
"stripe": "^14.24.0",
|
||||
"swagger-ui-express": "^5.0.0",
|
||||
|
||||
@@ -532,6 +532,39 @@ test('speech credentials tests', async(t) => {
|
||||
t.ok(result.statusCode === 204, 'successfully deleted speech credential');
|
||||
}
|
||||
|
||||
/* add a credential for Speechmatics */
|
||||
if (process.env.SPEECHMATICS_API_KEY) {
|
||||
result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
|
||||
resolveWithFullResponse: true,
|
||||
auth: authUser,
|
||||
json: true,
|
||||
body: {
|
||||
vendor: 'speechmatics',
|
||||
use_for_stt: true,
|
||||
api_key: process.env.SPEECHMATICS_API_KEY,
|
||||
speechmatics_stt_uri: 'eu2.rt.speechmatics.com'
|
||||
}
|
||||
});
|
||||
t.ok(result.statusCode === 201, 'successfully added speech credential for speechmatics');
|
||||
const ms_sid = result.body.sid;
|
||||
|
||||
/* test the speech credential */
|
||||
result = await request.get(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}/test`, {
|
||||
resolveWithFullResponse: true,
|
||||
auth: authUser,
|
||||
json: true,
|
||||
});
|
||||
console.log(JSON.stringify(result));
|
||||
t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for speechmatics');
|
||||
|
||||
/* delete the credential */
|
||||
result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}`, {
|
||||
auth: authUser,
|
||||
resolveWithFullResponse: true,
|
||||
});
|
||||
t.ok(result.statusCode === 204, 'successfully deleted speech credential');
|
||||
}
|
||||
|
||||
/* add a credential for nvidia */
|
||||
result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
|
||||
resolveWithFullResponse: true,
|
||||
|
||||
Reference in New Issue
Block a user