Compare commits

..

4 Commits

Author SHA1 Message Date
Quan HL
4e59bcb9fa fix: update custom stt/tss url 2023-03-05 06:33:25 +07:00
Dave Horton
07421d830e change property names for custom speech 2023-03-02 15:28:25 -05:00
Dave Horton
5c687aebdd add support for custom speech api 2023-03-02 14:04:03 -05:00
EgleH
cc384995ea add support for soniox speech (#120)
Co-authored-by: eglehelms <e.helms@cognigy.com>
2023-02-26 11:31:39 -05:00
6 changed files with 130 additions and 5 deletions

View File

@@ -86,6 +86,14 @@ SpeechCredential.fields = [
{
name: 'last_tested',
type: 'date'
},
{
name: 'custom_tts_url',
type: 'date'
},
{
name: 'custom_stt_url',
type: 'date'
}
];

View File

@@ -17,6 +17,7 @@ const {
testNuanceStt,
testNuanceTts,
testDeepgramStt,
testSonioxStt,
testIbmTts,
testIbmStt
} = require('../../utils/speech-utils');
@@ -52,7 +53,10 @@ const encryptCredential = (obj) => {
stt_api_key,
stt_region,
riva_server_uri,
instance_id
instance_id,
custom_stt_url,
custom_tts_url,
auth_token = ''
} = obj;
switch (vendor) {
@@ -112,8 +116,17 @@ const encryptCredential = (obj) => {
const nvidiaData = JSON.stringify({ riva_server_uri });
return encrypt(nvidiaData);
case 'soniox':
assert(api_key, 'invalid soniox speech credential: api_key is required');
const sonioxData = JSON.stringify({api_key});
return encrypt(sonioxData);
default:
assert(false, `invalid or missing vendor: ${vendor}`);
if (vendor.startsWith('custom:')) {
const customData = JSON.stringify({auth_token, custom_stt_url, custom_tts_url});
return encrypt(customData);
}
else assert(false, `invalid or missing vendor: ${vendor}`);
}
};
@@ -220,6 +233,16 @@ router.get('/', async(req, res) => {
const o = JSON.parse(decrypt(credential));
obj.riva_server_uri = o.riva_server_uri;
}
else if ('soniox' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = obscureKey(o.api_key);
}
else if (obj.vendor.startsWith('custom:')) {
const o = JSON.parse(decrypt(credential));
obj.auth_token = obscureKey(o.auth_token);
obj.custom_stt_url = o.custom_stt_url;
obj.custom_tts_url = o.custom_tts_url;
}
return obj;
}));
} catch (err) {
@@ -285,6 +308,12 @@ router.get('/:sid', async(req, res) => {
const o = JSON.parse(decrypt(credential));
obj.riva_server_uri = o.riva_server_uri;
}
else if (obj.vendor.startsWith('custom:')) {
const o = JSON.parse(decrypt(credential));
obj.auth_token = obscureKey(o.auth_token);
obj.custom_stt_url = o.custom_stt_url;
obj.custom_tts_url = o.custom_tts_url;
}
res.status(200).json(obj);
} catch (err) {
sysError(logger, res, err);
@@ -314,7 +343,8 @@ router.put('/:sid', async(req, res) => {
const sid = req.params.sid;
const logger = req.app.locals.logger;
try {
const {use_for_tts, use_for_stt, region, aws_region, stt_region, tts_region, riva_server_uri} = req.body;
const {use_for_tts, use_for_stt, region, aws_region, stt_region, tts_region,
riva_server_uri, custom_stt_url, custom_tts_url} = req.body;
if (typeof use_for_tts === 'undefined' && typeof use_for_stt === 'undefined') {
throw new DbErrorUnprocessableRequest('use_for_tts and use_for_stt are the only updateable fields');
}
@@ -350,7 +380,9 @@ router.put('/:sid', async(req, res) => {
custom_stt_endpoint,
stt_region,
tts_region,
riva_server_uri
riva_server_uri,
custom_stt_url,
custom_tts_url
};
logger.info({o, newCred}, 'updating speech credential with this new credential');
obj.credential = encryptCredential(newCred);
@@ -583,8 +615,22 @@ router.get('/:sid/test', async(req, res) => {
}
}
}
else if (cred.vendor === 'soniox') {
const {api_key} = credential;
if (cred.use_for_stt) {
try {
await testSonioxStt(logger, {api_key});
results.stt.status = 'ok';
SpeechCredential.sttTestResult(sid, true);
} catch (err) {
results.stt = {status: 'fail', reason: err.message};
SpeechCredential.sttTestResult(sid, false);
}
}
}
res.status(200).json(results);
} catch (err) {
sysError(logger, res, err);
}

View File

@@ -4,9 +4,27 @@ const Polly = require('aws-sdk/clients/polly');
const AWS = require('aws-sdk');
const { Deepgram } = require('@deepgram/sdk');
const sdk = require('microsoft-cognitiveservices-speech-sdk');
const { SpeechClient } = require('@soniox/soniox-node');
const bent = require('bent');
const fs = require('fs');
const testSonioxStt = async(logger, credentials) => {
const api_key = credentials;
const soniox = new SpeechClient(api_key);
return new Promise(async(resolve, reject) => {
try {
const result = await soniox.transcribeFileShort('data/test_audio.wav');
if (result.words.length > 0) resolve(result);
else reject(new Error('no transcript returned'));
} catch (error) {
logger.info({error}, 'failed to get soniox transcript');
reject(error);
}
});
};
const testNuanceTts = async(logger, getTtsVoices, credentials) => {
const voices = await getTtsVoices({vendor: 'nuance', credentials});
return voices;
@@ -225,5 +243,6 @@ module.exports = {
testNuanceStt,
testDeepgramStt,
testIbmTts,
testIbmStt
testIbmStt,
testSonioxStt
};

19
package-lock.json generated
View File

@@ -16,6 +16,7 @@
"@jambonz/realtimedb-helpers": "^0.6.0",
"@jambonz/time-series": "^0.2.5",
"@jambonz/verb-specifications": "^0.0.3",
"@soniox/soniox-node": "^1.1.0",
"argon2-ffi": "^2.0.0",
"aws-sdk": "^2.1302.0",
"bent": "^7.3.12",
@@ -841,6 +842,15 @@
"resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz",
"integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw=="
},
"node_modules/@soniox/soniox-node": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@soniox/soniox-node/-/soniox-node-1.1.0.tgz",
"integrity": "sha512-LtE1JZZ3PFAWyqJpp/UwYmMx1IbZ+JR/OzFmyQ1Oj4mrMrvGpH+lTMIpdG8s6/lDhhefoKP5OTziks8nQNkdew==",
"dependencies": {
"@grpc/grpc-js": "^1.6.10",
"@grpc/proto-loader": "^0.7.2"
}
},
"node_modules/@tokenizer/token": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/@tokenizer/token/-/token-0.3.0.tgz",
@@ -8273,6 +8283,15 @@
"resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz",
"integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw=="
},
"@soniox/soniox-node": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@soniox/soniox-node/-/soniox-node-1.1.0.tgz",
"integrity": "sha512-LtE1JZZ3PFAWyqJpp/UwYmMx1IbZ+JR/OzFmyQ1Oj4mrMrvGpH+lTMIpdG8s6/lDhhefoKP5OTziks8nQNkdew==",
"requires": {
"@grpc/grpc-js": "^1.6.10",
"@grpc/proto-loader": "^0.7.2"
}
},
"@tokenizer/token": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/@tokenizer/token/-/token-0.3.0.tgz",

View File

@@ -25,6 +25,7 @@
"@jambonz/realtimedb-helpers": "^0.6.0",
"@jambonz/time-series": "^0.2.5",
"@jambonz/verb-specifications": "^0.0.3",
"@soniox/soniox-node": "^1.1.0",
"argon2-ffi": "^2.0.0",
"aws-sdk": "^2.1302.0",
"bent": "^7.3.12",

View File

@@ -300,6 +300,38 @@ test('speech credentials tests', async(t) => {
t.ok(result.statusCode === 204, 'successfully deleted speech credential');
}
/* add a credential for Siniox */
if (process.env.SONIOX_API_KEY) {
result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
body: {
vendor: 'soniox',
use_for_stt: true,
api_key: process.env.SONIOX_API_KEY
}
});
t.ok(result.statusCode === 201, 'successfully added speech credential for soniox');
const ms_sid = result.body.sid;
/* test the speech credential */
result = await request.get(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}/test`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
});
console.log(JSON.stringify(result));
t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for soniox');
/* delete the credential */
result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}`, {
auth: authUser,
resolveWithFullResponse: true,
});
t.ok(result.statusCode === 204, 'successfully deleted speech credential');
}
/* add a credential for nvidia */
result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
resolveWithFullResponse: true,