Compare commits

...

7 Commits

Author SHA1 Message Date
Quan HL
7f904930ae fix azure cannot download mp3 if encodingMp3 = true 2024-05-03 19:43:45 +07:00
Hoan Luu Huu
c0fab2880b fix cannot send multipart to aws due to min size (#319) 2024-05-03 07:37:38 -04:00
Hoan Luu Huu
ce2fa392a4 support aws speech by roleArn (#313)
* support aws speech by roleArn

* support 3 types of aws  credentials

* wip

* wip

* update speech util version
2024-05-02 07:57:22 -04:00
Hoan Luu Huu
3b47162d13 Feat/record with pipeline (#318)
* use pipeline for nodejs streams

* use pipeline for nodejs streams
2024-04-30 07:39:24 -04:00
Hoan Luu Huu
b765232d4f api server cannot synthesize text after upgrade latest speech-utils (#317)
* api server cannot synthesize text after upgrade latest speech-utils

* wip

* add testcase for synthesize text

* fix synthesize testcase
2024-04-29 19:48:34 -04:00
Dave Horton
2436bea6ea add support for LCC updateCall with conferenceParticipantState (#296)
* add support for LCC updateCall with conferenceParticipantState

* wip

* wip
2024-04-22 11:06:08 -04:00
Dave Horton
f67abddbd4 bug: attempting to add duplicate dns records on hosted system (#312) 2024-04-19 18:13:27 -04:00
12 changed files with 229 additions and 46 deletions

2
app.js
View File

@@ -52,6 +52,7 @@ const {
getTtsVoices,
getTtsSize,
purgeTtsCache,
getAwsAuthToken,
synthAudio
} = require('@jambonz/speech-utils')({}, logger);
const {
@@ -95,6 +96,7 @@ app.locals = {
deleteKey,
getTtsVoices,
getTtsSize,
getAwsAuthToken,
purgeTtsCache,
synthAudio,
lookupAppBySid,

View File

@@ -16,7 +16,7 @@ class S3MultipartUploadStream extends Writable {
this.partNumber = 1;
this.multipartETags = [];
this.buffer = Buffer.alloc(0);
this.minPartSize = 2 * 1024 * 1024; // 5 MB
this.minPartSize = 5 * 1024 * 1024; // 5 MB
this.s3 = new S3Client(opts.bucketCredential);
this.metadata = opts.metadata;
}

View File

@@ -3,6 +3,7 @@ const Websocket = require('ws');
const PCMToMP3Encoder = require('./encoder');
const wav = require('wav');
const { getUploader } = require('./utils');
const { pipeline } = require('stream');
async function upload(logger, socket) {
socket._recvInitialMetadata = false;
@@ -60,22 +61,19 @@ async function upload(logger, socket) {
bitrate: 128
}, logger);
}
const handleError = (err, streamType) => {
logger.error(
{ err },
`Error while streaming for vendor: ${obj.vendor}, pipe: ${streamType}: ${err.message}`
);
};
/* start streaming data */
const duplex = Websocket.createWebSocketStream(socket);
duplex
.on('error', (err) => handleError(err, 'duplex'))
.pipe(encoder)
.on('error', (err) => handleError(err, 'encoder'))
.pipe(uploadStream)
.on('error', (err) => handleError(err, 'uploadStream'));
pipeline(
Websocket.createWebSocketStream(socket),
encoder,
uploadStream,
(error) => {
if (error) {
logger.error({ error }, 'pipeline error, cannot upload data to storage');
socket.close();
}
}
);
} else {
logger.info(`account ${accountSid} does not have any bucket credential, close the socket`);
socket.close();

View File

@@ -265,7 +265,8 @@ function validateUpdateCall(opts) {
'sip_request',
'record',
'tag',
'dtmf'
'dtmf',
'conferenceParticipantAction'
]
.reduce((acc, prop) => (opts[prop] ? ++acc : acc), 0);
@@ -316,6 +317,19 @@ function validateUpdateCall(opts) {
if (opts.tag && (typeof opts.tag !== 'object' || Array.isArray(opts.tag) || opts.tag === null)) {
throw new DbErrorBadRequest('invalid tag data');
}
if (opts.conferenceParticipantAction) {
if (!['tag', 'untag', 'coach', 'uncoach', 'mute', 'unmute', 'hold', 'unhold']
.includes(opts.conferenceParticipantAction.action)) {
throw new DbErrorBadRequest(
`conferenceParticipantAction invalid action property ${opts.conferenceParticipantAction.action}`);
}
if ('tag' == opts.conferenceParticipantAction.action && !opts.tag) {
throw new DbErrorBadRequest('conferenceParticipantAction requires tag property when action is \'tag\'');
}
if ('coach' == opts.conferenceParticipantAction.action && !opts.tag) {
throw new DbErrorBadRequest('conferenceParticipantAction requires tag property when action is \'coach\'');
}
}
}
function validateTo(to) {

View File

@@ -31,6 +31,7 @@ router.post('/:sip_realm', async(req, res) => {
const [sbcs] = await promisePool.query('SELECT ipv4 from sbc_addresses');
if (sbcs.length === 0) throw new Error('no SBC addresses provisioned in the database!');
const ips = sbcs.map((s) => s.ipv4);
const uniqueIps = [...new Set(ips)];
/* retrieve existing dns records */
const [old_recs] = await promisePool.query('SELECT record_id from dns_records WHERE account_sid = ?',
@@ -48,7 +49,7 @@ router.post('/:sip_realm', async(req, res) => {
}
/* add the dns records */
const records = await createDnsRecords(logger, domain, subdomain, ips);
const records = await createDnsRecords(logger, domain, subdomain, uniqueIps);
if (!records) throw new Error(`failure updating dns records for ${sip_realm}`);
const values = records.map((r) => {
return `('${uuid()}', '${account_sid}', '${r.type}', ${r.id})`;

View File

@@ -113,6 +113,7 @@ const encryptCredential = (obj) => {
secret_access_key,
aws_region,
api_key,
role_arn,
region,
client_id,
secret,
@@ -155,10 +156,17 @@ const encryptCredential = (obj) => {
return encrypt(service_key);
case 'aws':
assert(access_key_id, 'invalid aws speech credential: access_key_id is required');
assert(secret_access_key, 'invalid aws speech credential: secret_access_key is required');
assert(aws_region, 'invalid aws speech credential: aws_region is required');
const awsData = JSON.stringify({aws_region, access_key_id, secret_access_key});
// AWS Polly can work with 3 types of credentials:
// 1/ access_key_id and secret_access_key
// 2/ RoleArn, used to assume a role
// 3/ RoleArn assigned to the instance profile of the host where this application runs
const awsData = JSON.stringify(
{
aws_region,
...(access_key_id && {access_key_id}),
...(secret_access_key && {secret_access_key}),
...(role_arn && {role_arn}),
});
return encrypt(awsData);
case 'microsoft':
@@ -541,12 +549,13 @@ router.get('/:sid/test', async(req, res) => {
}
}
else if (cred.vendor === 'aws') {
const {getTtsVoices, getAwsAuthToken} = req.app.locals;
if (cred.use_for_tts) {
const {getTtsVoices} = req.app.locals;
try {
await testAwsTts(logger, getTtsVoices, {
accessKeyId: credential.access_key_id,
secretAccessKey: credential.secret_access_key,
roleArn: credential.role_arn,
region: credential.aws_region || process.env.AWS_REGION
});
results.tts.status = 'ok';
@@ -558,9 +567,10 @@ router.get('/:sid/test', async(req, res) => {
}
if (cred.use_for_stt) {
try {
await testAwsStt(logger, {
await testAwsStt(logger, getAwsAuthToken, {
accessKeyId: credential.access_key_id,
secretAccessKey: credential.secret_access_key,
roleArn: credential.role_arn,
region: credential.aws_region || process.env.AWS_REGION
});
results.stt.status = 'ok';

View File

@@ -10,6 +10,7 @@ const Account = require('../../models/account');
const sysError = require('../error');
const { getSpeechCredential, decryptCredential } = require('../../utils/speech-utils');
const PCMToMP3Encoder = require('../../record/encoder');
const { pipeline } = require('stream');
router.delete('/', async(req, res) => {
const {purgeTtsCache} = req.app.locals;
@@ -69,6 +70,8 @@ router.post('/Synthesize', async(req, res) => {
voice = arr[1];
model = arr[2];
}
} else if (cred.vendor === 'deepgram') {
model = voice;
}
const stats = {
histogram: () => {},
@@ -84,7 +87,8 @@ router.post('/Synthesize', async(req, res) => {
model,
salt,
credentials: cred,
disableTtsCache: false
disableTtsCache: false,
disableTtsStreaming: true
});
let contentType = 'audio/mpeg';
@@ -92,17 +96,27 @@ router.post('/Synthesize', async(req, res) => {
let readStream = fs.createReadStream(filePath);
if (['nuance', 'nvidia'].includes(cred.vendor) ||
(
process.env.JAMBONES_TTS_TRIM_SILENCE &&
(process.env.JAMBONES_TTS_TRIM_SILENCE || !process.env.JAMBONES_DISABLE_TTS_STREAMING) &&
['microsoft', 'azure'].includes(cred.vendor)
)
) {
if (encodingMp3) {
readStream = readStream
.pipe(new PCMToMP3Encoder({
readStream = pipeline(
readStream,
new PCMToMP3Encoder({
channels: 1,
sampleRate: 8000,
bitRate: 128
}, logger));
}, logger),
(err) => {
if (err) {
logger.error('ttscache/Synthesize failed:', err);
if (!res.headersSent) {
res.status(500).end('Server error');
}
}
}
);
} else {
contentType = 'application/octet-stream';
}
@@ -110,10 +124,17 @@ router.post('/Synthesize', async(req, res) => {
res.writeHead(200, {
'Content-Type': contentType,
});
readStream.pipe(res);
readStream.on('end', () => {
fs.unlink(filePath, (err) => {
if (err) throw err;
pipeline(readStream, res, (err) => {
if (err) {
logger.error('ttscache/Synthesize failed:', err);
if (!res.headersSent) {
res.status(500).end('Server error');
}
}
fs.unlink(filePath, (unlinkErr) => {
if (unlinkErr) throw unlinkErr;
logger.info(`${filePath} was deleted`);
});
});

View File

@@ -4152,6 +4152,22 @@ paths:
type: string
siprecServerURL:
type: string
conferenceParticipantAction:
type: object
properties:
action:
type: string
enum:
- tag
- untag
- coach
- uncoach
- mute
- unmute
- hold
- unhold
tag:
type: string
responses:
200:
description: Accepted

View File

@@ -165,16 +165,26 @@ const testAwsTts = async(logger, getTtsVoices, credentials) => {
}
};
const testAwsStt = async(logger, credentials) => {
const testAwsStt = async(logger, getAwsAuthToken, credentials) => {
try {
const {region, accessKeyId, secretAccessKey} = credentials;
const client = new TranscribeClient({
region,
credentials: {
accessKeyId,
secretAccessKey
}
});
const {region, accessKeyId, secretAccessKey, roleArn} = credentials;
let client = null;
if (accessKeyId && secretAccessKey) {
client = new TranscribeClient({
region,
credentials: {
accessKeyId,
secretAccessKey
}
});
} else if (roleArn) {
client = new TranscribeClient({
region,
credentials: await getAwsAuthToken(null, null, region, roleArn),
});
} else {
client = new TranscribeClient({region});
}
const command = new ListVocabulariesCommand({});
const response = await client.send(command);
return response;
@@ -400,6 +410,7 @@ const getSpeechCredential = (credential, logger) => {
...credential,
accessKeyId: credential.access_key_id,
secretAccessKey: credential.secret_access_key,
roleArn: credential.role_arn,
region: credential.aws_region || 'us-east-1'
};
}
@@ -421,6 +432,7 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
else if ('aws' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.access_key_id = o.access_key_id;
obj.role_arn = o.role_arn;
obj.secret_access_key = isObscureKey ? obscureKey(o.secret_access_key) : o.secret_access_key;
obj.aws_region = o.aws_region;
logger.info({obj, o}, 'retrieving aws speech credential');
@@ -584,6 +596,7 @@ async function getLanguagesVoicesForAws(credential, getTtsVoices, logger) {
credentials: {
accessKeyId: credential.access_key_id,
secretAccessKey: credential.secret_access_key,
roleArn: credential.role_arn,
region: credential.aws_region || process.env.AWS_REGION
}
});

8
package-lock.json generated
View File

@@ -19,7 +19,7 @@
"@jambonz/lamejs": "^1.2.2",
"@jambonz/mw-registrar": "^0.2.7",
"@jambonz/realtimedb-helpers": "^0.8.8",
"@jambonz/speech-utils": "^0.0.51",
"@jambonz/speech-utils": "^0.1.0",
"@jambonz/time-series": "^0.2.8",
"@jambonz/verb-specifications": "^0.0.69",
"@soniox/soniox-node": "^1.2.2",
@@ -2027,9 +2027,9 @@
}
},
"node_modules/@jambonz/speech-utils": {
"version": "0.0.51",
"resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.0.51.tgz",
"integrity": "sha512-3Zk2CERs1PYQiCG08NDMNBbDzBBfPuEwgADTANMP56dd07PpW360ufL8CcQfkBmWKGVma0wevRrv6DQLu2Ifdg==",
"version": "0.1.0",
"resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.1.0.tgz",
"integrity": "sha512-45K6Vrl2PMEbbcnvm65afCDujDxck/bEUq7+P6KRw/cei3mrKtwjGh3HXi1cKhC1gA5UF1+5YrUoPO9LdoZnog==",
"dependencies": {
"@aws-sdk/client-polly": "^3.496.0",
"@aws-sdk/client-sts": "^3.496.0",

View File

@@ -29,7 +29,7 @@
"@jambonz/lamejs": "^1.2.2",
"@jambonz/mw-registrar": "^0.2.7",
"@jambonz/realtimedb-helpers": "^0.8.8",
"@jambonz/speech-utils": "^0.0.51",
"@jambonz/speech-utils": "^0.1.0",
"@jambonz/time-series": "^0.2.8",
"@jambonz/verb-specifications": "^0.0.69",
"@soniox/soniox-node": "^1.2.2",

View File

@@ -170,6 +170,20 @@ test('speech credentials tests', async(t) => {
//console.log(JSON.stringify(result));
t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for google tts');
t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for google stt');
result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
body: {
speech_credential_sid: ms_sid,
text: "Hello How are you",
language: "en-US",
voice: "en-US-Standard-C"
}
});
t.ok(result.statusCode === 200, 'successfully google tested synthesize');
}
/* add / test a credential for microsoft */
@@ -198,6 +212,20 @@ test('speech credentials tests', async(t) => {
//console.log(JSON.stringify(result));
t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for microsoft tts');
t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for microsoft stt');
result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
body: {
speech_credential_sid: ms_sid,
text: "Hello How are you",
language: "en-US",
voice: "en-US-AvaMultilingualNeural"
}
});
t.ok(result.statusCode === 200, 'successfully microsoft tested synthesize');
}
/* add / test a credential for AWS */
@@ -227,6 +255,20 @@ test('speech credentials tests', async(t) => {
//console.log(JSON.stringify(result));
t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for AWS tts');
t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for AWS stt');
result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
body: {
speech_credential_sid: ms_sid,
text: "Hello How are you",
language: "en-US",
voice: "Joanna"
}
});
t.ok(result.statusCode === 200, 'successfully AWS tested synthesize');
}
/* add a credential for wellsaid */
@@ -253,6 +295,20 @@ test('speech credentials tests', async(t) => {
//console.log(JSON.stringify(result));
t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for wellsaid');
result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
body: {
speech_credential_sid: ms_sid,
text: "Hello How are you",
language: "en-US",
voice: "3"
}
});
t.ok(result.statusCode === 200, 'successfully Wellsaid tested synthesize');
/* delete the credential */
result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}`, {
auth: authUser,
@@ -285,6 +341,20 @@ test('speech credentials tests', async(t) => {
//console.log(JSON.stringify(result));
t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for deepgram');
result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
body: {
speech_credential_sid: ms_sid,
text: "Hello How are you",
language: "en-US",
voice: "aura-asteria-en"
}
});
t.ok(result.statusCode === 200, 'successfully deepgram tested synthesize');
/* delete the credential */
result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}`, {
auth: authUser,
@@ -404,6 +474,20 @@ test('speech credentials tests', async(t) => {
//console.log(JSON.stringify(result));
t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for ibm stt');
result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
body: {
speech_credential_sid: ms_sid,
text: "Hello How are you",
language: "en-US",
voice: "en-US_MichaelExpressive"
}
});
t.ok(result.statusCode === 200, 'successfully IBM tested synthesize');
/* delete the credential */
result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}`, {
auth: authUser,
@@ -659,6 +743,30 @@ test('speech credentials tests', async(t) => {
});
t.ok(result.statusCode === 204, 'successfully deleted speech credential');
/* add a credential for aws polly by roleArn */
result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
body: {
vendor: 'aws',
labe: 'aws_polly_with_arn',
use_for_tts: true,
use_for_stt: false,
role_arn: 'Arn::aws::role',
aws_region: 'us-east-1'
}
});
t.ok(result.statusCode === 201, 'successfully added speech credential for AWS Polly By RoleArn');
const awsPollySid = result.body.sid;
/* delete the credential */
result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${awsPollySid}`, {
auth: authUser,
resolveWithFullResponse: true,
});
t.ok(result.statusCode === 204, 'successfully deleted speech credential');
/* Check google supportedLanguagesAndVoices */
result = await request.get(`/Accounts/${account_sid}/SpeechCredentials/speech/supportedLanguagesAndVoices?vendor=google`, {
resolveWithFullResponse: true,