fix: azure audio cannot be downloaded as mp3 when encodingMp3 = true

fix: multipart upload to aws fails because parts are below the minimum size (#319 )
support aws speech by roleArn (#313 )
2026-01-25 02:08:24 +00:00 · 2024-05-03 19:43:45 +07:00 · 2024-05-03 07:37:38 -04:00 · 2024-05-02 07:57:22 -04:00 · 2024-04-30 07:39:24 -04:00 · 2024-04-29 19:48:34 -04:00
12 changed files with 229 additions and 46 deletions
--- a/app.js
+++ b/app.js
@@ -52,6 +52,7 @@ const {
  getTtsVoices,
  getTtsSize,
  purgeTtsCache,
+  getAwsAuthToken,
  synthAudio
 } = require('@jambonz/speech-utils')({}, logger);
 const {
@@ -95,6 +96,7 @@ app.locals = {
  deleteKey,
  getTtsVoices,
  getTtsSize,
+  getAwsAuthToken,
  purgeTtsCache,
  synthAudio,
  lookupAppBySid,
--- a/lib/record/s3-multipart-upload-stream.js
+++ b/lib/record/s3-multipart-upload-stream.js
@@ -16,7 +16,7 @@ class S3MultipartUploadStream extends Writable {
    this.partNumber = 1;
    this.multipartETags = [];
    this.buffer = Buffer.alloc(0);
-    this.minPartSize = 2 * 1024 * 1024; // 5 MB
+    this.minPartSize = 5 * 1024 * 1024; // 5 MB
    this.s3 = new S3Client(opts.bucketCredential);
    this.metadata = opts.metadata;
  }
--- a/lib/record/upload.js
+++ b/lib/record/upload.js
@@ -3,6 +3,7 @@ const Websocket = require('ws');
 const PCMToMP3Encoder = require('./encoder');
 const wav = require('wav');
 const { getUploader } = require('./utils');
+const { pipeline } = require('stream');

 async function upload(logger, socket) {
  socket._recvInitialMetadata = false;
@@ -60,22 +61,19 @@ async function upload(logger, socket) {
              bitrate: 128
            }, logger);
          }
-          const handleError = (err, streamType) => {
-            logger.error(
-              { err },
-              `Error while streaming for vendor: ${obj.vendor}, pipe: ${streamType}: ${err.message}`
-            );
-          };

          /* start streaming data */
-          const duplex = Websocket.createWebSocketStream(socket);
-          duplex
-            .on('error', (err) => handleError(err, 'duplex'))
-            .pipe(encoder)
-            .on('error', (err) => handleError(err, 'encoder'))
-            .pipe(uploadStream)
-            .on('error', (err) => handleError(err, 'uploadStream'));
-
+          pipeline(
+            Websocket.createWebSocketStream(socket),
+            encoder,
+            uploadStream,
+            (error) => {
+              if (error) {
+                logger.error({ error }, 'pipeline error, cannot upload data to storage');
+                socket.close();
+              }
+            }
+          );
        } else {
          logger.info(`account ${accountSid} does not have any bucket credential, close the socket`);
          socket.close();
--- a/lib/routes/api/accounts.js
+++ b/lib/routes/api/accounts.js
@@ -265,7 +265,8 @@ function validateUpdateCall(opts) {
    'sip_request',
    'record',
    'tag',
-    'dtmf'
+    'dtmf',
+    'conferenceParticipantAction'
  ]
    .reduce((acc, prop) => (opts[prop] ? ++acc : acc), 0);

@@ -316,6 +317,19 @@ function validateUpdateCall(opts) {
  if (opts.tag && (typeof opts.tag !== 'object' || Array.isArray(opts.tag) || opts.tag === null)) {
    throw new DbErrorBadRequest('invalid tag data');
  }
+  if (opts.conferenceParticipantAction) {
+    if (!['tag', 'untag', 'coach', 'uncoach', 'mute', 'unmute', 'hold', 'unhold']
+      .includes(opts.conferenceParticipantAction.action)) {
+      throw new DbErrorBadRequest(
+        `conferenceParticipantAction invalid action property ${opts.conferenceParticipantAction.action}`);
+    }
+    if ('tag' == opts.conferenceParticipantAction.action && !opts.tag) {
+      throw new DbErrorBadRequest('conferenceParticipantAction requires tag property when action is \'tag\'');
+    }
+    if ('coach' == opts.conferenceParticipantAction.action && !opts.tag) {
+      throw new DbErrorBadRequest('conferenceParticipantAction requires tag property when action is \'coach\'');
+    }
+  }
 }

 function validateTo(to) {
--- a/lib/routes/api/sip-realm.js
+++ b/lib/routes/api/sip-realm.js
@@ -31,6 +31,7 @@ router.post('/:sip_realm', async(req, res) => {
      const [sbcs] = await promisePool.query('SELECT ipv4 from sbc_addresses');
      if (sbcs.length === 0) throw new Error('no SBC addresses provisioned in the database!');
      const ips = sbcs.map((s) => s.ipv4);
+      const uniqueIps = [...new Set(ips)];

      /* retrieve existing dns records */
      const [old_recs] = await promisePool.query('SELECT record_id from dns_records WHERE account_sid = ?',
@@ -48,7 +49,7 @@ router.post('/:sip_realm', async(req, res) => {
      }

      /* add the dns records */
-      const records = await createDnsRecords(logger, domain, subdomain, ips);
+      const records = await createDnsRecords(logger, domain, subdomain, uniqueIps);
      if (!records) throw new Error(`failure updating dns records for ${sip_realm}`);
      const values = records.map((r) => {
        return `('${uuid()}', '${account_sid}', '${r.type}', ${r.id})`;
--- a/lib/routes/api/speech-credentials.js
+++ b/lib/routes/api/speech-credentials.js
@@ -113,6 +113,7 @@ const encryptCredential = (obj) => {
    secret_access_key,
    aws_region,
    api_key,
+    role_arn,
    region,
    client_id,
    secret,
@@ -155,10 +156,17 @@ const encryptCredential = (obj) => {
      return encrypt(service_key);

    case 'aws':
-      assert(access_key_id, 'invalid aws speech credential: access_key_id is required');
-      assert(secret_access_key, 'invalid aws speech credential: secret_access_key is required');
-      assert(aws_region, 'invalid aws speech credential: aws_region is required');
-      const awsData = JSON.stringify({aws_region, access_key_id, secret_access_key});
+      // AWS Polly can work with 3 types of credentials:
+      // 1/ access_key_id and secret_access_key
+      // 2/ RoleArn, used to assume the role
+      // 3/ RoleArn assigned to the instance profile under which this application runs
+      const awsData = JSON.stringify(
+        {
+          aws_region,
+          ...(access_key_id && {access_key_id}),
+          ...(secret_access_key && {secret_access_key}),
+          ...(role_arn && {role_arn}),
+        });
      return encrypt(awsData);

    case 'microsoft':
@@ -541,12 +549,13 @@ router.get('/:sid/test', async(req, res) => {
      }
    }
    else if (cred.vendor === 'aws') {
+      const {getTtsVoices, getAwsAuthToken} = req.app.locals;
      if (cred.use_for_tts) {
-        const {getTtsVoices} = req.app.locals;
        try {
          await testAwsTts(logger, getTtsVoices, {
            accessKeyId: credential.access_key_id,
            secretAccessKey: credential.secret_access_key,
+            roleArn: credential.role_arn,
            region: credential.aws_region || process.env.AWS_REGION
          });
          results.tts.status = 'ok';
@@ -558,9 +567,10 @@ router.get('/:sid/test', async(req, res) => {
      }
      if (cred.use_for_stt) {
        try {
-          await testAwsStt(logger, {
+          await testAwsStt(logger, getAwsAuthToken, {
            accessKeyId: credential.access_key_id,
            secretAccessKey: credential.secret_access_key,
+            roleArn: credential.role_arn,
            region: credential.aws_region || process.env.AWS_REGION
          });
          results.stt.status = 'ok';
--- a/lib/routes/api/tts-cache.js
+++ b/lib/routes/api/tts-cache.js
@@ -10,6 +10,7 @@ const Account = require('../../models/account');
 const sysError = require('../error');
 const { getSpeechCredential, decryptCredential } = require('../../utils/speech-utils');
 const PCMToMP3Encoder = require('../../record/encoder');
+const { pipeline } = require('stream');

 router.delete('/', async(req, res) => {
  const {purgeTtsCache} = req.app.locals;
@@ -69,6 +70,8 @@ router.post('/Synthesize', async(req, res) => {
        voice = arr[1];
        model = arr[2];
      }
+    }  else if (cred.vendor === 'deepgram') {
+      model = voice;
    }
    const stats = {
      histogram: () => {},
@@ -84,7 +87,8 @@ router.post('/Synthesize', async(req, res) => {
      model,
      salt,
      credentials: cred,
-      disableTtsCache: false
+      disableTtsCache: false,
+      disableTtsStreaming: true
    });

    let contentType = 'audio/mpeg';
@@ -92,17 +96,27 @@ router.post('/Synthesize', async(req, res) => {
    let readStream = fs.createReadStream(filePath);
    if (['nuance', 'nvidia'].includes(cred.vendor) ||
      (
-        process.env.JAMBONES_TTS_TRIM_SILENCE &&
+        (process.env.JAMBONES_TTS_TRIM_SILENCE || !process.env.JAMBONES_DISABLE_TTS_STREAMING) &&
        ['microsoft', 'azure'].includes(cred.vendor)
      )
    ) {
      if (encodingMp3) {
-        readStream = readStream
-          .pipe(new PCMToMP3Encoder({
+        readStream = pipeline(
+          readStream,
+          new PCMToMP3Encoder({
            channels: 1,
            sampleRate: 8000,
            bitRate: 128
-          }, logger));
+          }, logger),
+          (err) => {
+            if (err) {
+              logger.error('ttscache/Synthesize failed:', err);
+              if (!res.headersSent) {
+                res.status(500).end('Server error');
+              }
+            }
+          }
+        );
      } else {
        contentType = 'application/octet-stream';
      }
@@ -110,10 +124,17 @@ router.post('/Synthesize', async(req, res) => {
    res.writeHead(200, {
      'Content-Type': contentType,
    });
-    readStream.pipe(res);
-    readStream.on('end', () => {
-      fs.unlink(filePath, (err) => {
-        if (err) throw err;
+
+    pipeline(readStream, res, (err) => {
+      if (err) {
+        logger.error('ttscache/Synthesize failed:', err);
+        if (!res.headersSent) {
+          res.status(500).end('Server error');
+        }
+      }
+
+      fs.unlink(filePath, (unlinkErr) => {
+        if (unlinkErr) throw unlinkErr;
        logger.info(`${filePath} was deleted`);
      });
    });
--- a/lib/swagger/swagger.yaml
+++ b/lib/swagger/swagger.yaml
@@ -4152,6 +4152,22 @@ paths:
                      type: string 
                    siprecServerURL:
                      type: string
+                conferenceParticipantAction:
+                  type: object
+                  properties:
+                    action:
+                      type: string
+                      enum:
+                        - tag
+                        - untag
+                        - coach
+                        - uncoach
+                        - mute
+                        - unmute
+                        - hold
+                        - unhold
+                    tag:
+                      type: string
      responses:
        200:
          description: Accepted
--- a/lib/utils/speech-utils.js
+++ b/lib/utils/speech-utils.js
@@ -165,16 +165,26 @@ const testAwsTts = async(logger, getTtsVoices, credentials) => {
  }
 };

-const testAwsStt = async(logger, credentials) => {
+const testAwsStt = async(logger, getAwsAuthToken, credentials) => {
  try {
-    const {region, accessKeyId, secretAccessKey} = credentials;
-    const client = new TranscribeClient({
-      region,
-      credentials: {
-        accessKeyId,
-        secretAccessKey
-      }
-    });
+    const {region, accessKeyId, secretAccessKey, roleArn} = credentials;
+    let client = null;
+    if (accessKeyId && secretAccessKey) {
+      client = new TranscribeClient({
+        region,
+        credentials: {
+          accessKeyId,
+          secretAccessKey
+        }
+      });
+    } else if (roleArn) {
+      client = new TranscribeClient({
+        region,
+        credentials: await getAwsAuthToken(null, null, region, roleArn),
+      });
+    } else {
+      client = new TranscribeClient({region});
+    }
    const command = new ListVocabulariesCommand({});
    const response =  await client.send(command);
    return response;
@@ -400,6 +410,7 @@ const getSpeechCredential = (credential, logger) => {
      ...credential,
      accessKeyId: credential.access_key_id,
      secretAccessKey: credential.secret_access_key,
+      roleArn: credential.role_arn,
      region: credential.aws_region || 'us-east-1'
    };
  }
@@ -421,6 +432,7 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
  else if ('aws' === obj.vendor) {
    const o = JSON.parse(decrypt(credential));
    obj.access_key_id = o.access_key_id;
+    obj.role_arn = o.role_arn;
    obj.secret_access_key = isObscureKey ? obscureKey(o.secret_access_key) : o.secret_access_key;
    obj.aws_region = o.aws_region;
    logger.info({obj, o}, 'retrieving aws speech credential');
@@ -584,6 +596,7 @@ async function getLanguagesVoicesForAws(credential, getTtsVoices, logger) {
        credentials: {
          accessKeyId: credential.access_key_id,
          secretAccessKey: credential.secret_access_key,
+          roleArn: credential.role_arn,
          region: credential.aws_region || process.env.AWS_REGION
        }
      });
--- a/package-lock.json
+++ b/package-lock.json
@@ -19,7 +19,7 @@
        "@jambonz/lamejs": "^1.2.2",
        "@jambonz/mw-registrar": "^0.2.7",
        "@jambonz/realtimedb-helpers": "^0.8.8",
-        "@jambonz/speech-utils": "^0.0.51",
+        "@jambonz/speech-utils": "^0.1.0",
        "@jambonz/time-series": "^0.2.8",
        "@jambonz/verb-specifications": "^0.0.69",
        "@soniox/soniox-node": "^1.2.2",
@@ -2027,9 +2027,9 @@
      }
    },
    "node_modules/@jambonz/speech-utils": {
-      "version": "0.0.51",
-      "resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.0.51.tgz",
-      "integrity": "sha512-3Zk2CERs1PYQiCG08NDMNBbDzBBfPuEwgADTANMP56dd07PpW360ufL8CcQfkBmWKGVma0wevRrv6DQLu2Ifdg==",
+      "version": "0.1.0",
+      "resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.1.0.tgz",
+      "integrity": "sha512-45K6Vrl2PMEbbcnvm65afCDujDxck/bEUq7+P6KRw/cei3mrKtwjGh3HXi1cKhC1gA5UF1+5YrUoPO9LdoZnog==",
      "dependencies": {
        "@aws-sdk/client-polly": "^3.496.0",
        "@aws-sdk/client-sts": "^3.496.0",
--- a/package.json
+++ b/package.json
@@ -29,7 +29,7 @@
    "@jambonz/lamejs": "^1.2.2",
    "@jambonz/mw-registrar": "^0.2.7",
    "@jambonz/realtimedb-helpers": "^0.8.8",
-    "@jambonz/speech-utils": "^0.0.51",
+    "@jambonz/speech-utils": "^0.1.0",
    "@jambonz/time-series": "^0.2.8",
    "@jambonz/verb-specifications": "^0.0.69",
    "@soniox/soniox-node": "^1.2.2",
--- a/test/speech-credentials.js
+++ b/test/speech-credentials.js
@@ -170,6 +170,20 @@ test('speech credentials tests', async(t) => {
      //console.log(JSON.stringify(result));
      t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for google tts');
      t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for google stt');
+
+      result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
+        resolveWithFullResponse: true,
+        auth: authUser,
+        json: true,
+        body: {
+          speech_credential_sid: ms_sid,
+          text: "Hello How are you",
+          language: "en-US",
+          voice: "en-US-Standard-C"
+        }
+      });
+
+      t.ok(result.statusCode === 200, 'successfully google tested synthesize');
    }

    /* add / test a credential for microsoft */
@@ -198,6 +212,20 @@ test('speech credentials tests', async(t) => {
      //console.log(JSON.stringify(result));
      t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for microsoft tts');
      t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for microsoft stt');
+
+      result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
+        resolveWithFullResponse: true,
+        auth: authUser,
+        json: true,
+        body: {
+          speech_credential_sid: ms_sid,
+          text: "Hello How are you",
+          language: "en-US",
+          voice: "en-US-AvaMultilingualNeural"
+        }
+      });
+
+      t.ok(result.statusCode === 200, 'successfully microsoft tested synthesize');
    }

    /* add / test a credential for AWS */
@@ -227,6 +255,20 @@ test('speech credentials tests', async(t) => {
      //console.log(JSON.stringify(result));
      t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for AWS tts');
      t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for AWS stt');
+
+      result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
+        resolveWithFullResponse: true,
+        auth: authUser,
+        json: true,
+        body: {
+          speech_credential_sid: ms_sid,
+          text: "Hello How are you",
+          language: "en-US",
+          voice: "Joanna"
+        }
+      });
+
+      t.ok(result.statusCode === 200, 'successfully AWS tested synthesize');
    }

    /* add a credential for wellsaid */
@@ -253,6 +295,20 @@ test('speech credentials tests', async(t) => {
      //console.log(JSON.stringify(result));
      t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for wellsaid');

+      result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
+        resolveWithFullResponse: true,
+        auth: authUser,
+        json: true,
+        body: {
+          speech_credential_sid: ms_sid,
+          text: "Hello How are you",
+          language: "en-US",
+          voice: "3"
+        }
+      });
+
+      t.ok(result.statusCode === 200, 'successfully Wellsaid tested synthesize');
+
      /* delete the credential */
      result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}`, {
        auth: authUser,
@@ -285,6 +341,20 @@ test('speech credentials tests', async(t) => {
      //console.log(JSON.stringify(result));
      t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for deepgram');

+      result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
+        resolveWithFullResponse: true,
+        auth: authUser,
+        json: true,
+        body: {
+          speech_credential_sid: ms_sid,
+          text: "Hello How are you",
+          language: "en-US",
+          voice: "aura-asteria-en"
+        }
+      });
+
+      t.ok(result.statusCode === 200, 'successfully deepgram tested synthesize');
+
      /* delete the credential */
      result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}`, {
        auth: authUser,
@@ -404,6 +474,20 @@ test('speech credentials tests', async(t) => {
      //console.log(JSON.stringify(result));
      t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for ibm stt');

+      result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
+        resolveWithFullResponse: true,
+        auth: authUser,
+        json: true,
+        body: {
+          speech_credential_sid: ms_sid,
+          text: "Hello How are you",
+          language: "en-US",
+          voice: "en-US_MichaelExpressive"
+        }
+      });
+
+      t.ok(result.statusCode === 200, 'successfully IBM tested synthesize');
+
      /* delete the credential */
      result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}`, {
        auth: authUser,
@@ -659,6 +743,30 @@ test('speech credentials tests', async(t) => {
    });
    t.ok(result.statusCode === 204, 'successfully deleted speech credential');

+    /* add a credential for aws polly by roleArn */
+    result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
+      resolveWithFullResponse: true,
+      auth: authUser,
+      json: true,
+      body: {
+        vendor: 'aws',
+        labe: 'aws_polly_with_arn',
+        use_for_tts: true,
+        use_for_stt: false,
+        role_arn: 'Arn::aws::role',
+        aws_region: 'us-east-1'
+      }
+    });
+    t.ok(result.statusCode === 201, 'successfully added speech credential for AWS Polly By RoleArn');
+    const awsPollySid = result.body.sid;
+
+    /* delete the credential */
+    result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${awsPollySid}`, {
+      auth: authUser,
+      resolveWithFullResponse: true,
+    });
+    t.ok(result.statusCode === 204, 'successfully deleted speech credential');
+
    /* Check google supportedLanguagesAndVoices */
    result = await request.get(`/Accounts/${account_sid}/SpeechCredentials/speech/supportedLanguagesAndVoices?vendor=google`, {
      resolveWithFullResponse: true,
Author	SHA1	Message	Date
Quan HL	7f904930ae	fix azure cannot download mp3 if encodingMp3 = true	2024-05-03 19:43:45 +07:00
Hoan Luu Huu	c0fab2880b	fix cannot send multipart to aws due to min size (#319 )	2024-05-03 07:37:38 -04:00
Hoan Luu Huu	ce2fa392a4	support aws speech by roleArn (#313 ) * support aws speech by roleArn * support 3 types of aws credentials * wip * wip * update speech util version	2024-05-02 07:57:22 -04:00
Hoan Luu Huu	3b47162d13	Feat/record with pipeline (#318 ) * use pipeline for nodejs streams * use pipeline for nodejs streams	2024-04-30 07:39:24 -04:00
Hoan Luu Huu	b765232d4f	api server cannot synthesize text after upgrade latest speech-utils (#317 ) * api server cannot synthesize text after upgrade latest speech-utils * wip * add testcase for synthesize text * fix synthesize testcase	2024-04-29 19:48:34 -04:00
Dave Horton	2436bea6ea	add support for LCC updateCall with conferenceParticipantState (#296 ) * add support for LCC updateCall with conferenceParticipantState * wip * wip	2024-04-22 11:06:08 -04:00
Dave Horton	f67abddbd4	bug: attempting to add duplicate dns records on hosted system (#312 )	2024-04-19 18:13:27 -04:00