support aws speech by roleArn (#313)

* support aws speech by roleArn * support 3 types of aws credentials * wip * wip * update speech util version
2026-01-25 02:08:24 +00:00 · 2024-05-02 18:57:22 +07:00
parent 3b47162d13
commit ce2fa392a4
6 changed files with 69 additions and 20 deletions
--- a/app.js
+++ b/app.js
@@ -52,6 +52,7 @@ const {
  getTtsVoices,
  getTtsSize,
  purgeTtsCache,
+  getAwsAuthToken,
  synthAudio
 } = require('@jambonz/speech-utils')({}, logger);
 const {
@@ -95,6 +96,7 @@ app.locals = {
  deleteKey,
  getTtsVoices,
  getTtsSize,
+  getAwsAuthToken,
  purgeTtsCache,
  synthAudio,
  lookupAppBySid,
--- a/lib/routes/api/speech-credentials.js
+++ b/lib/routes/api/speech-credentials.js
@@ -113,6 +113,7 @@ const encryptCredential = (obj) => {
    secret_access_key,
    aws_region,
    api_key,
+    role_arn,
    region,
    client_id,
    secret,
@@ -155,10 +156,17 @@ const encryptCredential = (obj) => {
      return encrypt(service_key);

    case 'aws':
-      assert(access_key_id, 'invalid aws speech credential: access_key_id is required');
-      assert(secret_access_key, 'invalid aws speech credential: secret_access_key is required');
-      assert(aws_region, 'invalid aws speech credential: aws_region is required');
-      const awsData = JSON.stringify({aws_region, access_key_id, secret_access_key});
+      // AWS Polly supports 3 types of credentials:
+      // 1/ access_key_id and secret_access_key
+      // 2/ a role ARN (role_arn) that is assumed via AssumeRole
+      // 3/ the role assigned to the instance profile of the host running this application
+      const awsData = JSON.stringify(
+        {
+          aws_region,
+          ...(access_key_id && {access_key_id}),
+          ...(secret_access_key && {secret_access_key}),
+          ...(role_arn && {role_arn}),
+        });
      return encrypt(awsData);

    case 'microsoft':
@@ -541,12 +549,13 @@ router.get('/:sid/test', async(req, res) => {
      }
    }
    else if (cred.vendor === 'aws') {
+      const {getTtsVoices, getAwsAuthToken} = req.app.locals;
      if (cred.use_for_tts) {
-        const {getTtsVoices} = req.app.locals;
        try {
          await testAwsTts(logger, getTtsVoices, {
            accessKeyId: credential.access_key_id,
            secretAccessKey: credential.secret_access_key,
+            roleArn: credential.role_arn,
            region: credential.aws_region || process.env.AWS_REGION
          });
          results.tts.status = 'ok';
@@ -558,9 +567,10 @@ router.get('/:sid/test', async(req, res) => {
      }
      if (cred.use_for_stt) {
        try {
-          await testAwsStt(logger, {
+          await testAwsStt(logger, getAwsAuthToken, {
            accessKeyId: credential.access_key_id,
            secretAccessKey: credential.secret_access_key,
+            roleArn: credential.role_arn,
            region: credential.aws_region || process.env.AWS_REGION
          });
          results.stt.status = 'ok';
--- a/lib/utils/speech-utils.js
+++ b/lib/utils/speech-utils.js
@@ -165,16 +165,26 @@ const testAwsTts = async(logger, getTtsVoices, credentials) => {
  }
 };

-const testAwsStt = async(logger, credentials) => {
+const testAwsStt = async(logger, getAwsAuthToken, credentials) => {
  try {
-    const {region, accessKeyId, secretAccessKey} = credentials;
-    const client = new TranscribeClient({
+    const {region, accessKeyId, secretAccessKey, roleArn} = credentials;
+    let client = null;
+    if (accessKeyId && secretAccessKey) {
+      client = new TranscribeClient({
        region,
        credentials: {
          accessKeyId,
          secretAccessKey
        }
      });
+    } else if (roleArn) {
+      client = new TranscribeClient({
+        region,
+        credentials: await getAwsAuthToken(null, null, region, roleArn),
+      });
+    } else {
+      client = new TranscribeClient({region});
+    }
    const command = new ListVocabulariesCommand({});
    const response =  await client.send(command);
    return response;
@@ -400,6 +410,7 @@ const getSpeechCredential = (credential, logger) => {
      ...credential,
      accessKeyId: credential.access_key_id,
      secretAccessKey: credential.secret_access_key,
+      roleArn: credential.role_arn,
      region: credential.aws_region || 'us-east-1'
    };
  }
@@ -421,6 +432,7 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
  else if ('aws' === obj.vendor) {
    const o = JSON.parse(decrypt(credential));
    obj.access_key_id = o.access_key_id;
+    obj.role_arn = o.role_arn;
    obj.secret_access_key = isObscureKey ? obscureKey(o.secret_access_key) : o.secret_access_key;
    obj.aws_region = o.aws_region;
    logger.info({obj, o}, 'retrieving aws speech credential');
@@ -584,6 +596,7 @@ async function getLanguagesVoicesForAws(credential, getTtsVoices, logger) {
        credentials: {
          accessKeyId: credential.access_key_id,
          secretAccessKey: credential.secret_access_key,
+          roleArn: credential.role_arn,
          region: credential.aws_region || process.env.AWS_REGION
        }
      });
--- a/package-lock.json
+++ b/package-lock.json
@@ -19,7 +19,7 @@
        "@jambonz/lamejs": "^1.2.2",
        "@jambonz/mw-registrar": "^0.2.7",
        "@jambonz/realtimedb-helpers": "^0.8.8",
-        "@jambonz/speech-utils": "^0.0.51",
+        "@jambonz/speech-utils": "^0.1.0",
        "@jambonz/time-series": "^0.2.8",
        "@jambonz/verb-specifications": "^0.0.69",
        "@soniox/soniox-node": "^1.2.2",
@@ -2027,9 +2027,9 @@
      }
    },
    "node_modules/@jambonz/speech-utils": {
-      "version": "0.0.51",
-      "resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.0.51.tgz",
-      "integrity": "sha512-3Zk2CERs1PYQiCG08NDMNBbDzBBfPuEwgADTANMP56dd07PpW360ufL8CcQfkBmWKGVma0wevRrv6DQLu2Ifdg==",
+      "version": "0.1.0",
+      "resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.1.0.tgz",
+      "integrity": "sha512-45K6Vrl2PMEbbcnvm65afCDujDxck/bEUq7+P6KRw/cei3mrKtwjGh3HXi1cKhC1gA5UF1+5YrUoPO9LdoZnog==",
      "dependencies": {
        "@aws-sdk/client-polly": "^3.496.0",
        "@aws-sdk/client-sts": "^3.496.0",
--- a/package.json
+++ b/package.json
@@ -29,7 +29,7 @@
    "@jambonz/lamejs": "^1.2.2",
    "@jambonz/mw-registrar": "^0.2.7",
    "@jambonz/realtimedb-helpers": "^0.8.8",
-    "@jambonz/speech-utils": "^0.0.51",
+    "@jambonz/speech-utils": "^0.1.0",
    "@jambonz/time-series": "^0.2.8",
    "@jambonz/verb-specifications": "^0.0.69",
    "@soniox/soniox-node": "^1.2.2",
--- a/test/speech-credentials.js
+++ b/test/speech-credentials.js
@@ -743,6 +743,30 @@ test('speech credentials tests', async(t) => {
    });
    t.ok(result.statusCode === 204, 'successfully deleted speech credential');

+    /* add a credential for aws polly by roleArn */
+    result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
+      resolveWithFullResponse: true,
+      auth: authUser,
+      json: true,
+      body: {
+        vendor: 'aws',
+        labe: 'aws_polly_with_arn',
+        use_for_tts: true,
+        use_for_stt: false,
+        role_arn: 'Arn::aws::role',
+        aws_region: 'us-east-1'
+      }
+    });
+    t.ok(result.statusCode === 201, 'successfully added speech credential for AWS Polly By RoleArn');
+    const awsPollySid = result.body.sid;
+
+    /* delete the credential */
+    result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${awsPollySid}`, {
+      auth: authUser,
+      resolveWithFullResponse: true,
+    });
+    t.ok(result.statusCode === 204, 'successfully deleted speech credential');
+
    /* Check google supportedLanguagesAndVoices */
    result = await request.get(`/Accounts/${account_sid}/SpeechCredentials/speech/supportedLanguagesAndVoices?vendor=google`, {
      resolveWithFullResponse: true,