add support for Nuance TTS on-prem (vs hosted)

2026-07-24 05:01:51 +00:00 · 2023-03-24 14:05:18 -04:00
parent 6fa68bc712
commit 5a7e0d37f4
5 changed files with 104 additions and 17 deletions
@@ -1,5 +1,5 @@
 const assert = require('assert');
-const {noopLogger, createNuanceClient} = require('./utils');
+const {noopLogger, createNuanceClient, createKryptonClient} = require('./utils');
 const getNuanceAccessToken = require('./get-nuance-access-token');
 const {GetVoicesRequest, Voice} = require('../stubs/nuance/synthesizer_pb');
 const TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');
@@ -21,15 +21,20 @@ const getIbmVoices = async(client, logger, credentials) => {
 };

 const getNuanceVoices = async(client, logger, credentials) => {
-  const {client_id: clientId, secret: secret} = credentials;
+  const {client_id: clientId, secret: secret, nuance_tts_uri} = credentials;

  return new Promise(async(resolve, reject) => {
    /* get a nuance access token */
    let token, nuanceClient;
    try {
-      const access_token = await getNuanceAccessToken(client, logger, clientId, secret, 'tts');
-      token = access_token.access_token;
-      nuanceClient = await createNuanceClient(token);
+      if (nuance_tts_uri) {
+        nuanceClient = await createKryptonClient(nuance_tts_uri);
+      }
+      else {
+        const access_token = await getNuanceAccessToken(client, logger, clientId, secret, 'tts');
+        token = access_token.access_token;
+        nuanceClient = await createNuanceClient(token);
+      }
    } catch (err) {
      logger.error({err}, 'getTtsVoices: error retrieving access token');
      return reject(err);
@@ -16,7 +16,7 @@ const {
  CancellationDetails,
  SpeechSynthesisOutputFormat
 } = sdk;
-const {makeSynthKey, createNuanceClient, noopLogger, createRivaClient} = require('./utils');
+const {makeSynthKey, createNuanceClient, createKryptonClient, createRivaClient, noopLogger} = require('./utils');
 const getNuanceAccessToken = require('./get-nuance-access-token');
 const {
  SynthesisRequest,
@@ -75,8 +75,10 @@ async function synthAudio(client, logger, stats, { account_sid,
  }
  else if ('nuance' === vendor) {
    assert.ok(voice, 'synthAudio requires voice when nuance is used');
-    assert.ok(credentials.client_id, 'synthAudio requires client_id in credentials when nuance is used');
-    assert.ok(credentials.secret, 'synthAudio requires client_id in credentials when nuance is used');
+    if (!credentials.nuance_tts_uri) { /* no tts uri given: client_id and secret are both required */
+      assert.ok(credentials.client_id, 'synthAudio requires client_id in credentials when nuance is used');
+      assert.ok(credentials.secret, 'synthAudio requires secret in credentials when nuance is used');
+    }
  }
  else if ('nvidia' === vendor) {
    assert.ok(voice, 'synthAudio requires voice when nvidia is used');
@@ -371,10 +373,16 @@ const synthWellSaid = async(logger, {credentials, stats, language, voice, gender
 };

 const synthNuance = async(client, logger, {credentials, stats, voice, model, text}) => {
-  /* get a nuance access token */
-  const {client_id, secret} = credentials;
-  const {access_token} = await getNuanceAccessToken(client, logger, client_id, secret, 'tts');
-  const nuanceClient = await createNuanceClient(access_token);
+  let nuanceClient;
+  const {client_id, secret, nuance_tts_uri} = credentials;
+  if (nuance_tts_uri) {
+    nuanceClient = await createKryptonClient(nuance_tts_uri);
+  }
+  else {
+    /* get a nuance access token */
+    const {access_token} = await getNuanceAccessToken(client, logger, client_id, secret, 'tts');
+    nuanceClient = await createNuanceClient(access_token);
+  }

  const v = new Voice();
  const p = new AudioParameters();
@@ -77,6 +77,11 @@ const getNuanceAccessToken = async(clientId, secret, scope = 'asr tts') => {
  return json.access_token;
 };

+/* gRPC synthesizer client for the Nuance TTS server at `uri`, opened over an insecure channel */
+const createKryptonClient = async(uri) => (
+  new SynthesizerClient(uri, grpc.credentials.createInsecure())
+);
+
 const createNuanceClient = async(access_token) => {

  //if (nuanceClientMap.has(access_token)) return nuanceClientMap.get(access_token);
@@ -108,6 +113,7 @@ module.exports = {
  makeIbmKey,
  getNuanceAccessToken,
  createNuanceClient,
+  createKryptonClient,
  createRivaClient,
  makeBasicAuthHeader,
  NUANCE_AUTH_ENDPOINT,
@@ -12,12 +12,12 @@ const stats = {
  histogram: () => {}
 };

-test('Nuance tests', async(t) => {
+test('Nuance hosted tests', async(t) => {
  const fn = require('..');
  const {client, getTtsVoices} = fn(opts, logger);

  if (!process.env.NUANCE_CLIENT_ID || !process.env.NUANCE_SECRET ) {
-      t.pass('skipping Nuance test since no Nuance client_id and secret provided');
+      t.pass('skipping Nuance hosted test since no Nuance client_id and secret provided');
      t.end();
      client.quit();
      return;
@@ -31,8 +31,39 @@ test('Nuance tests', async(t) => {
      }
    };
    let voices = await getTtsVoices(opts);
-    //console.log(`received ${voices.length} voices from Nuance`);
-    //console.log(JSON.stringify(voices));
+    t.ok(voices.length > 0 && voices[0].language, 
+      `GetVoices: successfully retrieved ${voices.length} voices from Nuance`);
+
+    await client.flushallAsync();
+
+    t.end();
+
+  }
+  catch (err) {
+    console.error(err);
+    t.end(err);
+  }
+  client.quit();
+});
+
+test('Nuance on-prem tests', async(t) => {
+  const fn = require('..');
+  const {client, getTtsVoices} = fn(opts, logger);
+
+  if (!process.env.NUANCE_TTS_URI ) {
+      t.pass('skipping Nuance on-prem test since no Nuance uri provided');
+      t.end();
+      client.quit();
+      return;
+  }
+  try {
+    const opts = {
+      vendor: 'nuance',
+      credentials: {
+        nuance_tts_uri: process.env.NUANCE_TTS_URI
+      }
+    };
+    let voices = await getTtsVoices(opts);
    t.ok(voices.length > 0 && voices[0].language, 
      `GetVoices: successfully retrieved ${voices.length} voices from Nuance`);

@@ -212,7 +212,7 @@ test('Azure custom voice speech synth tests', async(t) => {
  client.quit();
 });

-test('Nuance speech synth tests', async(t) => {
+test('Nuance hosted speech synth tests', async(t) => {
  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);

@@ -251,6 +251,43 @@ test('Nuance speech synth tests', async(t) => {
  client.quit();
 });

+test('Nuance on-prem speech synth tests', async(t) => {
+  const fn = require('..');
+  const {synthAudio, client} = fn(opts, logger);
+  /* needs only NUANCE_TTS_URI; the same text is synthesized twice: a cache miss, then a cache hit */
+  if (!process.env.NUANCE_TTS_URI) {
+    t.pass('skipping Nuance on prem speech synth tests since NUANCE_TTS_URI not provided');
+    client.quit(); /* release the client on the skip path too, as the normal path does */
+    return t.end();
+  }
+  try {
+    let opts = await synthAudio(stats, {
+      vendor: 'nuance',
+      credentials: {
+        nuance_tts_uri: process.env.NUANCE_TTS_URI
+      },
+      language: 'en-US',
+      voice: 'Evan',
+      text: 'This is a test of on-prem.  This is only a test',
+    });
+    t.ok(!opts.servedFromCache, `successfully synthesized nuance audio to ${opts.filePath}`);
+    opts = await synthAudio(stats, {
+      vendor: 'nuance',
+      credentials: {
+        nuance_tts_uri: process.env.NUANCE_TTS_URI
+      },
+      language: 'en-US',
+      voice: 'Evan',
+      text: 'This is a test of on-prem.  This is only a test',
+    });
+    t.ok(opts.servedFromCache, `successfully retrieved nuance audio from cache ${opts.filePath}`);
+  } catch (err) {
+    console.error(err);
+    t.end(err);
+  }
+  client.quit();
+});
+
 test('Nvidia speech synth tests', async(t) => {
  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);