From 5a7e0d37f4b9a256c8138c735569d63bc0a3244b Mon Sep 17 00:00:00 2001 From: Dave Horton Date: Fri, 24 Mar 2023 14:05:18 -0400 Subject: [PATCH] add support for Nuance TTS on-prem (vs hosted) --- lib/get-tts-voices.js | 15 ++++++++++----- lib/synth-audio.js | 22 +++++++++++++++------- lib/utils.js | 6 ++++++ test/nuance.js | 39 +++++++++++++++++++++++++++++++++++---- test/synth.js | 39 ++++++++++++++++++++++++++++++++++++++- 5 files changed, 104 insertions(+), 17 deletions(-) diff --git a/lib/get-tts-voices.js b/lib/get-tts-voices.js index d2236a9..c7a901c 100644 --- a/lib/get-tts-voices.js +++ b/lib/get-tts-voices.js @@ -1,5 +1,5 @@ const assert = require('assert'); -const {noopLogger, createNuanceClient} = require('./utils'); +const {noopLogger, createNuanceClient, createKryptonClient} = require('./utils'); const getNuanceAccessToken = require('./get-nuance-access-token'); const {GetVoicesRequest, Voice} = require('../stubs/nuance/synthesizer_pb'); const TextToSpeechV1 = require('ibm-watson/text-to-speech/v1'); @@ -21,15 +21,20 @@ const getIbmVoices = async(client, logger, credentials) => { }; const getNuanceVoices = async(client, logger, credentials) => { - const {client_id: clientId, secret: secret} = credentials; + const {client_id: clientId, secret: secret, nuance_tts_uri} = credentials; return new Promise(async(resolve, reject) => { /* get a nuance access token */ let token, nuanceClient; try { - const access_token = await getNuanceAccessToken(client, logger, clientId, secret, 'tts'); - token = access_token.access_token; - nuanceClient = await createNuanceClient(token); + if (nuance_tts_uri) { + nuanceClient = await createKryptonClient(nuance_tts_uri); + } + else { + const access_token = await getNuanceAccessToken(client, logger, clientId, secret, 'tts'); + token = access_token.access_token; + nuanceClient = await createNuanceClient(token); + } } catch (err) { logger.error({err}, 'getTtsVoices: error retrieving access token'); return reject(err); diff --git a/lib/synth-audio.js b/lib/synth-audio.js index ad1bad3..d4f13b5 100644 --- a/lib/synth-audio.js +++ b/lib/synth-audio.js @@ -16,7 +16,7 @@ const { CancellationDetails, SpeechSynthesisOutputFormat } = sdk; -const {makeSynthKey, createNuanceClient, noopLogger, createRivaClient} = require('./utils'); +const {makeSynthKey, createNuanceClient, createKryptonClient, createRivaClient, noopLogger} = require('./utils'); const getNuanceAccessToken = require('./get-nuance-access-token'); const { SynthesisRequest, @@ -75,8 +75,10 @@ async function synthAudio(client, logger, stats, { account_sid, } else if ('nuance' === vendor) { assert.ok(voice, 'synthAudio requires voice when nuance is used'); - assert.ok(credentials.client_id, 'synthAudio requires client_id in credentials when nuance is used'); - assert.ok(credentials.secret, 'synthAudio requires client_id in credentials when nuance is used'); + if (!credentials.nuance_tts_uri) { + assert.ok(credentials.client_id, 'synthAudio requires client_id in credentials when nuance is used'); + assert.ok(credentials.secret, 'synthAudio requires client_id in credentials when nuance is used'); + } } else if ('nvidia' === vendor) { assert.ok(voice, 'synthAudio requires voice when nvidia is used'); @@ -371,10 +373,16 @@ const synthWellSaid = async(logger, {credentials, stats, language, voice, gender }; const synthNuance = async(client, logger, {credentials, stats, voice, model, text}) => { - /* get a nuance access token */ - const {client_id, secret} = credentials; - const {access_token} = await getNuanceAccessToken(client, logger, client_id, secret, 'tts'); - const nuanceClient = await createNuanceClient(access_token); + let nuanceClient; + const {client_id, secret, nuance_tts_uri} = credentials; + if (nuance_tts_uri) { + nuanceClient = await createKryptonClient(nuance_tts_uri); + } + else { + /* get a nuance access token */ + const {access_token} = await getNuanceAccessToken(client, logger, client_id, secret, 'tts'); + nuanceClient = await createNuanceClient(access_token); + } const v = new Voice(); const p = new AudioParameters(); diff --git a/lib/utils.js b/lib/utils.js index 7fced9f..ba91d27 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -77,6 +77,11 @@ const getNuanceAccessToken = async(clientId, secret, scope = 'asr tts') => { return json.access_token; }; +const createKryptonClient = async(uri) => { + const client = new SynthesizerClient(uri, grpc.credentials.createInsecure()); + return client; +}; + const createNuanceClient = async(access_token) => { //if (nuanceClientMap.has(access_token)) return nuanceClientMap.get(access_token); @@ -108,6 +113,7 @@ module.exports = { makeIbmKey, getNuanceAccessToken, createNuanceClient, + createKryptonClient, createRivaClient, makeBasicAuthHeader, NUANCE_AUTH_ENDPOINT, diff --git a/test/nuance.js b/test/nuance.js index f30c8f6..299b504 100644 --- a/test/nuance.js +++ b/test/nuance.js @@ -12,12 +12,12 @@ const stats = { histogram: () => {} }; -test('Nuance tests', async(t) => { +test('Nuance hosted tests', async(t) => { const fn = require('..'); const {client, getTtsVoices} = fn(opts, logger); if (!process.env.NUANCE_CLIENT_ID || !process.env.NUANCE_SECRET ) { - t.pass('skipping Nuance test since no Nuance client_id and secret provided'); + t.pass('skipping Nuance hosted test since no Nuance client_id and secret provided'); t.end(); client.quit(); return; @@ -31,8 +31,39 @@ test('Nuance tests', async(t) => { } }; let voices = await getTtsVoices(opts); - //console.log(`received ${voices.length} voices from Nuance`); - //console.log(JSON.stringify(voices)); + t.ok(voices.length > 0 && voices[0].language, + `GetVoices: successfully retrieved ${voices.length} voices from Nuance`); + + await client.flushallAsync(); + + t.end(); + + } + catch (err) { + console.error(err); + t.end(err); + } + client.quit(); +}); + +test('Nuance on-prem tests', async(t) => { + const fn = require('..'); + const {client, getTtsVoices} = fn(opts, logger); + + if (!process.env.NUANCE_TTS_URI ) { + t.pass('skipping Nuance on-prem test since no Nuance uri provided'); + t.end(); + client.quit(); + return; + } + try { + const opts = { + vendor: 'nuance', + credentials: { + nuance_tts_uri: process.env.NUANCE_TTS_URI + } + }; + let voices = await getTtsVoices(opts); t.ok(voices.length > 0 && voices[0].language, `GetVoices: successfully retrieved ${voices.length} voices from Nuance`); diff --git a/test/synth.js b/test/synth.js index cca1415..12b42c6 100644 --- a/test/synth.js +++ b/test/synth.js @@ -212,7 +212,7 @@ test('Azure custom voice speech synth tests', async(t) => { client.quit(); }); -test('Nuance speech synth tests', async(t) => { +test('Nuance hosted speech synth tests', async(t) => { const fn = require('..'); const {synthAudio, client} = fn(opts, logger); @@ -251,6 +251,43 @@ test('Nuance speech synth tests', async(t) => { client.quit(); }); +test('Nuance on-prem speech synth tests', async(t) => { + const fn = require('..'); + const {synthAudio, client} = fn(opts, logger); + + if (!process.env.NUANCE_TTS_URI) { + t.pass('skipping Nuance on prem speech synth tests since NUANCE_TTS_URI not provided'); + return t.end(); + } + try { + let opts = await synthAudio(stats, { + vendor: 'nuance', + credentials: { + nuance_tts_uri: process.env.NUANCE_TTS_URI + }, + language: 'en-US', + voice: 'Evan', + text: 'This is a test of on-prem. This is only a test', + }); + t.ok(!opts.servedFromCache, `successfully synthesized nuance audio to ${opts.filePath}`); + + opts = await synthAudio(stats, { + vendor: 'nuance', + credentials: { + nuance_tts_uri: process.env.NUANCE_TTS_URI + }, + language: 'en-US', + voice: 'Evan', + text: 'This is a test of on-prem. This is only a test', + }); + t.ok(opts.servedFromCache, `successfully retrieved nuance audio from cache ${opts.filePath}`); + } catch (err) { + console.error(err); + t.end(err); + } + client.quit(); +}); + test('Nvidia speech synth tests', async(t) => { const fn = require('..'); const {synthAudio, client} = fn(opts, logger);