feat: add nuance, riva, ibm

Quan HL
2023-02-21 08:49:15 +07:00
parent deee11b3f3
commit 1fed5aefab
23 changed files with 6779 additions and 0 deletions

13
config/test.json Normal file

@@ -0,0 +1,13 @@
{
"logging": {
"level": "error"
},
"redis": {
"host": "127.0.0.1",
"port": 3379
},
"redis-auth": {
"host": "127.0.0.1",
"port": 3380
}
}

index.js Normal file

@@ -0,0 +1,32 @@
const {noopLogger} = require('./lib/utils');
const promisify = require('@jambonz/promisify-redis');
const redis = promisify(require('redis'));
module.exports = (opts, logger) => {
const {host = '127.0.0.1', port = 6379, tls = false} = opts;
logger = logger || noopLogger;
const url = process.env.JAMBONES_REDIS_USERNAME && process.env.JAMBONES_REDIS_PASSWORD ?
`${process.env.JAMBONES_REDIS_USERNAME}:${process.env.JAMBONES_REDIS_PASSWORD}@${host}:${port}` :
`${host}:${port}`;
const client = redis.createClient(tls ? `rediss://${url}` : `redis://${url}`);
['ready', 'connect', 'reconnecting', 'error', 'end', 'warning']
.forEach((event) => {
client.on(event, (...args) => {
if ('error' === event) {
if (process.env.NODE_ENV === 'test' && args[0]?.code === 'ECONNREFUSED') return;
logger.error({...args}, '@jambonz/realtimedb-helpers - redis error');
}
else logger.debug({args}, `redis event ${event}`);
});
});
return {
client,
purgeTtsCache: require('./lib/purge-tts-cache').bind(null, client, logger),
synthAudio: require('./lib/synth-audio').bind(null, client, logger),
getNuanceAccessToken: require('./lib/get-nuance-access-token').bind(null, client, logger),
getIbmAccessToken: require('./lib/get-ibm-access-token').bind(null, client, logger),
getTtsVoices: require('./lib/get-tts-voices').bind(null, client, logger),
};
};
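
For context, a consumer initializes the module with redis options and a logger, then calls the pre-bound helpers. A minimal usage sketch, mirroring the test wiring below (the no-op stats sink is the same one the tests use; credentials values are placeholders):

const logger = require('pino')();
const {client, synthAudio} = require('..')({host: '127.0.0.1', port: 6379}, logger);
const stats = {increment: () => {}, histogram: () => {}};

(async() => {
  const {filePath, servedFromCache} = await synthAudio(stats, {
    vendor: 'google',
    credentials: {credentials: {client_email: '...', private_key: '...'}},
    language: 'en-GB',
    gender: 'MALE',
    text: 'This is a test. This is only a test',
  });
  logger.info({filePath, servedFromCache}, 'synthesized audio');
  client.quit();
})();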

lib/get-ibm-access-token.js Normal file

@@ -0,0 +1,48 @@
const formurlencoded = require('form-urlencoded');
const {Pool} = require('undici');
const pool = new Pool('https://iam.cloud.ibm.com');
const {makeIbmKey, noopLogger} = require('./utils');
const debug = require('debug')('jambonz:realtimedb-helpers');
const HTTP_TIMEOUT = 5000;
async function getIbmAccessToken(client, logger, apiKey) {
logger = logger || noopLogger;
try {
const key = makeIbmKey(apiKey);
const access_token = await client.getAsync(key);
if (access_token) return {access_token, servedFromCache: true};
/* access token not found in cache, so fetch it from Ibm */
const payload = {
grant_type: 'urn:ibm:params:oauth:grant-type:apikey',
apikey: apiKey
};
const {statusCode, headers, body} = await pool.request({
path: '/identity/token',
method: 'POST',
headers: {
'Content-Type': 'application/x-www-form-urlencoded'
},
body: formurlencoded(payload),
timeout: HTTP_TIMEOUT,
followRedirects: false
});
if (200 !== statusCode) {
const json = await body.json();
logger.debug({statusCode, headers, body: json}, 'error fetching access token from IBM');
const err = new Error();
err.statusCode = statusCode;
throw err;
}
const json = await body.json();
await client.setAsync(key, json.access_token, 'EX', json.expires_in - 30);
return {...json, servedFromCache: false};
} catch (err) {
debug(err, 'getIbmAccessToken: Error retrieving IBM access token');
logger.error(err, 'getIbmAccessToken: Error retrieving IBM access token');
throw err;
}
}
module.exports = getIbmAccessToken;
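
Via the index wiring, client and logger are pre-bound, so callers pass only the api key. A short sketch mirroring test/ibm.js below (assumes an async context):

const {client, getIbmAccessToken} = require('..')(opts, logger);
const {access_token, servedFromCache} = await getIbmAccessToken(process.env.IBM_API_KEY);
// a second call within expires_in - 30 seconds is served from the redis cache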

lib/get-nuance-access-token.js Normal file

@@ -0,0 +1,49 @@
const formurlencoded = require('form-urlencoded');
const {Pool} = require('undici');
const pool = new Pool('https://auth.crt.nuance.com');
const {makeNuanceKey, makeBasicAuthHeader, noopLogger} = require('./utils');
const debug = require('debug')('jambonz:realtimedb-helpers');
const HTTP_TIMEOUT = 5000;
async function getNuanceAccessToken(client, logger, clientId, secret, scope) {
logger = logger || noopLogger;
try {
const key = makeNuanceKey(clientId, secret, scope);
const access_token = await client.getAsync(key);
if (access_token) return {access_token, servedFromCache: true};
/* access token not found in cache, so fetch it from Nuance */
const payload = {
grant_type: 'client_credentials',
scope
};
const auth = makeBasicAuthHeader(clientId, secret);
const {statusCode, headers, body} = await pool.request({
path: '/oauth2/token',
method: 'POST',
headers: {
...auth,
'Content-Type': 'application/x-www-form-urlencoded'
},
body: formurlencoded(payload),
timeout: HTTP_TIMEOUT,
followRedirects: false
});
if (200 !== statusCode) {
logger.debug({statusCode, headers, body: await body.text()}, 'error fetching access token from Nuance');
const err = new Error();
err.statusCode = statusCode;
throw err;
}
const json = await body.json();
await client.setAsync(key, json.access_token, 'EX', json.expires_in - 30);
return {...json, servedFromCache: false};
} catch (err) {
debug(err, `getNuanceAccessToken: Error retrieving Nuance access token for client_id ${clientId}`);
logger.error(err, `getNuanceAccessToken: Error retrieving Nuance access token for client_id ${clientId}`);
throw err;
}
}
module.exports = getNuanceAccessToken;

111
lib/get-tts-voices.js Normal file

@@ -0,0 +1,111 @@
const assert = require('assert');
const {noopLogger, createNuanceClient} = require('./utils');
const getNuanceAccessToken = require('./get-nuance-access-token');
const {GetVoicesRequest, Voice} = require('../stubs/nuance/synthesizer_pb');
const TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');
const { IamAuthenticator } = require('ibm-watson/auth');
const getIbmVoices = async(client, logger, credentials) => {
const {tts_region, tts_api_key} = credentials;
const textToSpeech = new TextToSpeechV1({
authenticator: new IamAuthenticator({
apikey: tts_api_key,
}),
serviceUrl: `https://api.${tts_region}.text-to-speech.watson.cloud.ibm.com`
});
const voices = await textToSpeech.listVoices();
return voices;
};
const getNuanceVoices = async(client, logger, credentials) => {
const {client_id: clientId, secret} = credentials;
return new Promise(async(resolve, reject) => {
/* get a nuance access token */
let token, nuanceClient;
try {
const access_token = await getNuanceAccessToken(client, logger, clientId, secret, 'tts');
token = access_token.access_token;
nuanceClient = await createNuanceClient(token);
} catch (err) {
logger.error({err}, 'getTtsVoices: error retrieving access token');
return reject(err);
}
/* retrieve all voices */
const v = new Voice();
const request = new GetVoicesRequest();
request.setVoice(v);
nuanceClient.getVoices(request, (err, response) => {
if (err) {
logger.error({err, clientId}, 'getTtsVoices: error retrieving voices');
return reject(err);
}
/* return all the voices that are not restricted and eliminate duplicates */
const voices = response.getVoicesList()
.map((v) => {
return {
language: v.getLanguage(),
name: v.getName(),
model: v.getModel(),
gender: v.getGender() === 1 ? 'male' : 'female',
restricted: v.getRestricted()
};
});
const filtered = voices
.filter((v) => v.restricted === false)
.map((v) => {
delete v.restricted;
return v;
})
.sort((a, b) => {
if (a.language < b.language) return -1;
if (a.language > b.language) return 1;
if (a.name < b.name) return -1;
return 1;
});
const arr = [...new Set(filtered.map((v) => JSON.stringify(v)))]
.map((v) => JSON.parse(v));
resolve(arr);
});
});
};
/**
* Retrieve the list of text-to-speech voices offered by a vendor.
*
* @param {*} client - redis client
* @param {*} logger - pino logger
* @param {object} opts - options
* @param {string} opts.vendor - 'nuance' or 'ibm'
* @param {object} opts.credentials - vendor-specific credentials
* @returns for nuance, an array of unrestricted, de-duplicated voices sorted by
* language and name; for ibm, the raw listVoices response
*/
async function getTtsVoices(client, logger, {vendor, credentials}) {
logger = logger || noopLogger;
assert.ok(['nuance', 'ibm'].includes(vendor),
`getTtsVoices not supported for vendor ${vendor}`);
switch (vendor) {
case 'nuance':
return getNuanceVoices(client, logger, credentials);
case 'ibm':
return getIbmVoices(client, logger, credentials);
default:
break;
}
}
module.exports = getTtsVoices;

46
lib/purge-tts-cache.js Normal file

@@ -0,0 +1,46 @@
const {noopLogger, makeSynthKey} = require('./utils');
const debug = require('debug')('jambonz:realtimedb-helpers');
/**
* Scan TTS Cache and purge records, use specific settings to purge just one
* @param {object} opts - options
* @param {boolean} opts.all - purge all records, or only the one matching the options below; true by default
* @param {string} opts.vendor - 'google' or 'aws' ('polly' is an alias for 'aws')
* @param {string} opts.language - language code
* @param {string} opts.voice - voice identifier
* @param {string} opts.text - text or ssml to synthesize
* @returns {object} result - {error, purgedCount}
*/
async function purgeTtsCache(client, logger, {all, vendor, language, voice, deploymentId, engine, text} = {all: true}) {
logger = logger || noopLogger;
let purgedCount = 0, error;
try {
if (all) {
const keys = await client.keysAsync('tts:*');
purgedCount = keys.length ? await client.delAsync(keys) : 0;
} else {
const key = makeSynthKey({
vendor,
language: language || '',
voice: voice || deploymentId,
engine,
text,
});
purgedCount = await client.delAsync(key);
if (purgedCount === 0) error = 'Specified item not found';
}
} catch (err) {
debug(err, 'purgeTtsCache: Error');
logger.error(err, 'purgeTtsCache: Error');
error = err.message ?? 'Unknown Error';
}
logger.info(`purgeTtsCache: purged ${purgedCount} records`);
return {error, purgedCount};
}
module.exports = purgeTtsCache;
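
Usage follows the two modes in the JSDoc: purge the whole cache, or purge one record keyed by the same fields makeSynthKey hashes. A sketch mirroring the cache tests below (the engine value here is illustrative):

const {client, purgeTtsCache} = require('..')(opts, logger);

// purge everything under tts:*
const {purgedCount} = await purgeTtsCache();

// purge a single cached result; error is set if no matching key exists
const {error} = await purgeTtsCache({
  all: false,
  vendor: 'aws',
  language: 'en-US',
  voice: 'Joey',
  engine: 'standard',
  text: 'This is a test. This is only a test',
});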

436
lib/synth-audio.js Normal file

@@ -0,0 +1,436 @@
const assert = require('assert');
const fs = require('fs');
const bent = require('bent');
const ttsGoogle = require('@google-cloud/text-to-speech');
//const Polly = require('aws-sdk/clients/polly');
const { PollyClient, SynthesizeSpeechCommand } = require('@aws-sdk/client-polly');
const sdk = require('microsoft-cognitiveservices-speech-sdk');
const TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');
const { IamAuthenticator } = require('ibm-watson/auth');
const {
AudioConfig,
ResultReason,
SpeechConfig,
SpeechSynthesizer,
CancellationDetails,
SpeechSynthesisOutputFormat
} = sdk;
const {makeSynthKey, createNuanceClient, noopLogger, createRivaClient} = require('./utils');
const getNuanceAccessToken = require('./get-nuance-access-token');
const {
SynthesisRequest,
Voice,
AudioFormat,
AudioParameters,
PCM,
Input,
Text,
SSML,
EventParameters
} = require('../stubs/nuance/synthesizer_pb');
const {SynthesizeSpeechRequest} = require('../stubs/riva/proto/riva_tts_pb');
const {AudioEncoding} = require('../stubs/riva/proto/riva_audio_pb');
const debug = require('debug')('jambonz:realtimedb-helpers');
const EXPIRES = 3600 * 24; // cache tts for 24 hours
const TMP_FOLDER = '/tmp';
/**
* Synthesize speech to an mp3 file, and also cache the generated speech
* in redis (base64 format) for 24 hours so as to avoid unnecessarily paying
* time and again for speech synthesis of the same text.
* It is the responsibility of the caller to unlink the mp3 file after use.
*
* @param {*} client - redis client
* @param {*} logger - pino logger
* @param {object} opts - options
* @param {string} opts.vendor - 'google', 'aws' ('polly' is an alias for 'aws'), 'microsoft', 'wellsaid', 'nuance', 'nvidia' or 'ibm'
* @param {string} opts.language - language code
* @param {string} opts.voice - voice identifier
* @param {string} opts.text - text or ssml to synthesize
* @param {boolean} opts.disableTtsCache - disable TTS Cache retrieval
* @returns object containing filepath to an mp3 file in the /tmp folder containing
* the synthesized audio, and a variable indicating whether it was served from cache
*/
async function synthAudio(client, logger, stats, {
vendor, language, voice, gender, text, engine, salt, model, credentials, deploymentId, disableTtsCache
}) {
let audioBuffer;
let servedFromCache = false;
let rtt;
logger = logger || noopLogger;
assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'ibm'].includes(vendor),
`synthAudio supported vendors are google, aws, microsoft, wellsaid, nuance, nvidia and ibm, not ${vendor}`);
if ('google' === vendor) {
assert.ok(language, 'synthAudio requires language when google is used');
}
else if (['aws', 'polly'].includes(vendor)) {
assert.ok(voice, 'synthAudio requires voice when aws polly is used');
}
else if ('microsoft' === vendor) {
assert.ok(language || deploymentId, 'synthAudio requires language when microsoft is used');
assert.ok(voice || deploymentId, 'synthAudio requires voice when microsoft is used');
}
else if ('nuance' === vendor) {
assert.ok(voice, 'synthAudio requires voice when nuance is used');
assert.ok(credentials.client_id, 'synthAudio requires client_id in credentials when nuance is used');
assert.ok(credentials.secret, 'synthAudio requires secret in credentials when nuance is used');
}
else if ('nvidia' === vendor) {
assert.ok(voice, 'synthAudio requires voice when nvidia is used');
assert.ok(language, 'synthAudio requires language when nvidia is used');
assert.ok(credentials.riva_uri, 'synthAudio requires riva_uri in credentials when nvidia is used');
}
else if ('ibm' === vendor) {
assert.ok(voice, 'synthAudio requires voice when ibm is used');
assert.ok(credentials.tts_region, 'synthAudio requires tts_region in credentials when ibm watson is used');
assert.ok(credentials.tts_api_key, 'synthAudio requires tts_api_key in credentials when ibm watson is used');
}
else if ('wellsaid' === vendor) {
language = 'en-US'; // WellSaid only supports English atm
assert.ok(voice, 'synthAudio requires voice when wellsaid is used');
assert.ok(!text.startsWith('<speak'), 'wellsaid does not support SSML tags');
}
const key = makeSynthKey({
vendor,
language: language || '',
voice: voice || deploymentId,
engine,
text
});
let filePath;
if (['nuance', 'nvidia'].includes(vendor)) {
filePath = `${TMP_FOLDER}/${key.replace('tts:', `tts-${salt || ''}`)}.r8`;
}
else filePath = `${TMP_FOLDER}/${key.replace('tts:', `tts-${salt || ''}`)}.mp3`;
debug(`synth key is ${key}`);
let cached;
if (!disableTtsCache) {
cached = await client.getAsync(key);
}
if (cached) {
// found in cache - extend the expiry and use it
debug('result WAS found in cache');
servedFromCache = true;
stats.increment('tts.cache.requests', ['found:yes']);
audioBuffer = Buffer.from(cached, 'base64');
client.expireAsync(key, EXPIRES).catch((err) => logger.info(err, 'Error setting expires'));
}
if (!cached) {
// not found in cache - go get it from speech vendor and add to cache
debug('result was NOT found in cache');
stats.increment('tts.cache.requests', ['found:no']);
let vendorLabel = vendor;
const startAt = process.hrtime();
switch (vendor) {
case 'google':
audioBuffer = await synthGoogle(logger, {credentials, stats, language, voice, gender, text});
break;
case 'aws':
case 'polly':
vendorLabel = 'aws';
audioBuffer = await synthPolly(logger, {credentials, stats, language, voice, text, engine});
break;
case 'azure':
case 'microsoft':
vendorLabel = 'microsoft';
audioBuffer = await synthMicrosoft(logger, {credentials, stats, language, voice, text, deploymentId, filePath});
break;
case 'nuance':
model = model || 'enhanced';
audioBuffer = await synthNuance(client, logger, {credentials, stats, voice, model, text});
break;
case 'nvidia':
audioBuffer = await synthNvidia(client, logger, {credentials, stats, language, voice, model, text});
break;
case 'ibm':
audioBuffer = await synthIbm(logger, {credentials, stats, voice, text});
break;
case 'wellsaid':
audioBuffer = await synthWellSaid(logger, {credentials, stats, language, voice, text, filePath});
break;
default:
assert.fail(`synthAudio: unsupported speech vendor ${vendor}`);
}
const diff = process.hrtime(startAt);
const time = diff[0] * 1e3 + diff[1] * 1e-6;
rtt = time.toFixed(0);
stats.histogram('tts.response_time', rtt, [`vendor:${vendorLabel}`]);
debug(`tts rtt time for ${text.length} chars on ${vendorLabel}: ${rtt}`);
logger.info(`tts rtt time for ${text.length} chars on ${vendorLabel}: ${rtt}`);
client.setexAsync(key, EXPIRES, audioBuffer.toString('base64'))
.catch((err) => logger.error(err, `error calling setex on key ${key}`));
if ('microsoft' === vendorLabel) return {filePath, servedFromCache, rtt};
}
return new Promise((resolve, reject) => {
fs.writeFile(filePath, audioBuffer, (err) => {
if (err) return reject(err);
resolve({filePath, servedFromCache, rtt});
});
});
}
const synthPolly = async(logger, {credentials, stats, language, voice, engine, text}) => {
try {
const {accessKeyId, secretAccessKey, region} = credentials;
const polly = new PollyClient({region, credentials: {accessKeyId, secretAccessKey}});
const opts = {
Engine: engine,
OutputFormat: 'mp3',
Text: text,
LanguageCode: language,
TextType: text.startsWith('<speak>') ? 'ssml' : 'text',
VoiceId: voice
};
const command = new SynthesizeSpeechCommand(opts);
const data = await polly.send(command);
const chunks = [];
return new Promise((resolve, reject) => {
data.AudioStream
.on('error', (err) => {
logger.info({err}, 'synthAudio: Error synthesizing speech using aws polly');
stats.increment('tts.count', ['vendor:aws', 'accepted:no']);
reject(err);
})
.on('data', (chunk) => {
chunks.push(chunk);
})
.on('end', () => resolve(Buffer.concat(chunks)));
});
} catch (err) {
logger.info({err}, 'synthAudio: Error synthesizing speech using aws polly');
stats.increment('tts.count', ['vendor:aws', 'accepted:no']);
throw err;
}
};
const synthGoogle = async(logger, {credentials, stats, language, voice, gender, text}) => {
const client = new ttsGoogle.TextToSpeechClient(credentials);
const opts = {
voice: {
name: voice,
languageCode: language,
ssmlGender: gender || 'SSML_VOICE_GENDER_UNSPECIFIED'
},
audioConfig: {audioEncoding: 'MP3'}
};
Object.assign(opts, {input: text.startsWith('<speak>') ? {ssml: text} : {text}});
try {
const responses = await client.synthesizeSpeech(opts);
stats.increment('tts.count', ['vendor:google', 'accepted:yes']);
client.close();
return responses[0].audioContent;
} catch (err) {
logger.info({err, opts}, 'synthAudio: Error synthesizing speech using google');
stats.increment('tts.count', ['vendor:google', 'accepted:no']);
client && client.close();
throw err;
}
};
const synthIbm = async(logger, {credentials, stats, voice, text}) => {
const {tts_api_key, tts_region} = credentials;
const params = {
text,
voice,
accept: 'audio/mp3'
};
try {
const textToSpeech = new TextToSpeechV1({
authenticator: new IamAuthenticator({
apikey: tts_api_key,
}),
serviceUrl: `https://api.${tts_region}.text-to-speech.watson.cloud.ibm.com`
});
const r = await textToSpeech.synthesize(params);
const chunks = [];
for await (const chunk of r.result) {
chunks.push(chunk);
}
return Buffer.concat(chunks);
} catch (err) {
logger.info({err, params}, 'synthAudio: Error synthesizing speech using ibm');
stats.increment('tts.count', ['vendor:ibm', 'accepted:no']);
throw new Error(err.statusText || err.message);
}
};
const synthMicrosoft = async(logger, {
credentials,
stats,
language,
voice,
text,
filePath
}) => {
try {
const {api_key: apiKey, region, use_custom_tts, custom_tts_endpoint} = credentials;
let content = text;
const speechConfig = SpeechConfig.fromSubscription(apiKey, region);
speechConfig.speechSynthesisLanguage = language;
speechConfig.speechSynthesisVoiceName = voice;
if (use_custom_tts && custom_tts_endpoint) {
speechConfig.endpointId = custom_tts_endpoint;
/**
* Note: it seems that to use custom voice ssml is required with the voice attribute
* Otherwise sending plain text we get "Voice does not match"
*/
if (!content.startsWith('<speak')) content = `<speak>${text}</speak>`;
}
speechConfig.speechSynthesisOutputFormat = SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3;
const config = AudioConfig.fromAudioFileOutput(filePath);
const synthesizer = new SpeechSynthesizer(speechConfig, config);
if (content.startsWith('<speak>')) {
/* microsoft enforces some properties and uses voice xml element so if the user did not supply do it for them */
const words = content.slice(7, -8).trim().replace(/(\r\n|\n|\r)/gm, ' ');
// eslint-disable-next-line max-len
content = `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="${language}"><voice name="${voice}">${words}</voice></speak>`;
logger.info({content}, 'synthMicrosoft');
}
return new Promise((resolve, reject) => {
const speakAsync = content.startsWith('<speak') ?
synthesizer.speakSsmlAsync.bind(synthesizer) :
synthesizer.speakTextAsync.bind(synthesizer);
speakAsync(
content,
async(result) => {
switch (result.reason) {
case ResultReason.Canceled:
const cancellation = CancellationDetails.fromResult(result);
logger.info({reason: cancellation.errorDetails}, 'synthAudio: (Microsoft) synthesis canceled');
synthesizer.close();
reject(cancellation.errorDetails);
break;
case ResultReason.SynthesizingAudioCompleted:
stats.increment('tts.count', ['vendor:microsoft', 'accepted:yes']);
synthesizer.close();
fs.readFile(filePath, (err, data) => {
if (err) return reject(err);
resolve(data);
});
break;
default:
logger.info({result}, 'synthAudio: (Microsoft) unexpected result');
break;
}
},
(err) => {
logger.info({err}, 'synthAudio: (Microsoft) error synthesizing');
stats.increment('tts.count', ['vendor:microsoft', 'accepted:no']);
synthesizer.close();
reject(err);
});
});
} catch (err) {
logger.info({err}, 'synthAudio: Error synthesizing speech using Microsoft');
stats.increment('tts.count', ['vendor:microsoft', 'accepted:no']);
throw err;
}
};
const synthWellSaid = async(logger, {credentials, stats, language, voice, gender, text}) => {
const {api_key} = credentials;
try {
const post = bent('https://api.wellsaidlabs.com', 'POST', 'buffer', {
'X-Api-Key': api_key,
'Accept': 'audio/mpeg',
'Content-Type': 'application/json'
});
const mp3 = await post('/v1/tts/stream', {
text,
speaker_id: voice
});
return mp3;
} catch (err) {
logger.info({err}, 'synthWellSaid returned error');
throw err;
}
};
const synthNuance = async(client, logger, {credentials, stats, voice, model, text}) => {
/* get a nuance access token */
const {client_id, secret} = credentials;
const {access_token} = await getNuanceAccessToken(client, logger, client_id, secret, 'tts');
const nuanceClient = await createNuanceClient(access_token);
const v = new Voice();
const p = new AudioParameters();
const f = new AudioFormat();
const pcm = new PCM();
const params = new EventParameters();
const request = new SynthesisRequest();
const input = new Input();
if (text.startsWith('<speak')) {
const ssml = new SSML();
ssml.setText(text);
input.setSsml(ssml);
}
else {
const t = new Text();
t.setText(text);
input.setText(t);
}
pcm.setSampleRateHz(8000);
f.setPcm(pcm);
p.setAudioFormat(f);
v.setName(voice);
v.setModel(model);
request.setVoice(v);
request.setAudioParams(p);
request.setInput(input);
request.setEventParams(params);
request.setUserId('jambonz');
return new Promise((resolve, reject) => {
nuanceClient.unarySynthesize(request, (err, response) => {
if (err) {
logger.info({err}, 'synthNuance returned error');
return reject(err);
}
const status = response.getStatus();
const code = status.getCode();
if (code !== 200) {
const message = status.getMessage();
const details = status.getDetails();
return reject({code, message, details});
}
resolve(Buffer.from(response.getAudio()));
});
});
};
const synthNvidia = async(client, logger, {credentials, stats, language, voice, model, text}) => {
const {riva_uri} = credentials;
const rivaClient = await createRivaClient(riva_uri);
const request = new SynthesizeSpeechRequest();
request.setVoiceName(voice);
request.setLanguageCode(language);
request.setSampleRateHz(8000);
request.setEncoding(AudioEncoding.LINEAR_PCM);
request.setText(text);
return new Promise((resolve, reject) => {
logger.debug(`synthNvidia: language ${language} voice ${voice} model ${model} text ${text}`);
rivaClient.synthesize(request, (err, response) => {
if (err) {
logger.info({err}, 'synthNvidia returned error');
return reject(err);
}
resolve(Buffer.from(response.getAudio()));
});
});
};
module.exports = synthAudio;

25
lib/utils.js Normal file

@@ -0,0 +1,25 @@
const crypto = require('crypto');
/**
* Future TODO: cache recently used connections to providers
* to avoid connection overhead during a call.
* Will need to periodically age them out to avoid memory leaks.
*/
//const nuanceClientMap = new Map();
function makeSynthKey({vendor, language, voice, engine = '', text}) {
const hash = crypto.createHash('sha1');
hash.update(`${language}:${vendor}:${voice}:${engine}:${text}`);
return `tts:${hash.digest('hex')}`;
}
const noopLogger = {
info: () => {},
debug: () => {},
error: () => {}
};
module.exports = {
makeSynthKey,
noopLogger
};
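
Note: the other modules also import makeNuanceKey, makeIbmKey, makeBasicAuthHeader, createNuanceClient and createRivaClient from lib/utils, so this 25-line diff appears truncated. A sketch of what the key and auth helpers might look like, modeled on makeSynthKey above — the exact key layout is an assumption, and the gRPC client factories are omitted since their wiring against the generated stubs is not shown here:

// Sketch only: hashed cache keys for vendor access tokens (assumed key layout)
function makeNuanceKey(clientId, secret, scope) {
  const hash = crypto.createHash('sha1');
  hash.update(`${clientId}:${secret}:${scope}`);
  return `nuance:${hash.digest('hex')}`;
}
function makeIbmKey(apiKey) {
  const hash = crypto.createHash('sha1');
  hash.update(apiKey);
  return `ibm:${hash.digest('hex')}`;
}
// Sketch only: HTTP Basic authorization header for the Nuance OAuth2 token endpoint
function makeBasicAuthHeader(username, password) {
  const creds = Buffer.from(`${username}:${password}`).toString('base64');
  return {Authorization: `Basic ${creds}`};
}

These would be exported alongside makeSynthKey and noopLogger.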

1902
package-lock.json generated

File diff suppressed because it is too large

package.json

@@ -24,6 +24,7 @@
},
"homepage": "https://github.com/jambonz/speech-utils#readme",
"dependencies": {
"@aws-sdk/client-polly": "^3.269.0",
"@google-cloud/text-to-speech": "^4.2.0",
"@grpc/grpc-js": "^1.8.7",
"@jambonz/realtimedb-helpers": "^0.6.3",
@@ -36,6 +37,7 @@
"undici": "^5.18.0"
},
"devDependencies": {
"config": "^3.3.9",
"eslint": "^8.33.0",
"eslint-plugin-promise": "^6.1.1",
"nyc": "^15.1.0",

stubs/riva/proto/riva_audio.proto Normal file

@@ -0,0 +1,36 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: MIT
syntax = "proto3";
package nvidia.riva;
option cc_enable_arenas = true;
option go_package = "nvidia.com/riva_speech";
/*
* AudioEncoding specifies the encoding of the audio bytes in the encapsulating message.
*/
enum AudioEncoding {
// Not specified.
ENCODING_UNSPECIFIED = 0;
// Uncompressed 16-bit signed little-endian samples (Linear PCM).
LINEAR_PCM = 1;
// `FLAC` (Free Lossless Audio
// Codec) is the recommended encoding because it is
// lossless--therefore recognition is not compromised--and
// requires only about half the bandwidth of `LINEAR16`. `FLAC` stream
// encoding supports 16-bit and 24-bit samples, however, not all fields in
// `STREAMINFO` are supported.
FLAC = 2;
// 8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law.
MULAW = 3;
OGGOPUS = 4;
// 8-bit samples that compand 13-bit audio samples using G.711 PCMU/a-law.
ALAW = 20;
}

stubs/riva/proto/riva_tts.proto Normal file

@@ -0,0 +1,77 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: MIT
syntax = "proto3";
package nvidia.riva.tts;
option cc_enable_arenas = true;
option go_package = "nvidia.com/riva_speech";
import "riva/proto/riva_audio.proto";
service RivaSpeechSynthesis {
// Used to request text-to-speech from the service. Submit a request containing the
// desired text and configuration, and receive audio bytes in the requested format.
rpc Synthesize(SynthesizeSpeechRequest) returns (SynthesizeSpeechResponse) {}
// Used to request text-to-speech returned via stream as it becomes available.
// Submit a SynthesizeSpeechRequest with desired text and configuration,
// and receive stream of bytes in the requested format.
rpc SynthesizeOnline(SynthesizeSpeechRequest) returns (stream SynthesizeSpeechResponse) {}
//Enables clients to request the configuration of the current Synthesize service, or a specific model within the service.
rpc GetRivaSynthesisConfig(RivaSynthesisConfigRequest) returns (RivaSynthesisConfigResponse) {}
}
message RivaSynthesisConfigRequest {
//If model is specified only return config for model, otherwise return all configs.
string model_name = 1;
}
message RivaSynthesisConfigResponse {
message Config {
string model_name = 1;
map<string,string> parameters = 2;
}
repeated Config model_config = 1;
}
message SynthesizeSpeechRequest {
string text = 1;
string language_code = 2;
// audio encoding params
AudioEncoding encoding = 3;
// The sample rate in hertz (Hz) of the audio output requested through `SynthesizeSpeechRequest` messages.
// Models produce an output at a fixed rate. The sample rate enables you to resample the generated audio output if required.
// You use the sample rate to up-sample or down-sample the audio for various scenarios. For example, the sample rate can be set to 8kHz (kilohertz) if the output
// audio is desired for a low bandwidth application.
// The sample rate values below 8kHz will not produce any meaningful output. Also, up-sampling too much will increase the
// size of the output without improving the output audio quality.
int32 sample_rate_hz = 4;
// voice params
string voice_name = 5;
}
message SynthesizeSpeechResponseMetadata {
// Currently experimental API addition that returns the input text
// after preprocessing has been completed as well as the predicted
// duration for each token.
// Note: this message is subject to future breaking changes, and potential
// removal.
string text = 1;
string processed_text = 2;
repeated float predicted_durations = 8;
}
message SynthesizeSpeechResponse {
bytes audio = 1;
SynthesizeSpeechResponseMetadata meta = 2;
}
/*
*
*/

stubs/riva/proto/riva_audio_grpc_pb.js Normal file

@@ -0,0 +1 @@
// GENERATED CODE -- NO SERVICES IN PROTO

stubs/riva/proto/riva_audio_pb.js Normal file

@@ -0,0 +1,31 @@
// source: riva/proto/riva_audio.proto
/**
* @fileoverview
* @enhanceable
* @suppress {missingRequire} reports error on implicit type usages.
* @suppress {messageConventions} JS Compiler reports an error if a variable or
* field starts with 'MSG_' and isn't a translatable message.
* @public
*/
// GENERATED CODE -- DO NOT EDIT!
/* eslint-disable */
// @ts-nocheck
var jspb = require('google-protobuf');
var goog = jspb;
var global = Function('return this')();
goog.exportSymbol('proto.nvidia.riva.AudioEncoding', null, global);
/**
* @enum {number}
*/
proto.nvidia.riva.AudioEncoding = {
ENCODING_UNSPECIFIED: 0,
LINEAR_PCM: 1,
FLAC: 2,
MULAW: 3,
OGGOPUS: 4,
ALAW: 20
};
goog.object.extend(exports, proto.nvidia.riva);

stubs/riva/proto/riva_tts_grpc_pb.js Normal file

@@ -0,0 +1,99 @@
// GENERATED CODE -- DO NOT EDIT!
// Original file comments:
// SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: MIT
//
'use strict';
var grpc = require('@grpc/grpc-js');
var riva_proto_riva_tts_pb = require('../../riva/proto/riva_tts_pb.js');
var riva_proto_riva_audio_pb = require('../../riva/proto/riva_audio_pb.js');
function serialize_nvidia_riva_tts_RivaSynthesisConfigRequest(arg) {
if (!(arg instanceof riva_proto_riva_tts_pb.RivaSynthesisConfigRequest)) {
throw new Error('Expected argument of type nvidia.riva.tts.RivaSynthesisConfigRequest');
}
return Buffer.from(arg.serializeBinary());
}
function deserialize_nvidia_riva_tts_RivaSynthesisConfigRequest(buffer_arg) {
return riva_proto_riva_tts_pb.RivaSynthesisConfigRequest.deserializeBinary(new Uint8Array(buffer_arg));
}
function serialize_nvidia_riva_tts_RivaSynthesisConfigResponse(arg) {
if (!(arg instanceof riva_proto_riva_tts_pb.RivaSynthesisConfigResponse)) {
throw new Error('Expected argument of type nvidia.riva.tts.RivaSynthesisConfigResponse');
}
return Buffer.from(arg.serializeBinary());
}
function deserialize_nvidia_riva_tts_RivaSynthesisConfigResponse(buffer_arg) {
return riva_proto_riva_tts_pb.RivaSynthesisConfigResponse.deserializeBinary(new Uint8Array(buffer_arg));
}
function serialize_nvidia_riva_tts_SynthesizeSpeechRequest(arg) {
if (!(arg instanceof riva_proto_riva_tts_pb.SynthesizeSpeechRequest)) {
throw new Error('Expected argument of type nvidia.riva.tts.SynthesizeSpeechRequest');
}
return Buffer.from(arg.serializeBinary());
}
function deserialize_nvidia_riva_tts_SynthesizeSpeechRequest(buffer_arg) {
return riva_proto_riva_tts_pb.SynthesizeSpeechRequest.deserializeBinary(new Uint8Array(buffer_arg));
}
function serialize_nvidia_riva_tts_SynthesizeSpeechResponse(arg) {
if (!(arg instanceof riva_proto_riva_tts_pb.SynthesizeSpeechResponse)) {
throw new Error('Expected argument of type nvidia.riva.tts.SynthesizeSpeechResponse');
}
return Buffer.from(arg.serializeBinary());
}
function deserialize_nvidia_riva_tts_SynthesizeSpeechResponse(buffer_arg) {
return riva_proto_riva_tts_pb.SynthesizeSpeechResponse.deserializeBinary(new Uint8Array(buffer_arg));
}
var RivaSpeechSynthesisService = exports.RivaSpeechSynthesisService = {
// Used to request text-to-speech from the service. Submit a request containing the
// desired text and configuration, and receive audio bytes in the requested format.
synthesize: {
path: '/nvidia.riva.tts.RivaSpeechSynthesis/Synthesize',
requestStream: false,
responseStream: false,
requestType: riva_proto_riva_tts_pb.SynthesizeSpeechRequest,
responseType: riva_proto_riva_tts_pb.SynthesizeSpeechResponse,
requestSerialize: serialize_nvidia_riva_tts_SynthesizeSpeechRequest,
requestDeserialize: deserialize_nvidia_riva_tts_SynthesizeSpeechRequest,
responseSerialize: serialize_nvidia_riva_tts_SynthesizeSpeechResponse,
responseDeserialize: deserialize_nvidia_riva_tts_SynthesizeSpeechResponse,
},
// Used to request text-to-speech returned via stream as it becomes available.
// Submit a SynthesizeSpeechRequest with desired text and configuration,
// and receive stream of bytes in the requested format.
synthesizeOnline: {
path: '/nvidia.riva.tts.RivaSpeechSynthesis/SynthesizeOnline',
requestStream: false,
responseStream: true,
requestType: riva_proto_riva_tts_pb.SynthesizeSpeechRequest,
responseType: riva_proto_riva_tts_pb.SynthesizeSpeechResponse,
requestSerialize: serialize_nvidia_riva_tts_SynthesizeSpeechRequest,
requestDeserialize: deserialize_nvidia_riva_tts_SynthesizeSpeechRequest,
responseSerialize: serialize_nvidia_riva_tts_SynthesizeSpeechResponse,
responseDeserialize: deserialize_nvidia_riva_tts_SynthesizeSpeechResponse,
},
// Enables clients to request the configuration of the current Synthesize service, or a specific model within the service.
getRivaSynthesisConfig: {
path: '/nvidia.riva.tts.RivaSpeechSynthesis/GetRivaSynthesisConfig',
requestStream: false,
responseStream: false,
requestType: riva_proto_riva_tts_pb.RivaSynthesisConfigRequest,
responseType: riva_proto_riva_tts_pb.RivaSynthesisConfigResponse,
requestSerialize: serialize_nvidia_riva_tts_RivaSynthesisConfigRequest,
requestDeserialize: deserialize_nvidia_riva_tts_RivaSynthesisConfigRequest,
responseSerialize: serialize_nvidia_riva_tts_RivaSynthesisConfigResponse,
responseDeserialize: deserialize_nvidia_riva_tts_RivaSynthesisConfigResponse,
},
};
exports.RivaSpeechSynthesisClient = grpc.makeGenericClientConstructor(RivaSpeechSynthesisService);

stubs/riva/proto/riva_tts_pb.js generated

File diff suppressed because it is too large

12
test/docker_start.js Normal file

@@ -0,0 +1,12 @@
const test = require('tape').test ;
const exec = require('child_process').exec ;
test('starting docker network..', (t) => {
exec(`docker-compose -f ${__dirname}/docker-compose-testbed.yaml up -d`, (err, stdout, stderr) => {
setTimeout(() => {
t.end(err);
}, 2000);
});
});

12
test/docker_stop.js Normal file

@@ -0,0 +1,12 @@
const test = require('tape').test ;
const exec = require('child_process').exec ;
test('stopping docker network..', (t) => {
t.timeoutAfter(10000);
exec(`docker-compose -f ${__dirname}/docker-compose-testbed.yaml down`, (err, stdout, stderr) => {
//console.log(`stderr: ${stderr}`);
process.exit(0);
});
t.end() ;
});

78
test/ibm.js Normal file

@@ -0,0 +1,78 @@
const test = require('tape').test ;
const config = require('config');
const opts = config.get('redis');
const fs = require('fs');
const logger = require('pino')({level: 'error'});
process.on('unhandledRejection', (reason, p) => {
console.log('Unhandled Rejection at: Promise', p, 'reason:', reason);
});
const stats = {
increment: () => {},
histogram: () => {}
};
test('IBM - get access token', async(t) => {
const fn = require('..');
const {client, getIbmAccessToken} = fn(opts, logger);
if (!process.env.IBM_API_KEY ) {
t.pass('skipping IBM test since no IBM api_key provided');
t.end();
client.quit();
return;
}
try {
let obj = await getIbmAccessToken(process.env.IBM_API_KEY);
//console.log({obj}, 'received access token from IBM');
t.ok(obj.access_token && !obj.servedFromCache, 'successfully received access token from IBM');
obj = await getIbmAccessToken(process.env.IBM_API_KEY);
//console.log({obj}, 'received access token from IBM - second request');
t.ok(obj.access_token && obj.servedFromCache, 'successfully received access token from cache');
await client.flushallAsync();
t.end();
}
catch (err) {
console.error(err);
t.end(err);
}
client.quit();
});
test('IBM - retrieve tts voices test', async(t) => {
const fn = require('..');
const {client, getTtsVoices} = fn(opts, logger);
if (!process.env.IBM_TTS_API_KEY || !process.env.IBM_TTS_REGION) {
t.pass('skipping IBM test since no IBM api_key and/or region provided');
t.end();
client.quit();
return;
}
try {
const opts = {
vendor: 'ibm',
credentials: {
tts_api_key: process.env.IBM_TTS_API_KEY,
tts_region: process.env.IBM_TTS_REGION
}
};
const obj = await getTtsVoices(opts);
const {voices} = obj.result;
//console.log(JSON.stringify(voices));
t.ok(voices.length > 0 && voices[0].language,
`GetVoices: successfully retrieved ${voices.length} voices from IBM`);
await client.flushallAsync();
t.end();
}
catch (err) {
console.error(err);
t.end(err);
}
client.quit();
});

5
test/index.js Normal file

@@ -0,0 +1,5 @@
require('./docker_start');
require('./synth');
require('./nuance');
require('./ibm');
require('./docker_stop');

50
test/nuance.js Normal file

@@ -0,0 +1,50 @@
const test = require('tape').test ;
const config = require('config');
const opts = config.get('redis');
const fs = require('fs');
const logger = require('pino')({level: 'error'});
process.on('unhandledRejection', (reason, p) => {
console.log('Unhandled Rejection at: Promise', p, 'reason:', reason);
});
const stats = {
increment: () => {},
histogram: () => {}
};
test('Nuance tests', async(t) => {
const fn = require('..');
const {client, getTtsVoices} = fn(opts, logger);
if (!process.env.NUANCE_CLIENT_ID || !process.env.NUANCE_SECRET ) {
t.pass('skipping Nuance test since no Nuance client_id and secret provided');
t.end();
client.quit();
return;
}
try {
const opts = {
vendor: 'nuance',
credentials: {
client_id: process.env.NUANCE_CLIENT_ID,
secret: process.env.NUANCE_SECRET
}
};
let voices = await getTtsVoices(opts);
//console.log(`received ${voices.length} voices from Nuance`);
//console.log(JSON.stringify(voices));
t.ok(voices.length > 0 && voices[0].language,
`GetVoices: successfully retrieved ${voices.length} voices from Nuance`);
await client.flushallAsync();
t.end();
}
catch (err) {
console.error(err);
t.end(err);
}
client.quit();
});

382
test/synth.js Normal file

@@ -0,0 +1,382 @@
const test = require('tape').test;
const config = require('config');
const opts = config.get('redis');
const fs = require('fs');
const {makeSynthKey} = require('../lib/utils');
const logger = require('pino')();
process.on('unhandledRejection', (reason, p) => {
console.log('Unhandled Rejection at: Promise', p, 'reason:', reason);
});
const stats = {
increment: () => {
},
histogram: () => {
},
};
test('Google speech synth tests', async(t) => {
const fn = require('..');
const {synthAudio, client} = fn(opts, logger);
if (!process.env.GCP_FILE && !process.env.GCP_JSON_KEY) {
t.pass('skipping google speech synth tests since neither GCP_FILE nor GCP_JSON_KEY provided');
return t.end();
}
try {
const str = process.env.GCP_JSON_KEY || fs.readFileSync(process.env.GCP_FILE);
const creds = JSON.parse(str);
let opts = await synthAudio(stats, {
vendor: 'google',
credentials: {
credentials: {
client_email: creds.client_email,
private_key: creds.private_key,
},
},
language: 'en-GB',
gender: 'MALE',
text: 'This is a test. This is only a test',
salt: 'foo.bar',
});
t.ok(!opts.servedFromCache, `successfully synthesized google audio to ${opts.filePath}`);
opts = await synthAudio(stats, {
vendor: 'google',
credentials: {
credentials: {
client_email: creds.client_email,
private_key: creds.private_key,
},
},
language: 'en-GB',
gender: 'MALE',
text: 'This is a test. This is only a test',
});
t.ok(opts.servedFromCache, `successfully retrieved cached google audio from ${opts.filePath}`);
opts = await synthAudio(stats, {
vendor: 'google',
credentials: {
credentials: {
client_email: creds.client_email,
private_key: creds.private_key,
},
},
disableTtsCache: true,
language: 'en-GB',
gender: 'MALE',
text: 'This is a test. This is only a test',
});
t.ok(!opts.servedFromCache, `successfully synthesized google audio regardless of current cache to ${opts.filePath}`);
} catch (err) {
console.error(err);
t.end(err);
}
client.quit();
});
test('AWS speech synth tests', async(t) => {
const fn = require('..');
const {synthAudio, client} = fn(opts, logger);
if (!process.env.AWS_ACCESS_KEY_ID || !process.env.AWS_SECRET_ACCESS_KEY || !process.env.AWS_REGION) {
t.pass('skipping AWS speech synth tests since AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, or AWS_REGION not provided');
return t.end();
}
try {
let opts = await synthAudio(stats, {
vendor: 'aws',
credentials: {
accessKeyId: process.env.AWS_ACCESS_KEY_ID,
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
region: process.env.AWS_REGION,
},
language: 'en-US',
voice: 'Joey',
text: 'This is a test. This is only a test',
});
t.ok(!opts.servedFromCache, `successfully synthesized aws audio to ${opts.filePath}`);
opts = await synthAudio(stats, {
vendor: 'aws',
credentials: {
accessKeyId: process.env.AWS_ACCESS_KEY_ID,
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
region: process.env.AWS_REGION,
},
language: 'en-US',
voice: 'Joey',
text: 'This is a test. This is only a test',
});
t.ok(opts.servedFromCache, `successfully retrieved aws audio from cache ${opts.filePath}`);
} catch (err) {
console.error(err);
t.end(err);
}
client.quit();
});
test('Azure speech synth tests', async(t) => {
const fn = require('..');
const {synthAudio, client} = fn(opts, logger);
if (!process.env.MICROSOFT_API_KEY || !process.env.MICROSOFT_REGION) {
t.pass('skipping Microsoft speech synth tests since MICROSOFT_API_KEY or MICROSOFT_REGION not provided');
return t.end();
}
try {
const longText = `Henry is best known for his six marriages, including his efforts to have his first marriage
(to Catherine of Aragon) annulled. His disagreement with Pope Clement VII about such an
annulment led Henry to initiate the English Reformation,
separating the Church of England from papal authority. He appointed himself Supreme Head of the Church of England
and dissolved convents and monasteries, for which he was excommunicated.
Henry is also known as "the father of the Royal Navy," as he invested heavily in the navy,
increasing its size from a few to more than 50 ships, and established the Navy Board.`;
let opts = await synthAudio(stats, {
vendor: 'microsoft',
credentials: {
api_key: process.env.MICROSOFT_API_KEY,
region: process.env.MICROSOFT_REGION,
},
language: 'en-US',
voice: 'en-US-ChristopherNeural',
text: longText,
});
t.ok(!opts.servedFromCache, `successfully synthesized microsoft audio to ${opts.filePath}`);
opts = await synthAudio(stats, {
vendor: 'microsoft',
credentials: {
api_key: process.env.MICROSOFT_API_KEY,
region: process.env.MICROSOFT_REGION,
},
language: 'en-US',
voice: 'en-US-ChristopherNeural',
text: longText,
});
t.ok(opts.servedFromCache, `successfully retrieved microsoft audio from cache ${opts.filePath}`);
} catch (err) {
console.error(err);
t.end(err);
}
client.quit();
});
test('Azure custom voice speech synth tests', async(t) => {
const fn = require('..');
const {synthAudio, client} = fn(opts, logger);
if (!process.env.MICROSOFT_CUSTOM_API_KEY || !process.env.MICROSOFT_DEPLOYMENT_ID || !process.env.MICROSOFT_CUSTOM_REGION) {
t.pass('skipping Microsoft speech synth custom voice tests since MICROSOFT_CUSTOM_API_KEY or MICROSOFT_DEPLOYMENT_ID or MICROSOFT_CUSTOM_REGION not provided');
return t.end();
}
try {
const text = 'Hi, this is my custom voice. How does it sound to you? Do I have a future as a virtual bot?';
let opts = await synthAudio(stats, {
vendor: 'microsoft',
credentials: {
api_key: process.env.MICROSOFT_CUSTOM_API_KEY,
region: process.env.MICROSOFT_CUSTOM_REGION,
use_custom_tts: true,
custom_tts_endpoint: process.env.MICROSOFT_DEPLOYMENT_ID,
},
language: 'en-US',
voice: process.env.MICROSOFT_CUSTOM_VOICE,
text,
});
t.ok(!opts.servedFromCache, `successfully synthesized microsoft audio to ${opts.filePath}`);
opts = await synthAudio(stats, {
vendor: 'microsoft',
credentials: {
api_key: process.env.MICROSOFT_CUSTOM_API_KEY,
region: process.env.MICROSOFT_CUSTOM_REGION,
use_custom_tts: true,
custom_tts_endpoint: process.env.MICROSOFT_DEPLOYMENT_ID,
},
language: 'en-US',
voice: process.env.MICROSOFT_CUSTOM_VOICE,
text,
});
t.ok(opts.servedFromCache, `successfully retrieved microsoft custom voice audio from cache ${opts.filePath}`);
} catch (err) {
console.error(err);
t.end(err);
}
client.quit();
});
test('Nuance speech synth tests', async(t) => {
const fn = require('..');
const {synthAudio, client} = fn(opts, logger);
if (!process.env.NUANCE_CLIENT_ID || !process.env.NUANCE_SECRET) {
t.pass('skipping Nuance speech synth tests since NUANCE_CLIENT_ID or NUANCE_SECRET not provided');
return t.end();
}
try {
let opts = await synthAudio(stats, {
vendor: 'nuance',
credentials: {
client_id: process.env.NUANCE_CLIENT_ID,
secret: process.env.NUANCE_SECRET,
},
language: 'en-US',
voice: 'Evan',
text: 'This is a test. This is only a test',
});
t.ok(!opts.servedFromCache, `successfully synthesized nuance audio to ${opts.filePath}`);
opts = await synthAudio(stats, {
vendor: 'nuance',
credentials: {
client_id: process.env.NUANCE_CLIENT_ID,
secret: process.env.NUANCE_SECRET,
},
language: 'en-US',
voice: 'Evan',
text: 'This is a test. This is only a test',
});
t.ok(opts.servedFromCache, `successfully retrieved nuance audio from cache ${opts.filePath}`);
} catch (err) {
console.error(err);
t.end(err);
}
client.quit();
});
test('Nvidia speech synth tests', async(t) => {
const fn = require('..');
const {synthAudio, client} = fn(opts, logger);
if (!process.env.RIVA_URI) {
t.pass('skipping Nvidia speech synth tests since RIVA_URI not provided');
return t.end();
}
try {
let opts = await synthAudio(stats, {
vendor: 'nvidia',
credentials: {
riva_uri: process.env.RIVA_URI,
},
language: 'en-US',
voice: 'English-US.Female-1',
text: 'This is a test. This is only a test',
});
t.ok(!opts.servedFromCache, `successfully synthesized nvidia audio to ${opts.filePath}`);
opts = await synthAudio(stats, {
vendor: 'nvidia',
credentials: {
riva_uri: process.env.RIVA_URI,
},
language: 'en-US',
voice: 'English-US.Female-1',
text: 'This is a test. This is only a test',
});
t.ok(opts.servedFromCache, `successfully retrieved nvidia audio from cache ${opts.filePath}`);
} catch (err) {
console.error(err);
t.end(err);
}
client.quit();
});
test('IBM watson speech synth tests', async(t) => {
const fn = require('..');
const {synthAudio, client} = fn(opts, logger);
if (!process.env.IBM_TTS_API_KEY || !process.env.IBM_TTS_REGION) {
t.pass('skipping IBM Watson speech synth tests since IBM_TTS_API_KEY or IBM_TTS_REGION not provided');
return t.end();
}
const text = `<speak> Hi there and welcome to jambones! jambones is the <sub alias="seapass">CPaaS</sub> designed with the needs of communication service providers in mind. This is an example of simple text-to-speech, but there is so much more you can do. Try us out!</speak>`;
try {
let opts = await synthAudio(stats, {
vendor: 'ibm',
credentials: {
tts_api_key: process.env.IBM_TTS_API_KEY,
tts_region: process.env.IBM_TTS_REGION,
},
language: 'en-US',
voice: 'en-US_AllisonV2Voice',
text,
});
t.ok(!opts.servedFromCache, `successfully synthesized ibm audio to ${opts.filePath}`);
opts = await synthAudio(stats, {
vendor: 'ibm',
credentials: {
tts_api_key: process.env.IBM_TTS_API_KEY,
tts_region: process.env.IBM_TTS_REGION,
},
language: 'en-US',
voice: 'en-US_AllisonV2Voice',
text,
});
t.ok(opts.servedFromCache, `successfully retrieved ibm audio from cache ${opts.filePath}`);
} catch (err) {
console.error(JSON.stringify(err));
t.end(err);
}
client.quit();
});
test('TTS Cache tests', async(t) => {
const fn = require('..');
const {purgeTtsCache, client} = fn(opts, logger);
try {
// save some random tts keys to cache
const minRecords = 8;
for (const i in Array(minRecords).fill(0)) {
await client.setAsync(makeSynthKey({vendor: i, language: i, voice: i, engine: i, text: i}), i);
}
const {purgedCount} = await purgeTtsCache();
t.ok(purgedCount >= minRecords, `successfully purged at least ${minRecords} tts records from cache`);
const cached = (await client.keysAsync('tts:*')).length;
t.equal(cached, 0, `successfully purged all tts records from cache`);
} catch (err) {
console.error(JSON.stringify(err));
t.end(err);
}
try {
// save some random tts keys to cache
for (const i in Array(10).fill(0)) {
await client.setAsync(makeSynthKey({vendor: i, language: i, voice: i, engine: i, text: i}), i);
}
// save a specific key to tts cache
const opts = {vendor: 'aws', language: 'en-US', voice: 'MALE', engine: 'Engine', text: 'Hello World!'};
await client.setAsync(makeSynthKey(opts), opts.text);
const {purgedCount} = await purgeTtsCache({all: false, ...opts});
t.ok(purgedCount === 1, `successfully purged one specific tts record from cache`);
// returns error for unknown key
const {purgedCount: purgedCountWhenErrored, error} = await purgeTtsCache({
all: false,
vendor: 'non-existing',
language: 'non-existing',
voice: 'non-existing',
});
t.ok(purgedCountWhenErrored === 0, `purged no records when specified key was not found`);
t.ok(error, `error returned when specified key was not found`);
// make sure other tts keys are still there
const cached = (await client.keysAsync('tts:*')).length;
t.ok(cached >= 1, `successfully kept all non-specified tts records in cache`);
} catch (err) {
console.error(JSON.stringify(err));
t.end(err);
}
client.quit();
});

2054
test/tmp/redis.conf Normal file

File diff suppressed because it is too large