feat: add nuance, riva, ibm
config/test.json (new file, 13 lines)
@@ -0,0 +1,13 @@
{
  "logging": {
    "level": "error"
  },
  "redis": {
    "host": "127.0.0.1",
    "port": 3379
  },
  "redis-auth": {
    "host": "127.0.0.1",
    "port": 3380
  }
}
index.js (32 lines)
@@ -0,0 +1,32 @@
const {noopLogger} = require('./lib/utils');
const promisify = require('@jambonz/promisify-redis');
const redis = promisify(require('redis'));

module.exports = (opts, logger) => {
  const {host = '127.0.0.1', port = 6379, tls = false} = opts;
  logger = logger || noopLogger;

  const url = process.env.JAMBONES_REDIS_USERNAME && process.env.JAMBONES_REDIS_PASSWORD ?
    `${process.env.JAMBONES_REDIS_USERNAME}:${process.env.JAMBONES_REDIS_PASSWORD}@${host}:${port}` :
    `${host}:${port}`;
  const client = redis.createClient(tls ? `rediss://${url}` : `redis://${url}`);
  ['ready', 'connect', 'reconnecting', 'error', 'end', 'warning']
    .forEach((event) => {
      client.on(event, (...args) => {
        if ('error' === event) {
          if (process.env.NODE_ENV === 'test' && args[0]?.code === 'ECONNREFUSED') return;
          logger.error({...args}, '@jambonz/realtimedb-helpers - redis error');
        }
        else logger.debug({args}, `redis event ${event}`);
      });
    });

  return {
    client,
    purgeTtsCache: require('./lib/purge-tts-cache').bind(null, client, logger),
    synthAudio: require('./lib/synth-audio').bind(null, client, logger),
    getNuanceAccessToken: require('./lib/get-nuance-access-token').bind(null, client, logger),
    getIbmAccessToken: require('./lib/get-ibm-access-token').bind(null, client, logger),
    getTtsVoices: require('./lib/get-tts-voices').bind(null, client, logger),
  };
};
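For orientation, a minimal usage sketch of this factory (not part of the commit; the Redis options and pino logger are illustrative):

const logger = require('pino')();
// returns the redis client plus the bound helper functions
const speech = require('@jambonz/speech-utils')({host: '127.0.0.1', port: 6379}, logger);
const {client, synthAudio, getTtsVoices, purgeTtsCache} = speech;
// pass tls: true in the first argument to connect with rediss:// instead of redis://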
lib/get-ibm-access-token.js (new file, 48 lines)
@@ -0,0 +1,48 @@
const formurlencoded = require('form-urlencoded');
const {Pool} = require('undici');
const pool = new Pool('https://iam.cloud.ibm.com');
const {makeIbmKey, noopLogger} = require('./utils');
const debug = require('debug')('jambonz:realtimedb-helpers');
const HTTP_TIMEOUT = 5000;

async function getIbmAccessToken(client, logger, apiKey) {
  logger = logger || noopLogger;
  try {
    const key = makeIbmKey(apiKey);
    const access_token = await client.getAsync(key);
    if (access_token) return {access_token, servedFromCache: true};

    /* access token not found in cache, so fetch it from IBM */
    const payload = {
      grant_type: 'urn:ibm:params:oauth:grant-type:apikey',
      apikey: apiKey
    };
    const {statusCode, headers, body} = await pool.request({
      path: '/identity/token',
      method: 'POST',
      headers: {
        'Content-Type': 'application/x-www-form-urlencoded'
      },
      body: formurlencoded(payload),
      timeout: HTTP_TIMEOUT,
      followRedirects: false
    });

    if (200 !== statusCode) {
      const json = await body.json();
      logger.debug({statusCode, headers, body: json}, 'error fetching access token from IBM');
      const err = new Error();
      err.statusCode = statusCode;
      throw err;
    }
    const json = await body.json();
    await client.set(key, json.access_token, 'EX', json.expires_in - 30);
    return {...json, servedFromCache: false};
  } catch (err) {
    debug(err, 'getIbmAccessToken: Error retrieving IBM access token');
    logger.error(err, 'getIbmAccessToken: Error retrieving IBM access token');
    throw err;
  }
}

module.exports = getIbmAccessToken;
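A hedged call sketch for the bound export above (not part of the commit; `speech` refers to the factory initialized in the index.js sketch, and the env var name is illustrative):

const {access_token, servedFromCache} = await speech.getIbmAccessToken(process.env.IBM_API_KEY);
// a second call within (expires_in - 30) seconds should return servedFromCache: true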
lib/get-nuance-access-token.js (new file, 49 lines)
@@ -0,0 +1,49 @@
const formurlencoded = require('form-urlencoded');
const {Pool} = require('undici');
const pool = new Pool('https://auth.crt.nuance.com');
const {makeNuanceKey, makeBasicAuthHeader, noopLogger} = require('./utils');
const debug = require('debug')('jambonz:realtimedb-helpers');
const HTTP_TIMEOUT = 5000;

async function getNuanceAccessToken(client, logger, clientId, secret, scope) {
  logger = logger || noopLogger;
  try {
    const key = makeNuanceKey(clientId, secret, scope);
    const access_token = await client.getAsync(key);
    if (access_token) return {access_token, servedFromCache: true};

    /* access token not found in cache, so fetch it from Nuance */
    const payload = {
      grant_type: 'client_credentials',
      scope
    };
    const auth = makeBasicAuthHeader(clientId, secret);
    const {statusCode, headers, body} = await pool.request({
      path: '/oauth2/token',
      method: 'POST',
      headers: {
        ...auth,
        'Content-Type': 'application/x-www-form-urlencoded'
      },
      body: formurlencoded(payload),
      timeout: HTTP_TIMEOUT,
      followRedirects: false
    });

    if (200 !== statusCode) {
      logger.debug({statusCode, headers, body: await body.text()}, 'error fetching access token from Nuance');
      const err = new Error();
      err.statusCode = statusCode;
      throw err;
    }
    const json = await body.json();
    await client.set(key, json.access_token, 'EX', json.expires_in - 30);
    return {...json, servedFromCache: false};
  } catch (err) {
    debug(err, `getNuanceAccessToken: Error retrieving Nuance access token for client_id ${clientId}`);
    logger.error(err, `getNuanceAccessToken: Error retrieving Nuance access token for client_id ${clientId}`);
    throw err;
  }
}

module.exports = getNuanceAccessToken;
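Likewise for Nuance, an illustrative call (scope 'tts' is what lib/synth-audio.js requests internally):

const {access_token} = await speech.getNuanceAccessToken(
  process.env.NUANCE_CLIENT_ID, process.env.NUANCE_SECRET, 'tts');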
lib/get-tts-voices.js (new file, 111 lines)
@@ -0,0 +1,111 @@
const assert = require('assert');
const {noopLogger, createNuanceClient} = require('./utils');
const getNuanceAccessToken = require('./get-nuance-access-token');
const {GetVoicesRequest, Voice} = require('../stubs/nuance/synthesizer_pb');
const TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');
const { IamAuthenticator } = require('ibm-watson/auth');

const getIbmVoices = async(client, logger, credentials) => {
  const {tts_region, tts_api_key} = credentials;
  logger.debug({tts_region}, 'getIbmVoices: fetching voices');

  const textToSpeech = new TextToSpeechV1({
    authenticator: new IamAuthenticator({
      apikey: tts_api_key,
    }),
    serviceUrl: `https://api.${tts_region}.text-to-speech.watson.cloud.ibm.com`
  });

  const voices = await textToSpeech.listVoices();
  return voices;
};

const getNuanceVoices = async(client, logger, credentials) => {
  const {client_id: clientId, secret} = credentials;

  return new Promise(async(resolve, reject) => {
    /* get a nuance access token */
    let token, nuanceClient;
    try {
      const access_token = await getNuanceAccessToken(client, logger, clientId, secret, 'tts');
      token = access_token.access_token;
      nuanceClient = await createNuanceClient(token);
    } catch (err) {
      logger.error({err}, 'getTtsVoices: error retrieving access token');
      return reject(err);
    }
    /* retrieve all voices */
    const v = new Voice();
    const request = new GetVoicesRequest();
    request.setVoice(v);

    nuanceClient.getVoices(request, (err, response) => {
      if (err) {
        logger.error({err, clientId}, 'getTtsVoices: error retrieving voices');
        return reject(err);
      }

      /* return all the voices that are not restricted and eliminate duplicates */
      const voices = response.getVoicesList()
        .map((v) => {
          return {
            language: v.getLanguage(),
            name: v.getName(),
            model: v.getModel(),
            gender: v.getGender() === 1 ? 'male' : 'female',
            restricted: v.getRestricted()
          };
        });
      const filtered = voices
        .filter((v) => v.restricted === false)
        .map((v) => {
          delete v.restricted;
          return v;
        })
        .sort((a, b) => {
          if (a.language < b.language) return -1;
          if (a.language > b.language) return 1;
          if (a.name < b.name) return -1;
          return 1;
        });
      const arr = [...new Set(filtered.map((v) => JSON.stringify(v)))]
        .map((v) => JSON.parse(v));
      resolve(arr);
    });
  });
};

/**
 * Retrieve the available TTS voices from a speech vendor.
 *
 * @param {*} client - redis client
 * @param {*} logger - pino logger
 * @param {object} opts - options
 * @param {string} opts.vendor - 'nuance' or 'ibm'
 * @param {object} opts.credentials - vendor-specific credentials
 * @returns for nuance, an array of {language, name, model, gender} objects;
 * for ibm, the response from the Watson listVoices API
 */
async function getTtsVoices(client, logger, {vendor, credentials}) {
  logger = logger || noopLogger;

  assert.ok(['nuance', 'ibm'].includes(vendor),
    `getTtsVoices not supported for vendor ${vendor}`);

  switch (vendor) {
    case 'nuance':
      return getNuanceVoices(client, logger, credentials);
    case 'ibm':
      return getIbmVoices(client, logger, credentials);
    default:
      break;
  }
}

module.exports = getTtsVoices;
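A usage sketch matching the shape exercised in test/nuance.js (illustrative; `speech` is the initialized factory):

const voices = await speech.getTtsVoices({
  vendor: 'nuance',
  credentials: {client_id: process.env.NUANCE_CLIENT_ID, secret: process.env.NUANCE_SECRET}
});
// => [{language, name, model, gender}, ...], deduplicated and sorted by language then name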
lib/purge-tts-cache.js (new file, 46 lines)
@@ -0,0 +1,46 @@
const {noopLogger, makeSynthKey} = require('./utils');
const debug = require('debug')('jambonz:realtimedb-helpers');

/**
 * Scan the TTS cache and purge records; pass specific settings to purge just one entry.
 * @param {object} opts - options
 * @param {boolean} opts.all - purge all records or only one specific entry; true by default
 * @param {string} opts.vendor - 'google' or 'aws' ('polly' is an alias for 'aws')
 * @param {string} opts.language - language code
 * @param {string} opts.voice - voice identifier
 * @param {string} opts.text - text or ssml to synthesize
 * @returns {object} result - {error, purgedCount}
 */
async function purgeTtsCache(client, logger, {all, vendor, language, voice, deploymentId, engine, text} = {all: true}) {
  logger = logger || noopLogger;

  let purgedCount = 0, error;

  try {
    if (all) {
      const keys = await client.keysAsync('tts:*');
      // DEL errors when given zero keys, so guard against an empty cache
      purgedCount = keys.length ? await client.delAsync(keys) : 0;
    } else {
      const key = makeSynthKey({
        vendor,
        language: language || '',
        voice: voice || deploymentId,
        engine,
        text,
      });
      purgedCount = await client.delAsync(key);
      if (purgedCount === 0) error = 'Specified item not found';
    }
  } catch (err) {
    debug(err, 'purgeTtsCache: Error');
    logger.error(err, 'purgeTtsCache: Error');
    error = err.message ?? 'Unknown Error';
  }

  logger.info(`purgeTtsCache: purged ${purgedCount} records`);
  return {error, purgedCount};
}

module.exports = purgeTtsCache;
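A usage sketch (illustrative; the voice name is a hypothetical Google voice):

let res = await speech.purgeTtsCache();   // {all: true} by default: purge every tts:* key
res = await speech.purgeTtsCache({
  all: false, vendor: 'google', language: 'en-GB', voice: 'en-GB-Wavenet-A',
  text: 'This is a test'
});
// res = {error, purgedCount}; error is set if the specific item was not found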
lib/synth-audio.js (new file, 436 lines)
@@ -0,0 +1,436 @@
const assert = require('assert');
const fs = require('fs');
const bent = require('bent');
const ttsGoogle = require('@google-cloud/text-to-speech');
//const Polly = require('aws-sdk/clients/polly');
const { PollyClient, SynthesizeSpeechCommand } = require('@aws-sdk/client-polly');

const sdk = require('microsoft-cognitiveservices-speech-sdk');
const TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');
const { IamAuthenticator } = require('ibm-watson/auth');
const {
  AudioConfig,
  ResultReason,
  SpeechConfig,
  SpeechSynthesizer,
  CancellationDetails,
  SpeechSynthesisOutputFormat
} = sdk;
const {makeSynthKey, createNuanceClient, noopLogger, createRivaClient} = require('./utils');
const getNuanceAccessToken = require('./get-nuance-access-token');
const {
  SynthesisRequest,
  Voice,
  AudioFormat,
  AudioParameters,
  PCM,
  Input,
  Text,
  SSML,
  EventParameters
} = require('../stubs/nuance/synthesizer_pb');
const {SynthesizeSpeechRequest} = require('../stubs/riva/proto/riva_tts_pb');
const {AudioEncoding} = require('../stubs/riva/proto/riva_audio_pb');
const debug = require('debug')('jambonz:realtimedb-helpers');
const EXPIRES = 3600 * 24; // cache tts for 24 hours
const TMP_FOLDER = '/tmp';

/**
 * Synthesize speech to an mp3 file, and also cache the generated speech
 * in redis (base64 format) for 24 hours so as to avoid unnecessarily paying
 * time and again for speech synthesis of the same text.
 * It is the responsibility of the caller to unlink the mp3 file after use.
 *
 * @param {*} client - redis client
 * @param {*} logger - pino logger
 * @param {object} opts - options
 * @param {string} opts.vendor - 'google' or 'aws' ('polly' is an alias for 'aws')
 * @param {string} opts.language - language code
 * @param {string} opts.voice - voice identifier
 * @param {string} opts.text - text or ssml to synthesize
 * @param {boolean} opts.disableTtsCache - disable TTS Cache retrieval
 * @returns object containing filepath to an mp3 file in the /tmp folder containing
 * the synthesized audio, and a variable indicating whether it was served from cache
 */
async function synthAudio(client, logger, stats, {
  vendor, language, voice, gender, text, engine, salt, model, credentials, deploymentId, disableTtsCache
}) {
  let audioBuffer;
  let servedFromCache = false;
  let rtt;
  logger = logger || noopLogger;

  assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'ibm'].includes(vendor),
    `synthAudio supported vendors are google, aws, microsoft, wellsaid, nuance, nvidia and ibm, not ${vendor}`);
  if ('google' === vendor) {
    assert.ok(language, 'synthAudio requires language when google is used');
  }
  else if (['aws', 'polly'].includes(vendor)) {
    assert.ok(voice, 'synthAudio requires voice when aws polly is used');
  }
  else if ('microsoft' === vendor) {
    assert.ok(language || deploymentId, 'synthAudio requires language when microsoft is used');
    assert.ok(voice || deploymentId, 'synthAudio requires voice when microsoft is used');
  }
  else if ('nuance' === vendor) {
    assert.ok(voice, 'synthAudio requires voice when nuance is used');
    assert.ok(credentials.client_id, 'synthAudio requires client_id in credentials when nuance is used');
    assert.ok(credentials.secret, 'synthAudio requires secret in credentials when nuance is used');
  }
  else if ('nvidia' === vendor) {
    assert.ok(voice, 'synthAudio requires voice when nvidia is used');
    assert.ok(language, 'synthAudio requires language when nvidia is used');
    assert.ok(credentials.riva_uri, 'synthAudio requires riva_uri in credentials when nvidia is used');
  }
  else if ('ibm' === vendor) {
    assert.ok(voice, 'synthAudio requires voice when ibm is used');
    assert.ok(credentials.tts_region, 'synthAudio requires tts_region in credentials when ibm watson is used');
    assert.ok(credentials.tts_api_key, 'synthAudio requires tts_api_key in credentials when ibm watson is used');
  }
  else if ('wellsaid' === vendor) {
    language = 'en-US'; // WellSaid only supports English atm
    assert.ok(voice, 'synthAudio requires voice when wellsaid is used');
    assert.ok(!text.startsWith('<speak'), 'wellsaid does not support SSML tags');
  }

  const key = makeSynthKey({
    vendor,
    language: language || '',
    voice: voice || deploymentId,
    engine,
    text
  });
  let filePath;
  if (['nuance', 'nvidia'].includes(vendor)) {
    filePath = `${TMP_FOLDER}/${key.replace('tts:', `tts-${salt || ''}`)}.r8`;
  }
  else filePath = `${TMP_FOLDER}/${key.replace('tts:', `tts-${salt || ''}`)}.mp3`;
  debug(`synth key is ${key}`);
  let cached;
  if (!disableTtsCache) {
    cached = await client.getAsync(key);
  }
  if (cached) {
    // found in cache - extend the expiry and use it
    debug('result WAS found in cache');
    servedFromCache = true;
    stats.increment('tts.cache.requests', ['found:yes']);
    audioBuffer = Buffer.from(cached, 'base64');
    client.expireAsync(key, EXPIRES).catch((err) => logger.info(err, 'Error setting expires'));
  }
  if (!cached) {
    // not found in cache - go get it from speech vendor and add to cache
    debug('result was NOT found in cache');
    stats.increment('tts.cache.requests', ['found:no']);
    let vendorLabel = vendor;
    const startAt = process.hrtime();
    switch (vendor) {
      case 'google':
        audioBuffer = await synthGoogle(logger, {credentials, stats, language, voice, gender, text});
        break;
      case 'aws':
      case 'polly':
        vendorLabel = 'aws';
        audioBuffer = await synthPolly(logger, {credentials, stats, language, voice, text, engine});
        break;
      case 'azure':
      case 'microsoft':
        vendorLabel = 'microsoft';
        audioBuffer = await synthMicrosoft(logger, {credentials, stats, language, voice, text, deploymentId, filePath});
        break;
      case 'nuance':
        model = model || 'enhanced';
        audioBuffer = await synthNuance(client, logger, {credentials, stats, voice, model, text});
        break;
      case 'nvidia':
        audioBuffer = await synthNvidia(client, logger, {credentials, stats, language, voice, model, text});
        break;
      case 'ibm':
        audioBuffer = await synthIbm(logger, {credentials, stats, voice, text});
        break;
      case 'wellsaid':
        audioBuffer = await synthWellSaid(logger, {credentials, stats, language, voice, text, filePath});
        break;
      default:
        assert.fail(`synthAudio: unsupported speech vendor ${vendor}`);
    }
    const diff = process.hrtime(startAt);
    const time = diff[0] * 1e3 + diff[1] * 1e-6;
    rtt = time.toFixed(0);
    stats.histogram('tts.response_time', rtt, [`vendor:${vendorLabel}`]);
    debug(`tts rtt time for ${text.length} chars on ${vendorLabel}: ${rtt}`);
    logger.info(`tts rtt time for ${text.length} chars on ${vendorLabel}: ${rtt}`);

    client.setexAsync(key, EXPIRES, audioBuffer.toString('base64'))
      .catch((err) => logger.error(err, `error calling setex on key ${key}`));

    if (['microsoft'].includes(vendor)) return {filePath, servedFromCache, rtt};
  }

  return new Promise((resolve, reject) => {
    fs.writeFile(filePath, audioBuffer, (err) => {
      if (err) return reject(err);
      resolve({filePath, servedFromCache, rtt});
    });
  });
}

const synthPolly = async(logger, {credentials, stats, language, voice, engine, text}) => {
  try {
    const polly = new PollyClient(credentials);
    const opts = {
      Engine: engine,
      OutputFormat: 'mp3',
      Text: text,
      LanguageCode: language,
      TextType: text.startsWith('<speak>') ? 'ssml' : 'text',
      VoiceId: voice
    };
    const command = new SynthesizeSpeechCommand(opts);
    const data = await polly.send(command);
    const chunks = [];
    return new Promise((resolve, reject) => {
      data.AudioStream
        .on('error', (err) => {
          logger.info({err}, 'synthAudio: Error synthesizing speech using aws polly');
          stats.increment('tts.count', ['vendor:aws', 'accepted:no']);
          reject(err);
        })
        .on('data', (chunk) => {
          chunks.push(chunk);
        })
        .on('end', () => resolve(Buffer.concat(chunks)));
    });
  } catch (err) {
    logger.info({err}, 'synthAudio: Error synthesizing speech using aws polly');
    stats.increment('tts.count', ['vendor:aws', 'accepted:no']);
    throw err;
  }
};

const synthGoogle = async(logger, {credentials, stats, language, voice, gender, text}) => {
  const client = new ttsGoogle.TextToSpeechClient(credentials);
  const opts = {
    voice: {
      name: voice,
      languageCode: language,
      ssmlGender: gender || 'SSML_VOICE_GENDER_UNSPECIFIED'
    },
    audioConfig: {audioEncoding: 'MP3'}
  };
  Object.assign(opts, {input: text.startsWith('<speak>') ? {ssml: text} : {text}});
  try {
    const responses = await client.synthesizeSpeech(opts);
    stats.increment('tts.count', ['vendor:google', 'accepted:yes']);
    client.close();
    return responses[0].audioContent;
  } catch (err) {
    logger.info({err, opts}, 'synthAudio: Error synthesizing speech using google');
    stats.increment('tts.count', ['vendor:google', 'accepted:no']);
    client && client.close();
    throw err;
  }
};

const synthIbm = async(logger, {credentials, stats, voice, text}) => {
  const {tts_api_key, tts_region} = credentials;
  const params = {
    text,
    voice,
    accept: 'audio/mp3'
  };

  try {
    const textToSpeech = new TextToSpeechV1({
      authenticator: new IamAuthenticator({
        apikey: tts_api_key,
      }),
      serviceUrl: `https://api.${tts_region}.text-to-speech.watson.cloud.ibm.com`
    });

    const r = await textToSpeech.synthesize(params);
    const chunks = [];
    for await (const chunk of r.result) {
      chunks.push(chunk);
    }
    return Buffer.concat(chunks);
  } catch (err) {
    logger.info({err, params}, 'synthAudio: Error synthesizing speech using ibm');
    stats.increment('tts.count', ['vendor:ibm', 'accepted:no']);
    throw new Error(err.statusText || err.message);
  }
};

const synthMicrosoft = async(logger, {
  credentials,
  stats,
  language,
  voice,
  text,
  filePath
}) => {
  try {
    const {api_key: apiKey, region, use_custom_tts, custom_tts_endpoint} = credentials;
    let content = text;
    const speechConfig = SpeechConfig.fromSubscription(apiKey, region);
    speechConfig.speechSynthesisLanguage = language;
    speechConfig.speechSynthesisVoiceName = voice;
    if (use_custom_tts && custom_tts_endpoint) {
      speechConfig.endpointId = custom_tts_endpoint;

      /**
       * Note: it seems that to use a custom voice, ssml is required with the voice attribute.
       * Otherwise, sending plain text we get "Voice does not match".
       */
      if (!content.startsWith('<speak')) content = `<speak>${text}</speak>`;
    }
    speechConfig.speechSynthesisOutputFormat = SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3;
    const config = AudioConfig.fromAudioFileOutput(filePath);
    const synthesizer = new SpeechSynthesizer(speechConfig, config);

    if (content.startsWith('<speak>')) {
      /* microsoft enforces some properties and uses a voice xml element, so if the user did not supply them, do it for them */
      const words = content.slice(7, -8).trim().replace(/(\r\n|\n|\r)/gm, ' ');
      // eslint-disable-next-line max-len
      content = `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="${language}"><voice name="${voice}">${words}</voice></speak>`;
      logger.info({content}, 'synthMicrosoft');
    }

    return new Promise((resolve, reject) => {
      const speakAsync = content.startsWith('<speak') ?
        synthesizer.speakSsmlAsync.bind(synthesizer) :
        synthesizer.speakTextAsync.bind(synthesizer);
      speakAsync(
        content,
        async(result) => {
          switch (result.reason) {
            case ResultReason.Canceled: {
              const cancellation = CancellationDetails.fromResult(result);
              logger.info({reason: cancellation.errorDetails}, 'synthAudio: (Microsoft) synthesis canceled');
              synthesizer.close();
              reject(cancellation.errorDetails);
              break;
            }
            case ResultReason.SynthesizingAudioCompleted:
              stats.increment('tts.count', ['vendor:microsoft', 'accepted:yes']);
              synthesizer.close();
              fs.readFile(filePath, (err, data) => {
                if (err) return reject(err);
                resolve(data);
              });
              break;
            default:
              logger.info({result}, 'synthAudio: (Microsoft) unexpected result');
              break;
          }
        },
        (err) => {
          logger.info({err}, 'synthAudio: (Microsoft) error synthesizing');
          stats.increment('tts.count', ['vendor:microsoft', 'accepted:no']);
          synthesizer.close();
          reject(err);
        });
    });
  } catch (err) {
    logger.info({err}, 'synthAudio: Error synthesizing speech using Microsoft');
    stats.increment('tts.count', ['vendor:microsoft', 'accepted:no']);
    throw err;
  }
};

const synthWellSaid = async(logger, {credentials, stats, language, voice, gender, text}) => {
  const {api_key} = credentials;
  try {
    const post = bent('https://api.wellsaidlabs.com', 'POST', 'buffer', {
      'X-Api-Key': api_key,
      'Accept': 'audio/mpeg',
      'Content-Type': 'application/json'
    });
    const mp3 = await post('/v1/tts/stream', {
      text,
      speaker_id: voice
    });
    return mp3;
  } catch (err) {
    logger.info({err}, 'synthWellSaid returned error');
    throw err;
  }
};

const synthNuance = async(client, logger, {credentials, stats, voice, model, text}) => {
  /* get a nuance access token */
  const {client_id, secret} = credentials;
  const {access_token} = await getNuanceAccessToken(client, logger, client_id, secret, 'tts');
  const nuanceClient = await createNuanceClient(access_token);

  const v = new Voice();
  const p = new AudioParameters();
  const f = new AudioFormat();
  const pcm = new PCM();
  const params = new EventParameters();
  const request = new SynthesisRequest();
  const input = new Input();

  if (text.startsWith('<speak')) {
    const ssml = new SSML();
    ssml.setText(text);
    input.setSsml(ssml);
  }
  else {
    const t = new Text();
    t.setText(text);
    input.setText(t);
  }

  pcm.setSampleRateHz(8000);
  f.setPcm(pcm);
  p.setAudioFormat(f);
  v.setName(voice);
  v.setModel(model);
  request.setVoice(v);
  request.setAudioParams(p);
  request.setInput(input);
  request.setEventParams(params);
  request.setUserId('jambonz');

  return new Promise((resolve, reject) => {
    nuanceClient.unarySynthesize(request, (err, response) => {
      if (err) {
        logger.error({err}, 'synthNuance: error synthesizing');
        return reject(err);
      }
      const status = response.getStatus();
      const code = status.getCode();
      if (code !== 200) {
        const message = status.getMessage();
        const details = status.getDetails();
        return reject({code, message, details});
      }
      resolve(Buffer.from(response.getAudio()));
    });
  });
};

const synthNvidia = async(client, logger, {credentials, stats, language, voice, model, text}) => {
  const {riva_uri} = credentials;
  const rivaClient = await createRivaClient(riva_uri);

  const request = new SynthesizeSpeechRequest();
  request.setVoiceName(voice);
  request.setLanguageCode(language);
  request.setSampleRateHz(8000);
  request.setEncoding(AudioEncoding.LINEAR_PCM);
  request.setText(text);

  return new Promise((resolve, reject) => {
    debug(`synthNvidia: language ${language} voice ${voice} model ${model} text ${text}`);
    rivaClient.synthesize(request, (err, response) => {
      if (err) {
        logger.error({err}, 'synthNvidia: error synthesizing');
        return reject(err);
      }
      resolve(Buffer.from(response.getAudio()));
    });
  });
};

module.exports = synthAudio;
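An end-to-end sketch of the cached synthesis flow (illustrative; the stats object mirrors the no-op used by the tests, and the credentials are placeholders):

const stats = {increment: () => {}, histogram: () => {}};
const {filePath, servedFromCache, rtt} = await speech.synthAudio(stats, {
  vendor: 'google',
  credentials: {credentials: {client_email: '...', private_key: '...'}},
  language: 'en-GB',
  text: 'This is a test. This is only a test',
});
// the caller must unlink filePath when done; a repeat request within 24 hours is served from cache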
lib/utils.js (new file, 25 lines)
@@ -0,0 +1,25 @@
const crypto = require('crypto');
/**
 * Future TODO: cache recently used connections to providers
 * to avoid connection overhead during a call.
 * Will need to periodically age them out to avoid memory leaks.
 */
//const nuanceClientMap = new Map();

function makeSynthKey({vendor, language, voice, engine = '', text}) {
  const hash = crypto.createHash('sha1');
  hash.update(`${language}:${vendor}:${voice}:${engine}:${text}`);
  return `tts:${hash.digest('hex')}`;
}

const noopLogger = {
  info: () => {},
  debug: () => {},
  error: () => {}
};

module.exports = {
  makeSynthKey,
  noopLogger
};
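A quick sketch of the cache-key derivation (values illustrative):

const {makeSynthKey} = require('./lib/utils');
// sha1 over `${language}:${vendor}:${voice}:${engine}:${text}`, prefixed with 'tts:'
const key = makeSynthKey({vendor: 'google', language: 'en-GB', voice: 'en-GB-Wavenet-A', text: 'hello'});
// lib/synth-audio.js derives its /tmp file name from this key, so identical requests collide on purpose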
package-lock.json (generated, 1902 lines)
File diff suppressed because it is too large
package.json
@@ -24,6 +24,7 @@
   },
   "homepage": "https://github.com/jambonz/speech-utils#readme",
   "dependencies": {
+    "@aws-sdk/client-polly": "^3.269.0",
     "@google-cloud/text-to-speech": "^4.2.0",
     "@grpc/grpc-js": "^1.8.7",
     "@jambonz/realtimedb-helpers": "^0.6.3",
@@ -36,6 +37,7 @@
     "undici": "^5.18.0"
   },
   "devDependencies": {
+    "config": "^3.3.9",
     "eslint": "^8.33.0",
     "eslint-plugin-promise": "^6.1.1",
     "nyc": "^15.1.0",
protos/riva/proto/riva_audio.proto (new file, 36 lines)
@@ -0,0 +1,36 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: MIT

syntax = "proto3";

package nvidia.riva;

option cc_enable_arenas = true;
option go_package = "nvidia.com/riva_speech";

/*
 * AudioEncoding specifies the encoding of the audio bytes in the encapsulating message.
 */
enum AudioEncoding {
  // Not specified.
  ENCODING_UNSPECIFIED = 0;

  // Uncompressed 16-bit signed little-endian samples (Linear PCM).
  LINEAR_PCM = 1;

  // `FLAC` (Free Lossless Audio
  // Codec) is the recommended encoding because it is
  // lossless--therefore recognition is not compromised--and
  // requires only about half the bandwidth of `LINEAR16`. `FLAC` stream
  // encoding supports 16-bit and 24-bit samples, however, not all fields in
  // `STREAMINFO` are supported.
  FLAC = 2;

  // 8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law.
  MULAW = 3;

  OGGOPUS = 4;

  // 8-bit samples that compand 13-bit audio samples using G.711 PCMU/a-law.
  ALAW = 20;
}
protos/riva/proto/riva_tts.proto (new file, 77 lines)
@@ -0,0 +1,77 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: MIT

syntax = "proto3";

package nvidia.riva.tts;

option cc_enable_arenas = true;
option go_package = "nvidia.com/riva_speech";

import "riva/proto/riva_audio.proto";

service RivaSpeechSynthesis {
  // Used to request text-to-speech from the service. Submit a request containing the
  // desired text and configuration, and receive audio bytes in the requested format.
  rpc Synthesize(SynthesizeSpeechRequest) returns (SynthesizeSpeechResponse) {}

  // Used to request text-to-speech returned via stream as it becomes available.
  // Submit a SynthesizeSpeechRequest with desired text and configuration,
  // and receive stream of bytes in the requested format.
  rpc SynthesizeOnline(SynthesizeSpeechRequest) returns (stream SynthesizeSpeechResponse) {}

  // Enables clients to request the configuration of the current Synthesize service, or a specific model within the service.
  rpc GetRivaSynthesisConfig(RivaSynthesisConfigRequest) returns (RivaSynthesisConfigResponse) {}
}

message RivaSynthesisConfigRequest {
  // If model is specified only return config for model, otherwise return all configs.
  string model_name = 1;
}

message RivaSynthesisConfigResponse {
  message Config {
    string model_name = 1;
    map<string,string> parameters = 2;
  }

  repeated Config model_config = 1;
}

message SynthesizeSpeechRequest {
  string text = 1;
  string language_code = 2;
  // audio encoding params
  AudioEncoding encoding = 3;

  // The sample rate in hertz (Hz) of the audio output requested through `SynthesizeSpeechRequest` messages.
  // Models produce an output at a fixed rate. The sample rate enables you to resample the generated audio output if required.
  // You use the sample rate to up-sample or down-sample the audio for various scenarios. For example, the sample rate can be set to 8kHz (kilohertz) if the output
  // audio is desired for a low bandwidth application.
  // The sample rate values below 8kHz will not produce any meaningful output. Also, up-sampling too much will increase the
  // size of the output without improving the output audio quality.

  int32 sample_rate_hz = 4;
  // voice params
  string voice_name = 5;
}

message SynthesizeSpeechResponseMetadata {
  // Currently experimental API addition that returns the input text
  // after preprocessing has been completed as well as the predicted
  // duration for each token.
  // Note: this message is subject to future breaking changes, and potential
  // removal.
  string text = 1;
  string processed_text = 2;
  repeated float predicted_durations = 8;
}

message SynthesizeSpeechResponse {
  bytes audio = 1;
  SynthesizeSpeechResponseMetadata meta = 2;
}
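For orientation, a minimal sketch of calling the generated stub directly (not part of the commit; the endpoint and voice name are assumptions, and lib/synth-audio.js wraps this behind createRivaClient):

const grpc = require('@grpc/grpc-js');
const {RivaSpeechSynthesisClient} = require('./stubs/riva/proto/riva_tts_grpc_pb');
const {SynthesizeSpeechRequest} = require('./stubs/riva/proto/riva_tts_pb');
const {AudioEncoding} = require('./stubs/riva/proto/riva_audio_pb');

const client = new RivaSpeechSynthesisClient('localhost:50051', grpc.credentials.createInsecure());
const request = new SynthesizeSpeechRequest();
request.setText('hello world');
request.setLanguageCode('en-US');
request.setVoiceName('English-US.Female-1'); // assumed voice name
request.setSampleRateHz(8000);
request.setEncoding(AudioEncoding.LINEAR_PCM);
client.synthesize(request, (err, response) => {
  if (err) throw err;
  const audio = Buffer.from(response.getAudio()); // raw linear PCM at 8kHz
});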
stubs/riva/proto/riva_audio_grpc_pb.js (new file, 1 line)
@@ -0,0 +1 @@
// GENERATED CODE -- NO SERVICES IN PROTO
stubs/riva/proto/riva_audio_pb.js (new file, 31 lines)
@@ -0,0 +1,31 @@
// source: riva/proto/riva_audio.proto
/**
 * @fileoverview
 * @enhanceable
 * @suppress {missingRequire} reports error on implicit type usages.
 * @suppress {messageConventions} JS Compiler reports an error if a variable or
 * field starts with 'MSG_' and isn't a translatable message.
 * @public
 */
// GENERATED CODE -- DO NOT EDIT!
/* eslint-disable */
// @ts-nocheck

var jspb = require('google-protobuf');
var goog = jspb;
var global = Function('return this')();

goog.exportSymbol('proto.nvidia.riva.AudioEncoding', null, global);
/**
 * @enum {number}
 */
proto.nvidia.riva.AudioEncoding = {
  ENCODING_UNSPECIFIED: 0,
  LINEAR_PCM: 1,
  FLAC: 2,
  MULAW: 3,
  OGGOPUS: 4,
  ALAW: 20
};

goog.object.extend(exports, proto.nvidia.riva);
stubs/riva/proto/riva_tts_grpc_pb.js (new file, 99 lines)
@@ -0,0 +1,99 @@
// GENERATED CODE -- DO NOT EDIT!

// Original file comments:
// SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: MIT
//
'use strict';
var grpc = require('@grpc/grpc-js');
var riva_proto_riva_tts_pb = require('../../riva/proto/riva_tts_pb.js');
var riva_proto_riva_audio_pb = require('../../riva/proto/riva_audio_pb.js');

function serialize_nvidia_riva_tts_RivaSynthesisConfigRequest(arg) {
  if (!(arg instanceof riva_proto_riva_tts_pb.RivaSynthesisConfigRequest)) {
    throw new Error('Expected argument of type nvidia.riva.tts.RivaSynthesisConfigRequest');
  }
  return Buffer.from(arg.serializeBinary());
}

function deserialize_nvidia_riva_tts_RivaSynthesisConfigRequest(buffer_arg) {
  return riva_proto_riva_tts_pb.RivaSynthesisConfigRequest.deserializeBinary(new Uint8Array(buffer_arg));
}

function serialize_nvidia_riva_tts_RivaSynthesisConfigResponse(arg) {
  if (!(arg instanceof riva_proto_riva_tts_pb.RivaSynthesisConfigResponse)) {
    throw new Error('Expected argument of type nvidia.riva.tts.RivaSynthesisConfigResponse');
  }
  return Buffer.from(arg.serializeBinary());
}

function deserialize_nvidia_riva_tts_RivaSynthesisConfigResponse(buffer_arg) {
  return riva_proto_riva_tts_pb.RivaSynthesisConfigResponse.deserializeBinary(new Uint8Array(buffer_arg));
}

function serialize_nvidia_riva_tts_SynthesizeSpeechRequest(arg) {
  if (!(arg instanceof riva_proto_riva_tts_pb.SynthesizeSpeechRequest)) {
    throw new Error('Expected argument of type nvidia.riva.tts.SynthesizeSpeechRequest');
  }
  return Buffer.from(arg.serializeBinary());
}

function deserialize_nvidia_riva_tts_SynthesizeSpeechRequest(buffer_arg) {
  return riva_proto_riva_tts_pb.SynthesizeSpeechRequest.deserializeBinary(new Uint8Array(buffer_arg));
}

function serialize_nvidia_riva_tts_SynthesizeSpeechResponse(arg) {
  if (!(arg instanceof riva_proto_riva_tts_pb.SynthesizeSpeechResponse)) {
    throw new Error('Expected argument of type nvidia.riva.tts.SynthesizeSpeechResponse');
  }
  return Buffer.from(arg.serializeBinary());
}

function deserialize_nvidia_riva_tts_SynthesizeSpeechResponse(buffer_arg) {
  return riva_proto_riva_tts_pb.SynthesizeSpeechResponse.deserializeBinary(new Uint8Array(buffer_arg));
}


var RivaSpeechSynthesisService = exports.RivaSpeechSynthesisService = {
  // Used to request text-to-speech from the service. Submit a request containing the
  // desired text and configuration, and receive audio bytes in the requested format.
  synthesize: {
    path: '/nvidia.riva.tts.RivaSpeechSynthesis/Synthesize',
    requestStream: false,
    responseStream: false,
    requestType: riva_proto_riva_tts_pb.SynthesizeSpeechRequest,
    responseType: riva_proto_riva_tts_pb.SynthesizeSpeechResponse,
    requestSerialize: serialize_nvidia_riva_tts_SynthesizeSpeechRequest,
    requestDeserialize: deserialize_nvidia_riva_tts_SynthesizeSpeechRequest,
    responseSerialize: serialize_nvidia_riva_tts_SynthesizeSpeechResponse,
    responseDeserialize: deserialize_nvidia_riva_tts_SynthesizeSpeechResponse,
  },
  // Used to request text-to-speech returned via stream as it becomes available.
  // Submit a SynthesizeSpeechRequest with desired text and configuration,
  // and receive stream of bytes in the requested format.
  synthesizeOnline: {
    path: '/nvidia.riva.tts.RivaSpeechSynthesis/SynthesizeOnline',
    requestStream: false,
    responseStream: true,
    requestType: riva_proto_riva_tts_pb.SynthesizeSpeechRequest,
    responseType: riva_proto_riva_tts_pb.SynthesizeSpeechResponse,
    requestSerialize: serialize_nvidia_riva_tts_SynthesizeSpeechRequest,
    requestDeserialize: deserialize_nvidia_riva_tts_SynthesizeSpeechRequest,
    responseSerialize: serialize_nvidia_riva_tts_SynthesizeSpeechResponse,
    responseDeserialize: deserialize_nvidia_riva_tts_SynthesizeSpeechResponse,
  },
  // Enables clients to request the configuration of the current Synthesize service, or a specific model within the service.
  getRivaSynthesisConfig: {
    path: '/nvidia.riva.tts.RivaSpeechSynthesis/GetRivaSynthesisConfig',
    requestStream: false,
    responseStream: false,
    requestType: riva_proto_riva_tts_pb.RivaSynthesisConfigRequest,
    responseType: riva_proto_riva_tts_pb.RivaSynthesisConfigResponse,
    requestSerialize: serialize_nvidia_riva_tts_RivaSynthesisConfigRequest,
    requestDeserialize: deserialize_nvidia_riva_tts_RivaSynthesisConfigRequest,
    responseSerialize: serialize_nvidia_riva_tts_RivaSynthesisConfigResponse,
    responseDeserialize: deserialize_nvidia_riva_tts_RivaSynthesisConfigResponse,
  },
};

exports.RivaSpeechSynthesisClient = grpc.makeGenericClientConstructor(RivaSpeechSynthesisService);
stubs/riva/proto/riva_tts_pb.js (new file, 1278 lines)
File diff suppressed because it is too large
test/docker_start.js (new file, 12 lines)
@@ -0,0 +1,12 @@
const test = require('tape').test;
const exec = require('child_process').exec;

test('starting docker network..', (t) => {
  exec(`docker-compose -f ${__dirname}/docker-compose-testbed.yaml up -d`, (err, stdout, stderr) => {
    setTimeout(() => {
      t.end(err);
    }, 2000);
  });
});
test/docker_stop.js (new file, 12 lines)
@@ -0,0 +1,12 @@
const test = require('tape').test;
const exec = require('child_process').exec;

test('stopping docker network..', (t) => {
  t.timeoutAfter(10000);
  exec(`docker-compose -f ${__dirname}/docker-compose-testbed.yaml down`, (err, stdout, stderr) => {
    //console.log(`stderr: ${stderr}`);
    process.exit(0);
  });
  t.end();
});
test/ibm.js (new file, 78 lines)
@@ -0,0 +1,78 @@
const test = require('tape').test;
const config = require('config');
const opts = config.get('redis');
const fs = require('fs');
const logger = require('pino')({level: 'error'});
process.on('unhandledRejection', (reason, p) => {
  console.log('Unhandled Rejection at: Promise', p, 'reason:', reason);
});

const stats = {
  increment: () => {},
  histogram: () => {}
};

test('IBM - create access key', async(t) => {
  const fn = require('..');
  const {client, getIbmAccessToken} = fn(opts, logger);

  if (!process.env.IBM_API_KEY) {
    t.pass('skipping IBM test since no IBM api_key provided');
    t.end();
    client.quit();
    return;
  }
  try {
    let obj = await getIbmAccessToken(process.env.IBM_API_KEY);
    //console.log({obj}, 'received access token from IBM');
    t.ok(obj.access_token && !obj.servedFromCache, 'successfully received access token from IBM');

    obj = await getIbmAccessToken(process.env.IBM_API_KEY);
    //console.log({obj}, 'received access token from IBM - second request');
    t.ok(obj.access_token && obj.servedFromCache, 'successfully received access token from cache');

    await client.flushallAsync();
    t.end();
  }
  catch (err) {
    console.error(err);
    t.end(err);
  }
  client.quit();
});

test('IBM - retrieve tts voices test', async(t) => {
  const fn = require('..');
  const {client, getTtsVoices} = fn(opts, logger);

  if (!process.env.IBM_TTS_API_KEY || !process.env.IBM_TTS_REGION) {
    t.pass('skipping IBM test since no IBM api_key and/or region provided');
    t.end();
    client.quit();
    return;
  }
  try {
    const opts = {
      vendor: 'ibm',
      credentials: {
        tts_api_key: process.env.IBM_TTS_API_KEY,
        tts_region: process.env.IBM_TTS_REGION
      }
    };
    const obj = await getTtsVoices(opts);
    const {voices} = obj.result;
    //console.log(JSON.stringify(voices));
    t.ok(voices.length > 0 && voices[0].language,
      `GetVoices: successfully retrieved ${voices.length} voices from IBM`);

    await client.flushallAsync();

    t.end();
  }
  catch (err) {
    console.error(err);
    t.end(err);
  }
  client.quit();
});
test/index.js (new file, 5 lines)
@@ -0,0 +1,5 @@
require('./docker_start');
require('./synth');
require('./nuance');
require('./ibm');
require('./docker_stop');
test/nuance.js (new file, 50 lines)
@@ -0,0 +1,50 @@
const test = require('tape').test;
const config = require('config');
const opts = config.get('redis');
const fs = require('fs');
const logger = require('pino')({level: 'error'});
process.on('unhandledRejection', (reason, p) => {
  console.log('Unhandled Rejection at: Promise', p, 'reason:', reason);
});

const stats = {
  increment: () => {},
  histogram: () => {}
};

test('Nuance tests', async(t) => {
  const fn = require('..');
  const {client, getTtsVoices} = fn(opts, logger);

  if (!process.env.NUANCE_CLIENT_ID || !process.env.NUANCE_SECRET) {
    t.pass('skipping Nuance test since no Nuance client_id and secret provided');
    t.end();
    client.quit();
    return;
  }
  try {
    const opts = {
      vendor: 'nuance',
      credentials: {
        client_id: process.env.NUANCE_CLIENT_ID,
        secret: process.env.NUANCE_SECRET
      }
    };
    let voices = await getTtsVoices(opts);
    //console.log(`received ${voices.length} voices from Nuance`);
    //console.log(JSON.stringify(voices));
    t.ok(voices.length > 0 && voices[0].language,
      `GetVoices: successfully retrieved ${voices.length} voices from Nuance`);

    await client.flushallAsync();

    t.end();
  }
  catch (err) {
    console.error(err);
    t.end(err);
  }
  client.quit();
});
test/synth.js (new file, 382 lines)
@@ -0,0 +1,382 @@
|
|||||||
|
const test = require('tape').test;
|
||||||
|
const config = require('config');
|
||||||
|
const opts = config.get('redis');
|
||||||
|
const fs = require('fs');
|
||||||
|
const {makeSynthKey} = require('../lib/utils');
|
||||||
|
const logger = require('pino')();
|
||||||
|
|
||||||
|
process.on('unhandledRejection', (reason, p) => {
|
||||||
|
console.log('Unhandled Rejection at: Promise', p, 'reason:', reason);
|
||||||
|
});
|
||||||
|
|
||||||
|
const stats = {
|
||||||
|
increment: () => {
|
||||||
|
},
|
||||||
|
histogram: () => {
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
test('Google speech synth tests', async(t) => {
|
||||||
|
const fn = require('..');
|
||||||
|
const {synthAudio, client} = fn(opts, logger);
|
||||||
|
|
||||||
|
if (!process.env.GCP_FILE && !process.env.GCP_JSON_KEY) {
|
||||||
|
t.pass('skipping google speech synth tests since neither GCP_FILE nor GCP_JSON_KEY provided');
|
||||||
|
return t.end();
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
const str = process.env.GCP_JSON_KEY || fs.readFileSync(process.env.GCP_FILE);
|
||||||
|
const creds = JSON.parse(str);
|
||||||
|
let opts = await synthAudio(stats, {
|
||||||
|
vendor: 'google',
|
||||||
|
credentials: {
|
||||||
|
credentials: {
|
||||||
|
client_email: creds.client_email,
|
||||||
|
private_key: creds.private_key,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
language: 'en-GB',
|
||||||
|
gender: 'MALE',
|
||||||
|
text: 'This is a test. This is only a test',
|
||||||
|
salt: 'foo.bar',
|
||||||
|
});
|
||||||
|
t.ok(!opts.servedFromCache, `successfully synthesized google audio to ${opts.filePath}`);
|
||||||
|
|
||||||
|
opts = await synthAudio(stats, {
|
||||||
|
vendor: 'google',
|
||||||
|
credentials: {
|
||||||
|
credentials: {
|
||||||
|
client_email: creds.client_email,
|
||||||
|
private_key: creds.private_key,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
language: 'en-GB',
|
||||||
|
gender: 'MALE',
|
||||||
|
text: 'This is a test. This is only a test',
|
||||||
|
});
|
||||||
|
t.ok(opts.servedFromCache, `successfully retrieved cached google audio from ${opts.filePath}`);
|
||||||
|
|
||||||
|
opts = await synthAudio(stats, {
|
||||||
|
vendor: 'google',
|
||||||
|
credentials: {
|
||||||
|
credentials: {
|
||||||
|
client_email: creds.client_email,
|
||||||
|
private_key: creds.private_key,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
disableTtsCache: true,
|
||||||
|
language: 'en-GB',
|
||||||
|
gender: 'MALE',
|
||||||
|
text: 'This is a test. This is only a test',
|
||||||
|
});
|
||||||
|
t.ok(!opts.servedFromCache, `successfully synthesized google audio regardless of current cache to ${opts.filePath}`);
|
||||||
|
} catch (err) {
|
||||||
|
console.error(err);
|
||||||
|
t.end(err);
|
||||||
|
}
|
||||||
|
client.quit();
|
||||||
|
});
|
||||||
|
|
||||||
|
test('AWS speech synth tests', async(t) => {
  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);

  if (!process.env.AWS_ACCESS_KEY_ID || !process.env.AWS_SECRET_ACCESS_KEY || !process.env.AWS_REGION) {
    t.pass('skipping AWS speech synth tests since AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, or AWS_REGION not provided');
    return t.end();
  }
  try {
    let opts = await synthAudio(stats, {
      vendor: 'aws',
      credentials: {
        accessKeyId: process.env.AWS_ACCESS_KEY_ID,
        secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
        region: process.env.AWS_REGION,
      },
      language: 'en-US',
      voice: 'Joey',
      text: 'This is a test. This is only a test',
    });
    t.ok(!opts.servedFromCache, `successfully synthesized aws audio to ${opts.filePath}`);

    opts = await synthAudio(stats, {
      vendor: 'aws',
      credentials: {
        accessKeyId: process.env.AWS_ACCESS_KEY_ID,
        secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
        region: process.env.AWS_REGION,
      },
      language: 'en-US',
      voice: 'Joey',
      text: 'This is a test. This is only a test',
    });
    t.ok(opts.servedFromCache, `successfully retrieved aws audio from cache ${opts.filePath}`);
  } catch (err) {
    console.error(err);
    t.end(err);
  }
  client.quit();
});
test('Azure speech synth tests', async(t) => {
  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);

  if (!process.env.MICROSOFT_API_KEY || !process.env.MICROSOFT_REGION) {
    t.pass('skipping Microsoft speech synth tests since MICROSOFT_API_KEY or MICROSOFT_REGION not provided');
    return t.end();
  }
  try {
    const longText = `Henry is best known for his six marriages, including his efforts to have his first marriage
    (to Catherine of Aragon) annulled. His disagreement with Pope Clement VII about such an
    annulment led Henry to initiate the English Reformation,
    separating the Church of England from papal authority. He appointed himself Supreme Head of the Church of England
    and dissolved convents and monasteries, for which he was excommunicated.
    Henry is also known as "the father of the Royal Navy," as he invested heavily in the navy,
    increasing its size from a few to more than 50 ships, and established the Navy Board.`;

    let opts = await synthAudio(stats, {
      vendor: 'microsoft',
      credentials: {
        api_key: process.env.MICROSOFT_API_KEY,
        region: process.env.MICROSOFT_REGION,
      },
      language: 'en-US',
      voice: 'en-US-ChristopherNeural',
      text: longText,
    });
    t.ok(!opts.servedFromCache, `successfully synthesized microsoft audio to ${opts.filePath}`);

    opts = await synthAudio(stats, {
      vendor: 'microsoft',
      credentials: {
        api_key: process.env.MICROSOFT_API_KEY,
        region: process.env.MICROSOFT_REGION,
      },
      language: 'en-US',
      voice: 'en-US-ChristopherNeural',
      text: longText,
    });
    t.ok(opts.servedFromCache, `successfully retrieved microsoft audio from cache ${opts.filePath}`);
  } catch (err) {
    console.error(err);
    t.end(err);
  }
  client.quit();
});
test('Azure custom voice speech synth tests', async(t) => {
  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);

  if (!process.env.MICROSOFT_CUSTOM_API_KEY || !process.env.MICROSOFT_DEPLOYMENT_ID || !process.env.MICROSOFT_CUSTOM_REGION) {
    t.pass('skipping Microsoft speech synth custom voice tests since MICROSOFT_CUSTOM_API_KEY or MICROSOFT_DEPLOYMENT_ID or MICROSOFT_CUSTOM_REGION not provided');
    return t.end();
  }
  try {
    const text = 'Hi, this is my custom voice. How does it sound to you? Do I have a future as a virtual bot?';
    let opts = await synthAudio(stats, {
      vendor: 'microsoft',
      credentials: {
        api_key: process.env.MICROSOFT_CUSTOM_API_KEY,
        region: process.env.MICROSOFT_CUSTOM_REGION,
        use_custom_tts: true,
        custom_tts_endpoint: process.env.MICROSOFT_DEPLOYMENT_ID,
      },
      language: 'en-US',
      voice: process.env.MICROSOFT_CUSTOM_VOICE,
      text,
    });
    t.ok(!opts.servedFromCache, `successfully synthesized microsoft audio to ${opts.filePath}`);

    opts = await synthAudio(stats, {
      vendor: 'microsoft',
      credentials: {
        api_key: process.env.MICROSOFT_CUSTOM_API_KEY,
        region: process.env.MICROSOFT_CUSTOM_REGION,
        use_custom_tts: true,
        custom_tts_endpoint: process.env.MICROSOFT_DEPLOYMENT_ID,
      },
      language: 'en-US',
      voice: process.env.MICROSOFT_CUSTOM_VOICE,
      text,
    });
    t.ok(opts.servedFromCache, `successfully retrieved microsoft custom voice audio from cache ${opts.filePath}`);
  } catch (err) {
    console.error(err);
    t.end(err);
  }
  client.quit();
});
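/* use_custom_tts plus custom_tts_endpoint switch synthesis from the stock
 * Azure voices to a custom voice deployment; here the deployment id, region,
 * and voice name all come from the environment, so the test only runs against
 * an account that actually has a custom voice trained and deployed. */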
test('Nuance speech synth tests', async(t) => {
  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);

  if (!process.env.NUANCE_CLIENT_ID || !process.env.NUANCE_SECRET) {
    t.pass('skipping Nuance speech synth tests since NUANCE_CLIENT_ID or NUANCE_SECRET not provided');
    return t.end();
  }
  try {
    let opts = await synthAudio(stats, {
      vendor: 'nuance',
      credentials: {
        client_id: process.env.NUANCE_CLIENT_ID,
        secret: process.env.NUANCE_SECRET,
      },
      language: 'en-US',
      voice: 'Evan',
      text: 'This is a test. This is only a test',
    });
    t.ok(!opts.servedFromCache, `successfully synthesized nuance audio to ${opts.filePath}`);

    opts = await synthAudio(stats, {
      vendor: 'nuance',
      credentials: {
        client_id: process.env.NUANCE_CLIENT_ID,
        secret: process.env.NUANCE_SECRET,
      },
      language: 'en-US',
      voice: 'Evan',
      text: 'This is a test. This is only a test',
    });
    t.ok(opts.servedFromCache, `successfully retrieved nuance audio from cache ${opts.filePath}`);
  } catch (err) {
    console.error(err);
    t.end(err);
  }
  client.quit();
});
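/* For Nuance, synthAudio authenticates with a client_id/secret pair; the
 * library also exports getNuanceAccessToken, which is bound to the Redis
 * client and so presumably caches the resulting OAuth access token the same
 * way the IBM helper does, letting repeated synth calls skip the token
 * round-trip. */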
test('Nvidia speech synth tests', async(t) => {
  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);

  if (!process.env.RIVA_URI) {
    t.pass('skipping Nvidia speech synth tests since RIVA_URI not provided');
    return t.end();
  }
  try {
    let opts = await synthAudio(stats, {
      vendor: 'nvidia',
      credentials: {
        riva_uri: process.env.RIVA_URI,
      },
      language: 'en-US',
      voice: 'English-US.Female-1',
      text: 'This is a test. This is only a test',
    });
    t.ok(!opts.servedFromCache, `successfully synthesized nvidia audio to ${opts.filePath}`);

    opts = await synthAudio(stats, {
      vendor: 'nvidia',
      credentials: {
        riva_uri: process.env.RIVA_URI,
      },
      language: 'en-US',
      voice: 'English-US.Female-1',
      text: 'This is a test. This is only a test',
    });
    t.ok(opts.servedFromCache, `successfully retrieved nvidia audio from cache ${opts.filePath}`);
  } catch (err) {
    console.error(err);
    t.end(err);
  }
  client.quit();
});
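/* Nvidia Riva is self-hosted: riva_uri points at the Riva server's gRPC
 * endpoint, so unlike the cloud vendors above there is no API key, only
 * reachability of the host named in RIVA_URI. */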
test('IBM watson speech synth tests', async(t) => {
  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);

  if (!process.env.IBM_TTS_API_KEY || !process.env.IBM_TTS_REGION) {
    t.pass('skipping IBM Watson speech synth tests since IBM_TTS_API_KEY or IBM_TTS_REGION not provided');
    return t.end();
  }
  const text = `<speak> Hi there and welcome to jambones! jambones is the <sub alias="seapass">CPaaS</sub> designed with the needs of communication service providers in mind. This is an example of simple text-to-speech, but there is so much more you can do. Try us out!</speak>`;
  try {
    let opts = await synthAudio(stats, {
      vendor: 'ibm',
      credentials: {
        tts_api_key: process.env.IBM_TTS_API_KEY,
        tts_region: process.env.IBM_TTS_REGION,
      },
      language: 'en-US',
      voice: 'en-US_AllisonV2Voice',
      text,
    });
    t.ok(!opts.servedFromCache, `successfully synthesized ibm audio to ${opts.filePath}`);

    opts = await synthAudio(stats, {
      vendor: 'ibm',
      credentials: {
        tts_api_key: process.env.IBM_TTS_API_KEY,
        tts_region: process.env.IBM_TTS_REGION,
      },
      language: 'en-US',
      voice: 'en-US_AllisonV2Voice',
      text,
    });
    t.ok(opts.servedFromCache, `successfully retrieved ibm audio from cache ${opts.filePath}`);
  } catch (err) {
    console.error(JSON.stringify(err));
    t.end(err);
  }
  client.quit();
});
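/* The IBM test sends SSML rather than plain text; <sub alias="seapass">
 * tells the synthesizer to pronounce "CPaaS" as "seapass". Under the hood the
 * helper exchanges IBM_TTS_API_KEY for an IAM bearer token and caches that
 * token, so only the first request pays the token round-trip. */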
test('TTS Cache tests', async(t) => {
  const fn = require('..');
  const {purgeTtsCache, client} = fn(opts, logger);

  try {
    // save some random tts keys to cache
    const minRecords = 8;
    for (const i in Array(minRecords).fill(0)) {
      await client.setAsync(makeSynthKey({vendor: i, language: i, voice: i, engine: i, text: i}), i);
    }
    const {purgedCount} = await purgeTtsCache();
    t.ok(purgedCount >= minRecords, `successfully purged at least ${minRecords} tts records from cache`);

    const cached = (await client.keysAsync('tts:*')).length;
    t.equal(cached, 0, `successfully purged all tts records from cache`);

  } catch (err) {
    console.error(JSON.stringify(err));
    t.end(err);
  }

  try {
    // save some random tts keys to cache
    for (const i in Array(10).fill(0)) {
      await client.setAsync(makeSynthKey({vendor: i, language: i, voice: i, engine: i, text: i}), i);
    }
    // save a specific key to tts cache
    const opts = {vendor: 'aws', language: 'en-US', voice: 'MALE', engine: 'Engine', text: 'Hello World!'};
    await client.setAsync(makeSynthKey(opts), opts.text);

    const {purgedCount} = await purgeTtsCache({all: false, ...opts});
    t.ok(purgedCount === 1, `successfully purged one specific tts record from cache`);

    // returns error for unknown key
    const {purgedCount: purgedCountWhenErrored, error} = await purgeTtsCache({
      all: false,
      vendor: 'non-existing',
      language: 'non-existing',
      voice: 'non-existing',
    });
    t.ok(purgedCountWhenErrored === 0, `purged no records when specified key was not found`);
    t.ok(error, `error returned when specified key was not found`);

    // make sure other tts keys are still there
    const cached = (await client.keysAsync('tts:*')).length;
    t.ok(cached >= 1, `successfully kept all non-specified tts records in cache`);

  } catch (err) {
    console.error(JSON.stringify(err));
    t.end(err);
  }

  client.quit();
});
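/* purgeTtsCache() with no arguments flushes every tts:* key; passing
 * {all: false, vendor, language, voice, ...} deletes a single entry and
 * resolves to {purgedCount, error}, with an error (and purgedCount 0) when
 * the computed key does not exist. */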
2054
test/tmp/redis.conf
Normal file
File diff suppressed because it is too large