mirror of
https://github.com/jambonz/speech-utils.git
synced 2025-12-19 03:37:49 +00:00
add verbio tts/stt
This commit is contained in:
1
index.js
1
index.js
@@ -14,6 +14,7 @@ module.exports = (opts, logger) => {
|
|||||||
purgeTtsCache: require('./lib/purge-tts-cache').bind(null, client, logger),
|
purgeTtsCache: require('./lib/purge-tts-cache').bind(null, client, logger),
|
||||||
addFileToCache: require('./lib/add-file-to-cache').bind(null, client, logger),
|
addFileToCache: require('./lib/add-file-to-cache').bind(null, client, logger),
|
||||||
synthAudio: require('./lib/synth-audio').bind(null, client, createHash, retrieveHash, logger),
|
synthAudio: require('./lib/synth-audio').bind(null, client, createHash, retrieveHash, logger),
|
||||||
|
getVerbioAccessToken: require('./lib/get-verbio-token').bind(null, client, logger),
|
||||||
getNuanceAccessToken: require('./lib/get-nuance-access-token').bind(null, client, logger),
|
getNuanceAccessToken: require('./lib/get-nuance-access-token').bind(null, client, logger),
|
||||||
getIbmAccessToken: require('./lib/get-ibm-access-token').bind(null, client, logger),
|
getIbmAccessToken: require('./lib/get-ibm-access-token').bind(null, client, logger),
|
||||||
getAwsAuthToken: require('./lib/get-aws-sts-token').bind(null, logger, createHash, retrieveHash),
|
getAwsAuthToken: require('./lib/get-aws-sts-token').bind(null, logger, createHash, retrieveHash),
|
||||||
|
|||||||
3
lib/constants.js
Normal file
3
lib/constants.js
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
module.exports = {
|
||||||
|
HTTP_TIMEOUT: 5000
|
||||||
|
};
|
||||||
@@ -2,8 +2,8 @@ const formurlencoded = require('form-urlencoded');
|
|||||||
const {Pool} = require('undici');
|
const {Pool} = require('undici');
|
||||||
const pool = new Pool('https://iam.cloud.ibm.com');
|
const pool = new Pool('https://iam.cloud.ibm.com');
|
||||||
const {makeIbmKey, noopLogger} = require('./utils');
|
const {makeIbmKey, noopLogger} = require('./utils');
|
||||||
|
const { HTTP_TIMEOUT } = require('./constants');
|
||||||
const debug = require('debug')('jambonz:realtimedb-helpers');
|
const debug = require('debug')('jambonz:realtimedb-helpers');
|
||||||
const HTTP_TIMEOUT = 5000;
|
|
||||||
|
|
||||||
async function getIbmAccessToken(client, logger, apiKey) {
|
async function getIbmAccessToken(client, logger, apiKey) {
|
||||||
logger = logger || noopLogger;
|
logger = logger || noopLogger;
|
||||||
|
|||||||
@@ -2,8 +2,8 @@ const formurlencoded = require('form-urlencoded');
|
|||||||
const {Pool} = require('undici');
|
const {Pool} = require('undici');
|
||||||
const pool = new Pool('https://auth.crt.nuance.com');
|
const pool = new Pool('https://auth.crt.nuance.com');
|
||||||
const {makeNuanceKey, makeBasicAuthHeader, noopLogger} = require('./utils');
|
const {makeNuanceKey, makeBasicAuthHeader, noopLogger} = require('./utils');
|
||||||
|
const { HTTP_TIMEOUT } = require('./constants');
|
||||||
const debug = require('debug')('jambonz:realtimedb-helpers');
|
const debug = require('debug')('jambonz:realtimedb-helpers');
|
||||||
const HTTP_TIMEOUT = 5000;
|
|
||||||
|
|
||||||
async function getNuanceAccessToken(client, logger, clientId, secret, scope) {
|
async function getNuanceAccessToken(client, logger, clientId, secret, scope) {
|
||||||
logger = logger || noopLogger;
|
logger = logger || noopLogger;
|
||||||
|
|||||||
@@ -1,12 +1,16 @@
|
|||||||
const assert = require('assert');
|
const assert = require('assert');
|
||||||
const {noopLogger, createNuanceClient, createKryptonClient} = require('./utils');
|
const {noopLogger, createNuanceClient, createKryptonClient} = require('./utils');
|
||||||
const getNuanceAccessToken = require('./get-nuance-access-token');
|
const getNuanceAccessToken = require('./get-nuance-access-token');
|
||||||
|
const getVerbioAccessToken = require('./get-verbio-token');
|
||||||
const {GetVoicesRequest, Voice} = require('../stubs/nuance/synthesizer_pb');
|
const {GetVoicesRequest, Voice} = require('../stubs/nuance/synthesizer_pb');
|
||||||
const TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');
|
const TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');
|
||||||
const { IamAuthenticator } = require('ibm-watson/auth');
|
const { IamAuthenticator } = require('ibm-watson/auth');
|
||||||
const ttsGoogle = require('@google-cloud/text-to-speech');
|
const ttsGoogle = require('@google-cloud/text-to-speech');
|
||||||
const { PollyClient, DescribeVoicesCommand } = require('@aws-sdk/client-polly');
|
const { PollyClient, DescribeVoicesCommand } = require('@aws-sdk/client-polly');
|
||||||
const getAwsAuthToken = require('./get-aws-sts-token');
|
const getAwsAuthToken = require('./get-aws-sts-token');
|
||||||
|
const {Pool} = require('undici');
|
||||||
|
const { HTTP_TIMEOUT } = require('./constants');
|
||||||
|
const verbioVoicePool = new Pool('https://us.rest.speechcenter.verbio.com');
|
||||||
|
|
||||||
const getIbmVoices = async(client, logger, credentials) => {
|
const getIbmVoices = async(client, logger, credentials) => {
|
||||||
const {tts_region, tts_api_key} = credentials;
|
const {tts_region, tts_api_key} = credentials;
|
||||||
@@ -117,6 +121,26 @@ const getAwsVoices = async(_client, createHash, retrieveHash, logger, credential
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Retrieve the list of text-to-speech voices from Verbio.
 *
 * An access token is obtained first through getVerbioAccessToken (which may
 * serve it from cache), then used as a Bearer token on the voices endpoint.
 *
 * @param {object} client - cache client, forwarded to getVerbioAccessToken
 * @param {object} logger - logger exposing an info() method
 * @param {object} credentials - Verbio credentials, forwarded to getVerbioAccessToken
 * @returns {Promise<*>} the parsed JSON body of the voices response
 * @throws {Error} when the token cannot be obtained, the request fails, or
 *   the service answers with a non-200 status (err.statusCode is then set)
 */
const getVerbioVoices = async(client, logger, credentials) => {
  try {
    const {access_token} = await getVerbioAccessToken(client, logger, credentials);
    const {statusCode, body} = await verbioVoicePool.request({
      path: '/api/v1/voices',
      method: 'GET',
      headers: {
        'Authorization': `Bearer ${access_token}`,
        'User-Agent': 'jambonz'
      },
      timeout: HTTP_TIMEOUT,
      followRedirects: false
    });

    // Without this check an error payload (e.g. 401/500) would be parsed and
    // handed back to the caller as if it were the list of voices.
    if (200 !== statusCode) {
      const err = new Error(`Verbio voices request failed with status ${statusCode}`);
      err.statusCode = statusCode;
      // Consume the body so it is not left unread; keep it for diagnostics.
      err.body = await body.text();
      throw err;
    }
    return await body.json();
  } catch (err) {
    logger.info({err}, 'getVerbioVoices - failed to list voices for Verbio');
    throw err;
  }
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Synthesize speech to an mp3 file, and also cache the generated speech
|
* Synthesize speech to an mp3 file, and also cache the generated speech
|
||||||
* in redis (base64 format) for 24 hours so as to avoid unnecessarily paying
|
* in redis (base64 format) for 24 hours so as to avoid unnecessarily paying
|
||||||
@@ -136,7 +160,7 @@ const getAwsVoices = async(_client, createHash, retrieveHash, logger, credential
|
|||||||
async function getTtsVoices(client, createHash, retrieveHash, logger, {vendor, credentials}) {
|
async function getTtsVoices(client, createHash, retrieveHash, logger, {vendor, credentials}) {
|
||||||
logger = logger || noopLogger;
|
logger = logger || noopLogger;
|
||||||
|
|
||||||
assert.ok(['nuance', 'ibm', 'google', 'aws', 'polly'].includes(vendor),
|
assert.ok(['nuance', 'ibm', 'google', 'aws', 'polly', 'verbio'].includes(vendor),
|
||||||
`getTtsVoices not supported for vendor ${vendor}`);
|
`getTtsVoices not supported for vendor ${vendor}`);
|
||||||
|
|
||||||
switch (vendor) {
|
switch (vendor) {
|
||||||
@@ -149,6 +173,8 @@ async function getTtsVoices(client, createHash, retrieveHash, logger, {vendor, c
|
|||||||
case 'aws':
|
case 'aws':
|
||||||
case 'polly':
|
case 'polly':
|
||||||
return getAwsVoices(client, createHash, retrieveHash, logger, credentials);
|
return getAwsVoices(client, createHash, retrieveHash, logger, credentials);
|
||||||
|
case 'verbio':
|
||||||
|
return getVerbioVoices(client, logger, credentials);
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|||||||
51
lib/get-verbio-token.js
Normal file
51
lib/get-verbio-token.js
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
const {Pool} = require('undici');
|
||||||
|
const { noopLogger, makeVerbioKey } = require('./utils');
|
||||||
|
const { HTTP_TIMEOUT } = require('./constants');
|
||||||
|
const pool = new Pool('https://auth.speechcenter.verbio.com:444');
|
||||||
|
const debug = require('debug')('jambonz:realtimedb-helpers');
|
||||||
|
|
||||||
|
/**
 * Obtain a Verbio access token, using the cache when possible.
 *
 * The cache key is derived from client_id only (see makeVerbioKey). On a
 * cache miss the token is requested from the Verbio auth service and stored
 * with a TTL computed from the returned expiration_time minus a 30 second
 * safety margin.
 *
 * @param {object} client - cache client exposing get(key) and set(key, value, 'EX', seconds)
 * @param {object} [logger] - logger; falls back to noopLogger when falsy
 * @param {object} credentials - object carrying client_id and client_secret
 * @returns {Promise<object>} {access_token, servedFromCache: true} on a cache
 *   hit, otherwise the JSON returned by Verbio plus servedFromCache: false
 * @throws {Error} on cache/transport failure or a non-200 response
 *   (err.statusCode is set in the latter case)
 */
async function getVerbioAccessToken(client, logger, credentials) {
  logger = logger || noopLogger;
  const { client_id, client_secret } = credentials;
  try {
    const key = makeVerbioKey(client_id);
    const access_token = await client.get(key);
    if (access_token) {
      return {access_token, servedFromCache: true};
    }

    const payload = {
      client_id,
      client_secret
    };

    const {statusCode, headers, body} = await pool.request({
      path: '/api/v1/token',
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'User-Agent': 'jambonz'
      },
      body: JSON.stringify(payload),
      timeout: HTTP_TIMEOUT,
      followRedirects: false
    });

    if (200 !== statusCode) {
      logger.debug({statusCode, headers, body: await body.text()}, 'error fetching access token from Verbio');
      // Give the error a message so it is meaningful once logged or surfaced.
      const err = new Error(`error fetching access token from Verbio, status ${statusCode}`);
      err.statusCode = statusCode;
      throw err;
    }

    const json = await body.json();
    // NOTE(review): assumes expiration_time is a unix timestamp in seconds -- confirm
    // against the Verbio API. 30 seconds are subtracted as a safety margin.
    const expiry = Math.floor(json.expiration_time - Date.now() / 1000 - 30);
    if (expiry > 0) {
      await client.set(key, json.access_token, 'EX', expiry);
    } else {
      // A zero, negative or NaN TTL is rejected by SET ... EX; skip caching
      // rather than failing a token request that actually succeeded.
      logger.debug({expiry}, 'getVerbioAccessToken: token not cached, invalid or too short expiry');
    }
    return {...json, servedFromCache: false};
  } catch (err) {
    debug(err, `getVerbioAccessToken: Error retrieving Verbio access token for client_id ${client_id}`);
    logger.error(err, `getVerbioAccessToken: Error retrieving Verbio access token for client_id ${client_id}`);
    throw err;
  }
}
|
||||||
|
|
||||||
|
module.exports = getVerbioAccessToken;
|
||||||
@@ -3,10 +3,10 @@ const {SynthesizerClient} = require('../stubs/nuance/synthesizer_grpc_pb');
|
|||||||
const {RivaSpeechSynthesisClient} = require('../stubs/riva/proto/riva_tts_grpc_pb');
|
const {RivaSpeechSynthesisClient} = require('../stubs/riva/proto/riva_tts_grpc_pb');
|
||||||
const {Pool} = require('undici');
|
const {Pool} = require('undici');
|
||||||
const pool = new Pool('https://auth.crt.nuance.com');
|
const pool = new Pool('https://auth.crt.nuance.com');
|
||||||
const HTTP_TIMEOUT = 5000;
|
|
||||||
const NUANCE_AUTH_ENDPOINT = 'tts.api.nuance.com:443';
|
const NUANCE_AUTH_ENDPOINT = 'tts.api.nuance.com:443';
|
||||||
const grpc = require('@grpc/grpc-js');
|
const grpc = require('@grpc/grpc-js');
|
||||||
const formurlencoded = require('form-urlencoded');
|
const formurlencoded = require('form-urlencoded');
|
||||||
|
const { HTTP_TIMEOUT } = require('./constants');
|
||||||
|
|
||||||
const debug = require('debug')('jambonz:realtimedb-helpers');
|
const debug = require('debug')('jambonz:realtimedb-helpers');
|
||||||
/**
|
/**
|
||||||
@@ -49,6 +49,12 @@ function makeAwsKey(awsAccessKeyId) {
|
|||||||
return `aws:${hash.digest('hex')}`;
|
return `aws:${hash.digest('hex')}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Build the cache key under which a Verbio access token is stored.
 *
 * The key is the literal prefix "verbio:" followed by the hex SHA-1 digest
 * of the client id, so the raw id never appears in the cache key.
 *
 * @param {string} client_id - Verbio client id
 * @returns {string} cache key of the form `verbio:<sha1 hex>`
 * @throws {TypeError} when client_id is null or undefined
 */
function makeVerbioKey(client_id) {
  // hash.update() would reject a missing id with an opaque argument-type
  // error; fail with the same error class but a message naming the problem.
  if (client_id === null || client_id === undefined) {
    throw new TypeError('makeVerbioKey: client_id is required');
  }
  const hash = crypto.createHash('sha1');
  hash.update(client_id);
  return `verbio:${hash.digest('hex')}`;
}
|
||||||
|
|
||||||
function makeNuanceKey(clientId, secret, scope) {
|
function makeNuanceKey(clientId, secret, scope) {
|
||||||
const hash = crypto.createHash('sha1');
|
const hash = crypto.createHash('sha1');
|
||||||
hash.update(`${clientId}:${secret}:${scope}`);
|
hash.update(`${clientId}:${secret}:${scope}`);
|
||||||
@@ -117,6 +123,7 @@ module.exports = {
|
|||||||
makeNuanceKey,
|
makeNuanceKey,
|
||||||
makeIbmKey,
|
makeIbmKey,
|
||||||
makeAwsKey,
|
makeAwsKey,
|
||||||
|
makeVerbioKey,
|
||||||
getNuanceAccessToken,
|
getNuanceAccessToken,
|
||||||
createNuanceClient,
|
createNuanceClient,
|
||||||
createKryptonClient,
|
createKryptonClient,
|
||||||
|
|||||||
@@ -12,6 +12,34 @@ const stats = {
|
|||||||
histogram: () => {}
|
histogram: () => {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Integration test for Verbio: fetches an access token twice (the second
// call is expected to be served from cache) and then lists the TTS voices.
// Skipped when VERBIO_CLIENT_ID / VERBIO_CLIENT_SECRET are not set.
test('Verbio - get Access key and voices', async(t) => {
  const fn = require('..');
  const {client, getTtsVoices, getVerbioAccessToken} = fn(opts, logger);
  if (!process.env.VERBIO_CLIENT_ID || !process.env.VERBIO_CLIENT_SECRET) {
    t.pass('skipping Verbio test since no Verbio Keys provided');
    t.end();
    client.quit();
    return;
  }

  try {
    const credentials = {
      client_id: process.env.VERBIO_CLIENT_ID,
      client_secret: process.env.VERBIO_CLIENT_SECRET
    };
    // First call: nothing cached yet, so the token must come from Verbio.
    let obj = await getVerbioAccessToken(credentials);
    t.ok(obj.access_token && !obj.servedFromCache, 'successfully received access token not from cache');
    // Second call: the token stored by the first call should be returned.
    obj = await getVerbioAccessToken(credentials);
    t.ok(obj.access_token && obj.servedFromCache, 'successfully received access token from cache');
    const voices = await getTtsVoices({vendor: 'verbio', credentials});
    t.ok(voices && voices.length !== 0, 'successfully received verbio voices');
  } catch (err) {
    console.error(err);
    t.end(err);
  }
  client.quit();
});
|
||||||
|
|
||||||
test('IBM - create access key', async(t) => {
|
test('IBM - create access key', async(t) => {
|
||||||
const fn = require('..');
|
const fn = require('..');
|
||||||
const {client, getIbmAccessToken} = fn(opts, logger);
|
const {client, getIbmAccessToken} = fn(opts, logger);
|
||||||
|
|||||||
Reference in New Issue
Block a user