mirror of
https://github.com/jambonz/speech-utils.git
synced 2025-12-18 19:27:46 +00:00
add verbio tts/stt
This commit is contained in:
1
index.js
1
index.js
@@ -14,6 +14,7 @@ module.exports = (opts, logger) => {
|
||||
purgeTtsCache: require('./lib/purge-tts-cache').bind(null, client, logger),
|
||||
addFileToCache: require('./lib/add-file-to-cache').bind(null, client, logger),
|
||||
synthAudio: require('./lib/synth-audio').bind(null, client, createHash, retrieveHash, logger),
|
||||
getVerbioAccessToken: require('./lib/get-verbio-token').bind(null, client, logger),
|
||||
getNuanceAccessToken: require('./lib/get-nuance-access-token').bind(null, client, logger),
|
||||
getIbmAccessToken: require('./lib/get-ibm-access-token').bind(null, client, logger),
|
||||
getAwsAuthToken: require('./lib/get-aws-sts-token').bind(null, logger, createHash, retrieveHash),
|
||||
|
||||
3
lib/constants.js
Normal file
3
lib/constants.js
Normal file
@@ -0,0 +1,3 @@
|
||||
// Constants shared by the lib modules.
// HTTP_TIMEOUT is passed as the `timeout` option of outbound HTTP requests by
// the modules that require this file (presumably milliseconds - TODO confirm).
const HTTP_TIMEOUT = 5000;

module.exports = {HTTP_TIMEOUT};
|
||||
@@ -2,8 +2,8 @@ const formurlencoded = require('form-urlencoded');
|
||||
const {Pool} = require('undici');
|
||||
const pool = new Pool('https://iam.cloud.ibm.com');
|
||||
const {makeIbmKey, noopLogger} = require('./utils');
|
||||
const { HTTP_TIMEOUT } = require('./constants');
|
||||
const debug = require('debug')('jambonz:realtimedb-helpers');
|
||||
const HTTP_TIMEOUT = 5000;
|
||||
|
||||
async function getIbmAccessToken(client, logger, apiKey) {
|
||||
logger = logger || noopLogger;
|
||||
|
||||
@@ -2,8 +2,8 @@ const formurlencoded = require('form-urlencoded');
|
||||
const {Pool} = require('undici');
|
||||
const pool = new Pool('https://auth.crt.nuance.com');
|
||||
const {makeNuanceKey, makeBasicAuthHeader, noopLogger} = require('./utils');
|
||||
const { HTTP_TIMEOUT } = require('./constants');
|
||||
const debug = require('debug')('jambonz:realtimedb-helpers');
|
||||
const HTTP_TIMEOUT = 5000;
|
||||
|
||||
async function getNuanceAccessToken(client, logger, clientId, secret, scope) {
|
||||
logger = logger || noopLogger;
|
||||
|
||||
@@ -1,12 +1,16 @@
|
||||
const assert = require('assert');
|
||||
const {noopLogger, createNuanceClient, createKryptonClient} = require('./utils');
|
||||
const getNuanceAccessToken = require('./get-nuance-access-token');
|
||||
const getVerbioAccessToken = require('./get-verbio-token');
|
||||
const {GetVoicesRequest, Voice} = require('../stubs/nuance/synthesizer_pb');
|
||||
const TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');
|
||||
const { IamAuthenticator } = require('ibm-watson/auth');
|
||||
const ttsGoogle = require('@google-cloud/text-to-speech');
|
||||
const { PollyClient, DescribeVoicesCommand } = require('@aws-sdk/client-polly');
|
||||
const getAwsAuthToken = require('./get-aws-sts-token');
|
||||
const {Pool} = require('undici');
|
||||
const { HTTP_TIMEOUT } = require('./constants');
|
||||
const verbioVoicePool = new Pool('https://us.rest.speechcenter.verbio.com');
|
||||
|
||||
const getIbmVoices = async(client, logger, credentials) => {
|
||||
const {tts_region, tts_api_key} = credentials;
|
||||
@@ -117,6 +121,26 @@ const getAwsVoices = async(_client, createHash, retrieveHash, logger, credential
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Retrieve the list of voices offered by Verbio.
 *
 * An access token is first obtained through getVerbioAccessToken and then
 * sent as a Bearer token on a GET to /api/v1/voices.
 *
 * @param {object} client - cache client, passed through to getVerbioAccessToken
 * @param {object} logger - logger exposing info(obj, msg)
 * @param {object} credentials - Verbio credentials, passed through to getVerbioAccessToken
 * @returns {Promise<*>} the parsed JSON body of the voices response
 * @throws {Error} when the token cannot be obtained, the request fails, or the
 * service answers with a status code other than 200 (err.statusCode is set)
 */
const getVerbioVoices = async(client, logger, credentials) => {
  try {
    const {access_token} = await getVerbioAccessToken(client, logger, credentials);
    const {statusCode, body} = await verbioVoicePool.request({
      path: '/api/v1/voices',
      method: 'GET',
      headers: {
        'Authorization': `Bearer ${access_token}`,
        'User-Agent': 'jambonz'
      },
      timeout: HTTP_TIMEOUT,
      followRedirects: false
    });

    // Without this check an error payload (e.g. from a 401) would be parsed
    // and handed back to the caller as though it were the voice list.
    if (200 !== statusCode) {
      const err = new Error(`getVerbioVoices: unexpected status code ${statusCode} listing Verbio voices`);
      err.statusCode = statusCode;
      // Reading the body keeps the response text available for diagnostics.
      err.body = await body.text();
      throw err;
    }
    return await body.json();
  } catch (err) {
    logger.info({err}, 'getVerbioVoices - failed to list voices for Verbio');
    throw err;
  }
};
|
||||
|
||||
/**
|
||||
* Synthesize speech to an mp3 file, and also cache the generated speech
|
||||
* in redis (base64 format) for 24 hours so as to avoid unnecessarily paying
|
||||
@@ -136,7 +160,7 @@ const getAwsVoices = async(_client, createHash, retrieveHash, logger, credential
|
||||
async function getTtsVoices(client, createHash, retrieveHash, logger, {vendor, credentials}) {
|
||||
logger = logger || noopLogger;
|
||||
|
||||
assert.ok(['nuance', 'ibm', 'google', 'aws', 'polly'].includes(vendor),
|
||||
assert.ok(['nuance', 'ibm', 'google', 'aws', 'polly', 'verbio'].includes(vendor),
|
||||
`getTtsVoices not supported for vendor ${vendor}`);
|
||||
|
||||
switch (vendor) {
|
||||
@@ -149,6 +173,8 @@ async function getTtsVoices(client, createHash, retrieveHash, logger, {vendor, c
|
||||
case 'aws':
|
||||
case 'polly':
|
||||
return getAwsVoices(client, createHash, retrieveHash, logger, credentials);
|
||||
case 'verbio':
|
||||
return getVerbioVoices(client, logger, credentials);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
51
lib/get-verbio-token.js
Normal file
51
lib/get-verbio-token.js
Normal file
@@ -0,0 +1,51 @@
|
||||
const {Pool} = require('undici');
|
||||
const { noopLogger, makeVerbioKey } = require('./utils');
|
||||
const { HTTP_TIMEOUT } = require('./constants');
|
||||
const pool = new Pool('https://auth.speechcenter.verbio.com:444');
|
||||
const debug = require('debug')('jambonz:realtimedb-helpers');
|
||||
|
||||
/**
 * Obtain a Verbio access token, serving it from the cache when one is stored.
 *
 * The cache key is derived from client_id via makeVerbioKey. On a cache miss
 * the token is requested from the Verbio auth service with a JSON POST to
 * /api/v1/token and, when its remaining lifetime allows, stored in the cache.
 *
 * @param {object} client - cache client exposing get(key) and set(key, value, 'EX', seconds)
 * @param {object} [logger] - logger exposing debug/error; noopLogger is used when falsy
 * @param {object} credentials - object with client_id and client_secret
 * @returns {Promise<object>} {access_token, servedFromCache: true} on a cache hit,
 * otherwise the JSON returned by Verbio plus servedFromCache: false
 * @throws {Error} when the cache or HTTP request fails, or the service answers
 * with a status code other than 200 (err.statusCode is set)
 */
async function getVerbioAccessToken(client, logger, credentials) {
  logger = logger || noopLogger;
  const { client_id, client_secret } = credentials;
  try {
    const key = makeVerbioKey(client_id);
    const access_token = await client.get(key);
    if (access_token) {
      return {access_token, servedFromCache: true};
    }

    const payload = {
      client_id,
      client_secret
    };

    const {statusCode, headers, body} = await pool.request({
      path: '/api/v1/token',
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'User-Agent': 'jambonz'
      },
      body: JSON.stringify(payload),
      timeout: HTTP_TIMEOUT,
      followRedirects: false
    });

    if (200 !== statusCode) {
      logger.debug({statusCode, headers, body: await body.text()}, 'error fetching access token from Verbio');
      const err = new Error(`error fetching access token from Verbio: status code ${statusCode}`);
      err.statusCode = statusCode;
      throw err;
    }
    const json = await body.json();

    // expiration_time is compared against Date.now() / 1000, i.e. it is treated
    // as epoch seconds; the cached entry is expired 30 seconds early.
    const expiry = Math.floor(json.expiration_time - Date.now() / 1000 - 30);

    // A zero, negative or NaN TTL would make the cache write fail and turn a
    // successful token fetch into an error, so such tokens are simply not cached.
    if (Number.isFinite(expiry) && expiry > 0) {
      await client.set(key, json.access_token, 'EX', expiry);
    } else {
      logger.debug({expiration_time: json.expiration_time},
        'getVerbioAccessToken: token lifetime too short to cache, skipping cache');
    }
    return {...json, servedFromCache: false};
  } catch (err) {
    debug(err, `getVerbioAccessToken: Error retrieving Verbio access token for client_id ${client_id}`);
    logger.error(err, `getVerbioAccessToken: Error retrieving Verbio access token for client_id ${client_id}`);
    throw err;
  }
}
|
||||
|
||||
module.exports = getVerbioAccessToken;
|
||||
@@ -3,10 +3,10 @@ const {SynthesizerClient} = require('../stubs/nuance/synthesizer_grpc_pb');
|
||||
const {RivaSpeechSynthesisClient} = require('../stubs/riva/proto/riva_tts_grpc_pb');
|
||||
const {Pool} = require('undici');
|
||||
const pool = new Pool('https://auth.crt.nuance.com');
|
||||
const HTTP_TIMEOUT = 5000;
|
||||
const NUANCE_AUTH_ENDPOINT = 'tts.api.nuance.com:443';
|
||||
const grpc = require('@grpc/grpc-js');
|
||||
const formurlencoded = require('form-urlencoded');
|
||||
const { HTTP_TIMEOUT } = require('./constants');
|
||||
|
||||
const debug = require('debug')('jambonz:realtimedb-helpers');
|
||||
/**
|
||||
@@ -49,6 +49,12 @@ function makeAwsKey(awsAccessKeyId) {
|
||||
return `aws:${hash.digest('hex')}`;
|
||||
}
|
||||
|
||||
/**
 * Build the cache key under which a Verbio access token is stored.
 *
 * @param {string} client_id - Verbio client id
 * @returns {string} 'verbio:' followed by the hex SHA-1 digest of client_id
 */
function makeVerbioKey(client_id) {
  const digest = crypto
    .createHash('sha1')
    .update(client_id)
    .digest('hex');
  return `verbio:${digest}`;
}
|
||||
|
||||
function makeNuanceKey(clientId, secret, scope) {
|
||||
const hash = crypto.createHash('sha1');
|
||||
hash.update(`${clientId}:${secret}:${scope}`);
|
||||
@@ -117,6 +123,7 @@ module.exports = {
|
||||
makeNuanceKey,
|
||||
makeIbmKey,
|
||||
makeAwsKey,
|
||||
makeVerbioKey,
|
||||
getNuanceAccessToken,
|
||||
createNuanceClient,
|
||||
createKryptonClient,
|
||||
|
||||
@@ -12,6 +12,34 @@ const stats = {
|
||||
histogram: () => {}
|
||||
};
|
||||
|
||||
// Integration test against the live Verbio service. It is skipped when
// VERBIO_CLIENT_ID / VERBIO_CLIENT_SECRET are not set in the environment.
// It expects the first token request not to be served from cache and the second to be,
// then fetches the voice list.
test('Verbio - get Access key and voices', async(t) => {
  const {client, getTtsVoices, getVerbioAccessToken} = require('..')(opts, logger);
  const {VERBIO_CLIENT_ID, VERBIO_CLIENT_SECRET} = process.env;
  const haveKeys = VERBIO_CLIENT_ID && VERBIO_CLIENT_SECRET;

  if (!haveKeys) {
    t.pass('skipping Verbio test since no Verbio Keys provided');
    t.end();
    client.quit();
    return;
  }

  try {
    const credentials = {client_id: VERBIO_CLIENT_ID, client_secret: VERBIO_CLIENT_SECRET};

    const fresh = await getVerbioAccessToken(credentials);
    t.ok(fresh.access_token && !fresh.servedFromCache, 'successfully received access token not from cache');

    const cached = await getVerbioAccessToken(credentials);
    t.ok(cached.access_token && cached.servedFromCache, 'successfully received access token from cache');

    const voices = await getTtsVoices({vendor: 'verbio', credentials});
    t.ok(voices && voices.length != 0, 'successfully received verbio voices');
  } catch (err) {
    console.error(err);
    t.end(err);
  }
  client.quit();
});
|
||||
|
||||
test('IBM - create access key', async(t) => {
|
||||
const fn = require('..');
|
||||
const {client, getIbmAccessToken} = fn(opts, logger);
|
||||
|
||||
Reference in New Issue
Block a user