mirror of
https://github.com/jambonz/speech-utils.git
synced 2025-12-19 03:37:49 +00:00
wip
This commit is contained in:
4
index.js
4
index.js
@@ -13,10 +13,10 @@ module.exports = (opts, logger) => {
|
||||
getTtsSize: require('./lib/get-tts-size').bind(null, client, logger),
|
||||
purgeTtsCache: require('./lib/purge-tts-cache').bind(null, client, logger),
|
||||
addFileToCache: require('./lib/add-file-to-cache').bind(null, client, logger),
|
||||
synthAudio: require('./lib/synth-audio').bind(null, client, logger),
|
||||
synthAudio: require('./lib/synth-audio').bind(null, client, createHash, retrieveHash, logger),
|
||||
getNuanceAccessToken: require('./lib/get-nuance-access-token').bind(null, client, logger),
|
||||
getIbmAccessToken: require('./lib/get-ibm-access-token').bind(null, client, logger),
|
||||
getAwsAuthToken: require('./lib/get-aws-sts-token').bind(null, logger, createHash, retrieveHash),
|
||||
getTtsVoices: require('./lib/get-tts-voices').bind(null, client, logger),
|
||||
getTtsVoices: require('./lib/get-tts-voices').bind(null, client, createHash, retrieveHash, logger),
|
||||
};
|
||||
};
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
const { STSClient, GetSessionTokenCommand } = require('@aws-sdk/client-sts');
|
||||
const { STSClient, GetSessionTokenCommand, AssumeRoleCommand } = require('@aws-sdk/client-sts');
|
||||
const {makeAwsKey, noopLogger} = require('./utils');
|
||||
const debug = require('debug')('jambonz:speech-utils');
|
||||
const EXPIRY = 3600;
|
||||
@@ -6,30 +6,41 @@ const EXPIRY = 3600;
|
||||
async function getAwsAuthToken(
|
||||
logger,
|
||||
createHash, retrieveHash,
|
||||
awsAccessKeyId, awsSecretAccessKey, awsRegion) {
|
||||
awsAccessKeyId, awsSecretAccessKey, awsRegion, roleArn = null) {
|
||||
logger = logger || noopLogger;
|
||||
try {
|
||||
const key = makeAwsKey(awsAccessKeyId);
|
||||
const key = makeAwsKey(roleArn || awsAccessKeyId);
|
||||
const obj = await retrieveHash(key);
|
||||
if (obj) return {...obj, servedFromCache: true};
|
||||
|
||||
/* access token not found in cache, so generate it using STS */
|
||||
const stsClient = new STSClient({
|
||||
region: awsRegion,
|
||||
credentials: {
|
||||
accessKeyId: awsAccessKeyId,
|
||||
secretAccessKey: awsSecretAccessKey,
|
||||
}
|
||||
});
|
||||
const command = new GetSessionTokenCommand({DurationSeconds: EXPIRY});
|
||||
const data = await stsClient.send(command);
|
||||
let data;
|
||||
if (roleArn) {
|
||||
const stsClient = new STSClient({ region: awsRegion});
|
||||
const roleToAssume = { RoleArn: roleArn, RoleSessionName: 'Jambonz_Speech', DurationSeconds: EXPIRY};
|
||||
const command = new AssumeRoleCommand(roleToAssume);
|
||||
|
||||
const response = await stsClient.send(command);
|
||||
data = response;
|
||||
} else {
|
||||
/* access token not found in cache, so generate it using STS */
|
||||
const stsClient = new STSClient({
|
||||
region: awsRegion,
|
||||
credentials: {
|
||||
accessKeyId: awsAccessKeyId,
|
||||
secretAccessKey: awsSecretAccessKey,
|
||||
}
|
||||
});
|
||||
const command = new GetSessionTokenCommand({DurationSeconds: EXPIRY});
|
||||
data = await stsClient.send(command);
|
||||
}
|
||||
|
||||
const credentials = {
|
||||
accessKeyId: data.Credentials.AccessKeyId,
|
||||
secretAccessKey: data.Credentials.SecretAccessKey,
|
||||
securityToken: data.Credentials.SessionToken
|
||||
sessionToken: data.Credentials.SessionToken
|
||||
};
|
||||
|
||||
console.log(credentials, 'xquanluu');
|
||||
/* expire 10 minutes before the hour, so we don't lose the use of it during a call */
|
||||
createHash(key, credentials, EXPIRY - 600)
|
||||
.catch((err) => logger.error(err, `Error saving hash for key ${key}`));
|
||||
|
||||
@@ -6,7 +6,7 @@ const TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');
|
||||
const { IamAuthenticator } = require('ibm-watson/auth');
|
||||
const ttsGoogle = require('@google-cloud/text-to-speech');
|
||||
const { PollyClient, DescribeVoicesCommand } = require('@aws-sdk/client-polly');
|
||||
const { STSClient, AssumeRoleCommand } = require('@aws-sdk/client-sts');
|
||||
const getAwsAuthToken = require('./get-aws-sts-token');
|
||||
|
||||
const getIbmVoices = async(client, logger, credentials) => {
|
||||
const {tts_region, tts_api_key} = credentials;
|
||||
@@ -88,7 +88,7 @@ const getGoogleVoices = async(_client, logger, credentials) => {
|
||||
return await client.listVoices();
|
||||
};
|
||||
|
||||
const getAwsVoices = async(_client, logger, credentials) => {
|
||||
const getAwsVoices = async(_client, createHash, retrieveHash, logger, credentials) => {
|
||||
try {
|
||||
const {region, accessKeyId, secretAccessKey, roleArn} = credentials;
|
||||
let client = null;
|
||||
@@ -101,15 +101,9 @@ const getAwsVoices = async(_client, logger, credentials) => {
|
||||
}
|
||||
});
|
||||
} else if (roleArn) {
|
||||
const stsClient = new STSClient({ region});
|
||||
const roleToAssume = { RoleArn: roleArn, RoleSessionName: 'Jambonz Speech' };
|
||||
const command = new AssumeRoleCommand(roleToAssume);
|
||||
|
||||
const response = await stsClient.send(command);
|
||||
const assumedRoleCreds = response.Credentials;
|
||||
client = new PollyClient({
|
||||
region,
|
||||
credentials: assumedRoleCreds,
|
||||
credentials: await getAwsAuthToken(logger, createHash, retrieveHash, null, null, region, roleArn),
|
||||
});
|
||||
} else {
|
||||
client = new PollyClient();
|
||||
@@ -139,7 +133,7 @@ const getAwsVoices = async(_client, logger, credentials) => {
|
||||
* @returns object containing filepath to an mp3 file in the /tmp folder containing
|
||||
* the synthesized audio, and a variable indicating whether it was served from cache
|
||||
*/
|
||||
async function getTtsVoices(client, logger, {vendor, credentials}) {
|
||||
async function getTtsVoices(client, createHash, retrieveHash, logger, {vendor, credentials}) {
|
||||
logger = logger || noopLogger;
|
||||
|
||||
assert.ok(['nuance', 'ibm', 'google', 'aws', 'polly'].includes(vendor),
|
||||
@@ -154,7 +148,7 @@ async function getTtsVoices(client, logger, {vendor, credentials}) {
|
||||
return getGoogleVoices(client, logger, credentials);
|
||||
case 'aws':
|
||||
case 'polly':
|
||||
return getAwsVoices(client, logger, credentials);
|
||||
return getAwsVoices(client, createHash, retrieveHash, logger, credentials);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -3,7 +3,6 @@ const fs = require('fs');
|
||||
const bent = require('bent');
|
||||
const ttsGoogle = require('@google-cloud/text-to-speech');
|
||||
const { PollyClient, SynthesizeSpeechCommand } = require('@aws-sdk/client-polly');
|
||||
const { STSClient, AssumeRoleCommand } = require('@aws-sdk/client-sts');
|
||||
|
||||
const sdk = require('microsoft-cognitiveservices-speech-sdk');
|
||||
const TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');
|
||||
@@ -40,6 +39,7 @@ const debug = require('debug')('jambonz:realtimedb-helpers');
|
||||
const EXPIRES = (process.env.JAMBONES_TTS_CACHE_DURATION_MINS || 4 * 60) * 60; // cache tts for 4 hours
|
||||
const TMP_FOLDER = '/tmp';
|
||||
const OpenAI = require('openai');
|
||||
const getAwsAuthToken = require('./get-aws-sts-token');
|
||||
|
||||
|
||||
const trimTrailingSilence = (buffer) => {
|
||||
@@ -76,7 +76,7 @@ const trimTrailingSilence = (buffer) => {
|
||||
* @returns object containing filepath to an mp3 file in the /tmp folder containing
|
||||
* the synthesized audio, and a variable indicating whether it was served from cache
|
||||
*/
|
||||
async function synthAudio(client, logger, stats, { account_sid,
|
||||
async function synthAudio(client, createHash, retrieveHash, logger, stats, { account_sid,
|
||||
vendor, language, voice, gender, text, engine, salt, model, credentials, deploymentId,
|
||||
disableTtsCache, renderForCaching, disableTtsStreaming, options
|
||||
}) {
|
||||
@@ -188,7 +188,8 @@ async function synthAudio(client, logger, stats, { account_sid,
|
||||
case 'aws':
|
||||
case 'polly':
|
||||
vendorLabel = 'aws';
|
||||
audioBuffer = await synthPolly(logger, {credentials, stats, language, voice, text, engine});
|
||||
audioBuffer = await synthPolly(createHash, retrieveHash, logger,
|
||||
{credentials, stats, language, voice, text, engine});
|
||||
break;
|
||||
case 'azure':
|
||||
case 'microsoft':
|
||||
@@ -264,7 +265,8 @@ async function synthAudio(client, logger, stats, { account_sid,
|
||||
});
|
||||
}
|
||||
|
||||
const synthPolly = async(logger, {credentials, stats, language, voice, engine, text}) => {
|
||||
const synthPolly = async(createHash, retrieveHash, logger,
|
||||
{credentials, stats, language, voice, engine, text}) => {
|
||||
try {
|
||||
const {region, accessKeyId, secretAccessKey, roleArn} = credentials;
|
||||
let polly;
|
||||
@@ -277,15 +279,9 @@ const synthPolly = async(logger, {credentials, stats, language, voice, engine, t
|
||||
}
|
||||
});
|
||||
} else if (roleArn) {
|
||||
const stsClient = new STSClient({ region});
|
||||
const roleToAssume = { RoleArn: roleArn, RoleSessionName: 'Jambonz_Speech' };
|
||||
const command = new AssumeRoleCommand(roleToAssume);
|
||||
|
||||
const response = await stsClient.send(command);
|
||||
const assumedRoleCreds = response.Credentials;
|
||||
polly = new PollyClient({
|
||||
region,
|
||||
credentials: assumedRoleCreds,
|
||||
credentials: await getAwsAuthToken(logger, createHash, retrieveHash, null, null, region, roleArn),
|
||||
});
|
||||
} else {
|
||||
// AWS RoleArn assigned to Instance profile
|
||||
|
||||
Reference in New Issue
Block a user