Files
speech-utils/lib/get-tts-voices.js
T
Dave Horton 7d076bb8b4 chore: deprecate + remove verbio, nuance, playht speech vendor support (#144)
* chore: deprecate and remove verbio, nuance speech vendor support

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

* chore: also deprecate and remove PlayHT speech vendor

PlayHT was acquired and no longer provides the service.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-17 16:20:00 -04:00

81 lines
2.6 KiB
JavaScript

const assert = require('assert');
const {noopLogger} = require('./utils');
const ttsGoogle = require('@google-cloud/text-to-speech');
const { PollyClient, DescribeVoicesCommand } = require('@aws-sdk/client-polly');
const getAwsAuthToken = require('./get-aws-sts-token');
/**
 * List the voices available from Google Cloud Text-to-Speech.
 *
 * A new TextToSpeechClient is created on every call from the supplied credentials.
 *
 * @param {*} _client - unused; accepted so the signature lines up with the caller
 * @param {*} logger - unused by this helper
 * @param {object} credentials - handed to the TextToSpeechClient constructor
 * as its `credentials` option
 * @returns {Promise<*>} whatever `listVoices()` resolves with, returned unmodified
 */
const getGoogleVoices = async(_client, logger, credentials) => {
  const googleTts = new ttsGoogle.TextToSpeechClient({credentials});
  const voices = await googleTts.listVoices();
  return voices;
};
/**
 * List the voices available from AWS Polly.
 *
 * The Polly client configuration is chosen from the supplied credentials,
 * checked in this order:
 *  1. `accessKeyId` + `secretAccessKey` - used directly as static credentials;
 *  2. `roleArn` - credentials are obtained from getAwsAuthToken
 *     (presumably temporary STS credentials for that role - confirm in
 *     ./get-aws-sts-token);
 *  3. neither - only `region` is passed, leaving credential resolution to the
 *     AWS SDK defaults.
 *
 * @param {*} _client - unused; accepted so the signature lines up with the caller
 * @param {*} createHash - forwarded to getAwsAuthToken (roleArn path only)
 * @param {*} retrieveHash - forwarded to getAwsAuthToken (roleArn path only)
 * @param {*} logger - logger exposing `info()`; used to report failures
 * @param {object} credentials
 * @param {string} credentials.region - AWS region for the Polly client
 * @param {string} [credentials.accessKeyId]
 * @param {string} [credentials.secretAccessKey]
 * @param {string} [credentials.roleArn]
 * @returns {Promise<object>} the response to DescribeVoicesCommand, unmodified
 * @throws rethrows, after logging, any error raised while building the client
 * or listing the voices
 */
const getAwsVoices = async(_client, createHash, retrieveHash, logger, credentials) => {
  // Declared outside the try block so the catch handler can report it.
  let region;
  try {
    const {accessKeyId, secretAccessKey, roleArn} = credentials;
    region = credentials.region;

    let clientConfig;
    if (accessKeyId && secretAccessKey) {
      clientConfig = {
        region,
        credentials: {
          accessKeyId,
          secretAccessKey
        }
      };
    } else if (roleArn) {
      clientConfig = {
        region,
        credentials: await getAwsAuthToken(
          logger, createHash, retrieveHash,
          {
            region,
            roleArn
          }),
      };
    } else {
      clientConfig = {region};
    }

    const client = new PollyClient(clientConfig);
    // `return await` (not a bare return) so a rejected send() is caught below.
    return await client.send(new DescribeVoicesCommand({}));
  } catch (err) {
    logger.info({err}, `getAwsVoices - failed to list voices for region ${region}`);
    throw err;
  }
};
/**
 * Retrieve the list of text-to-speech voices offered by a speech vendor.
 *
 * Only lists voices; no speech is synthesized. The work is delegated to a
 * vendor-specific helper and its promise is returned as-is.
 *
 * @param {*} client - forwarded to the vendor helper
 * @param {*} createHash - forwarded to the AWS helper only
 * @param {*} retrieveHash - forwarded to the AWS helper only
 * @param {*} logger - logger; noopLogger is substituted when this is falsy
 * @param {object} opts - options
 * @param {string} opts.vendor - 'google' or 'aws' ('polly' is an alias for 'aws')
 * @param {object} opts.credentials - vendor credentials, forwarded unchanged
 * @returns {Promise<*>} the voice listing in the vendor's own response format
 * @throws {AssertionError} (as a rejection) when opts.vendor is not supported
 */
async function getTtsVoices(client, createHash, retrieveHash, logger, {vendor, credentials}) {
  const log = logger || noopLogger;
  const isGoogle = vendor === 'google';
  const isAws = vendor === 'aws' || vendor === 'polly';

  assert.ok(isGoogle || isAws, `getTtsVoices not supported for vendor ${vendor}`);

  if (isGoogle) {
    return getGoogleVoices(client, log, credentials);
  }
  // The assertion above guarantees the vendor is 'aws' or 'polly' here.
  return getAwsVoices(client, createHash, retrieveHash, log, credentials);
}
module.exports = getTtsVoices;