mirror of
https://github.com/jambonz/speech-utils.git
synced 2026-01-25 02:08:26 +00:00
support playht3.0
This commit is contained in:
@@ -20,7 +20,8 @@ const {
|
||||
createKryptonClient,
|
||||
createRivaClient,
|
||||
noopLogger,
|
||||
makeFilePath
|
||||
makeFilePath,
|
||||
makePlayhtKey
|
||||
} = require('./utils');
|
||||
const getNuanceAccessToken = require('./get-nuance-access-token');
|
||||
const getVerbioAccessToken = require('./get-verbio-token');
|
||||
@@ -244,7 +245,7 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
|
||||
});
|
||||
break;
|
||||
case 'playht':
|
||||
audioBuffer = await synthPlayHT(logger, {
|
||||
audioBuffer = await synthPlayHT(client, logger, {
|
||||
credentials, options, stats, language, voice, text, renderForCaching, disableTtsStreaming, filePath
|
||||
});
|
||||
break;
|
||||
@@ -755,12 +756,38 @@ const synthElevenlabs = async(logger, {
|
||||
}
|
||||
};
|
||||
|
||||
const synthPlayHT = async(logger, {
|
||||
const synthPlayHT = async(client, logger, {
|
||||
credentials, options, stats, voice, text, renderForCaching, disableTtsStreaming
|
||||
}) => {
|
||||
const {api_key, user_id, voice_engine, options: credOpts} = credentials;
|
||||
const opts = !!options && Object.keys(options).length !== 0 ? options : JSON.parse(credOpts || '{}');
|
||||
|
||||
let synthesizeUrl = 'https://api.play.ht/api/v2/tts/stream';
|
||||
|
||||
// If the model is Play3.0, the stream URL is provided by the v3 auth endpoint
|
||||
// and includes a JWT token as a request parameter.
|
||||
if (voice_engine === 'Play3.0') {
|
||||
try {
|
||||
const post = bent('https://api.play.ht', 'POST', 'json', 201, {
|
||||
'AUTHORIZATION': api_key,
|
||||
'X-USER-ID': user_id,
|
||||
'Accept': 'application/json'
|
||||
});
|
||||
const key = makePlayhtKey(api_key);
|
||||
const url = await client.get(key);
|
||||
if (!url) {
|
||||
const {inference_address, expires_at_ms} = await post('/api/v3/auth');
|
||||
synthesizeUrl = inference_address;
|
||||
const expiry = Math.floor((expires_at_ms - Date.now()) / 1000 - 30);
|
||||
await client.set(key, inference_address, 'EX', expiry);
|
||||
}
|
||||
} catch (err) {
|
||||
logger.info({err}, 'synth PlayHT returned error for authentication version 3.0');
|
||||
stats.increment('tts.count', ['vendor:playht', 'accepted:no']);
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
/* default to using the streaming interface, unless disabled by env var OR we want just a cache file */
|
||||
if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
|
||||
let params = '';
|
||||
@@ -769,6 +796,7 @@ const synthPlayHT = async(logger, {
|
||||
params += ',vendor=playht';
|
||||
params += `,voice=${voice}`;
|
||||
params += `,voice_engine=${voice_engine}`;
|
||||
params += `,synthesize_url=${synthesizeUrl}`;
|
||||
params += ',write_cache_file=1';
|
||||
if (opts.quality) params += `,quality=${opts.quality}`;
|
||||
if (opts.speed) params += `,speed=${opts.speed}`;
|
||||
@@ -794,6 +822,7 @@ const synthPlayHT = async(logger, {
|
||||
'Accept': 'audio/mpeg',
|
||||
'Content-Type': 'application/json'
|
||||
});
|
||||
|
||||
const mp3 = await post('/api/v2/tts/stream', {
|
||||
text,
|
||||
voice,
|
||||
|
||||
@@ -98,6 +98,11 @@ function makeAwsKey(awsAccessKeyId) {
|
||||
return `aws:${hash.digest('hex')}`;
|
||||
}
|
||||
|
||||
/**
 * Build the lookup key for a PlayHT account: the literal prefix `playht:`
 * followed by the hex-encoded SHA-1 digest of the API key. The returned
 * string contains only the digest, never the API key itself.
 *
 * @param {string} apiKey - PlayHT API key; passed straight to `hash.update`
 *   (presumably always a string - confirm against callers).
 * @returns {string} key of the form `playht:<40 hex chars>`.
 */
function makePlayhtKey(apiKey) {
|
||||
const hash = crypto.createHash('sha1');
|
||||
hash.update(apiKey);
|
||||
// digest('hex') finalizes the hash; the hash object is not reused afterwards.
return `playht:${hash.digest('hex')}`;
|
||||
}
|
||||
function makeVerbioKey(client_id) {
|
||||
const hash = crypto.createHash('sha1');
|
||||
hash.update(client_id);
|
||||
@@ -171,6 +176,7 @@ module.exports = {
|
||||
makeSynthKey,
|
||||
makeNuanceKey,
|
||||
makeIbmKey,
|
||||
makePlayhtKey,
|
||||
makeAwsKey,
|
||||
makeVerbioKey,
|
||||
getNuanceAccessToken,
|
||||
|
||||
Reference in New Issue
Block a user