support playht3.0

This commit is contained in:
Quan HL
2024-09-27 12:08:41 +07:00
parent 1846203807
commit 1a04fd736c
3 changed files with 53 additions and 10 deletions

View File

@@ -20,7 +20,8 @@ const {
createKryptonClient,
createRivaClient,
noopLogger,
makeFilePath
makeFilePath,
makePlayhtKey
} = require('./utils');
const getNuanceAccessToken = require('./get-nuance-access-token');
const getVerbioAccessToken = require('./get-verbio-token');
@@ -244,7 +245,7 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
});
break;
case 'playht':
audioBuffer = await synthPlayHT(logger, {
audioBuffer = await synthPlayHT(client, logger, {
credentials, options, stats, language, voice, text, renderForCaching, disableTtsStreaming, filePath
});
break;
@@ -755,12 +756,38 @@ const synthElevenlabs = async(logger, {
}
};
const synthPlayHT = async(logger, {
const synthPlayHT = async(client, logger, {
credentials, options, stats, voice, text, renderForCaching, disableTtsStreaming
}) => {
const {api_key, user_id, voice_engine, options: credOpts} = credentials;
const opts = !!options && Object.keys(options).length !== 0 ? options : JSON.parse(credOpts || '{}');
let synthesizeUrl = 'https://api.play.ht/api/v2/tts/stream';
// For the Play3.0 model, the stream URL is provided by the v3 auth endpoint
// and already includes the JWT token as a request parameter.
if (voice_engine === 'Play3.0') {
try {
const post = bent('https://api.play.ht', 'POST', 'json', 201, {
'AUTHORIZATION': api_key,
'X-USER-ID': user_id,
'Accept': 'application/json'
});
const key = makePlayhtKey(api_key);
const url = await client.get(key);
if (!url) {
const {inference_address, expires_at_ms} = await post('/api/v3/auth');
synthesizeUrl = inference_address;
const expiry = Math.floor((expires_at_ms - Date.now()) / 1000 - 30);
await client.set(key, inference_address, 'EX', expiry);
}
} catch (err) {
logger.info({err}, 'synth PlayHT returned error for authentication version 3.0');
stats.increment('tts.count', ['vendor:playht', 'accepted:no']);
throw err;
}
}
/* default to using the streaming interface, unless disabled by env var OR we want just a cache file */
if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
let params = '';
@@ -769,6 +796,7 @@ const synthPlayHT = async(logger, {
params += ',vendor=playht';
params += `,voice=${voice}`;
params += `,voice_engine=${voice_engine}`;
params += `,synthesize_url=${synthesizeUrl}`;
params += ',write_cache_file=1';
if (opts.quality) params += `,quality=${opts.quality}`;
if (opts.speed) params += `,speed=${opts.speed}`;
@@ -794,6 +822,7 @@ const synthPlayHT = async(logger, {
'Accept': 'audio/mpeg',
'Content-Type': 'application/json'
});
const mp3 = await post('/api/v2/tts/stream', {
text,
voice,

View File

@@ -98,6 +98,11 @@ function makeAwsKey(awsAccessKeyId) {
return `aws:${hash.digest('hex')}`;
}
/**
 * Derive the key string used to look up PlayHT data for a given API key.
 *
 * The result is the literal prefix `playht:` followed by the hex-encoded
 * SHA-1 digest of the API key.
 *
 * @param {string} apiKey - PlayHT API key to hash.
 * @returns {string} Key of the form `playht:<sha1 hex digest>`.
 */
function makePlayhtKey(apiKey) {
  const digest = crypto.createHash('sha1').update(apiKey).digest('hex');
  return `playht:${digest}`;
}
function makeVerbioKey(client_id) {
const hash = crypto.createHash('sha1');
hash.update(client_id);
@@ -171,6 +176,7 @@ module.exports = {
makeSynthKey,
makeNuanceKey,
makeIbmKey,
makePlayhtKey,
makeAwsKey,
makeVerbioKey,
getNuanceAccessToken,