Merge pull request #92 from jambonz/feat/playht30

support playht3.0
This commit is contained in:
Dave Horton
2024-10-09 13:26:53 -04:00
committed by GitHub
3 changed files with 63 additions and 16 deletions

View File

@@ -20,7 +20,8 @@ const {
createKryptonClient,
createRivaClient,
noopLogger,
makeFilePath
makeFilePath,
makePlayhtKey
} = require('./utils');
const getNuanceAccessToken = require('./get-nuance-access-token');
const getVerbioAccessToken = require('./get-verbio-token');
@@ -244,7 +245,7 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
});
break;
case 'playht':
audioBuffer = await synthPlayHT(logger, {
audioBuffer = await synthPlayHT(client, logger, {
credentials, options, stats, language, voice, text, renderForCaching, disableTtsStreaming, filePath
});
break;
@@ -755,12 +756,37 @@ const synthElevenlabs = async(logger, {
}
};
const synthPlayHT = async(logger, {
credentials, options, stats, voice, text, renderForCaching, disableTtsStreaming
const synthPlayHT = async(client, logger, {
credentials, options, stats, voice, language, text, renderForCaching, disableTtsStreaming
}) => {
const {api_key, user_id, voice_engine, options: credOpts} = credentials;
const opts = !!options && Object.keys(options).length !== 0 ? options : JSON.parse(credOpts || '{}');
let synthesizeUrl = 'https://api.play.ht/api/v2/tts/stream';
// Play3.0 does not use the fixed v2 endpoint: the synthesis URL (inference address)
// is handed out by the PlayHT v3 authentication endpoint. The address is cached via
// `client` under a key derived from the api key so we do not re-authenticate on every
// request.
if (voice_engine === 'Play3.0') {
  try {
    const key = makePlayhtKey(api_key);
    const cachedUrl = await client.get(key);
    if (cachedUrl) {
      // cache hit: reuse the previously issued inference address
      synthesizeUrl = cachedUrl;
    } else {
      const post = bent('https://api.play.ht', 'POST', 'json', 201, {
        'AUTHORIZATION': api_key,
        'X-USER-ID': user_id,
        'Accept': 'application/json'
      });
      const {inference_address, expires_at_ms} = await post('/api/v3/auth');
      if (!inference_address) {
        throw new Error('PlayHT authentication response did not include inference_address');
      }
      synthesizeUrl = inference_address;
      // expire the cached address 30 seconds before PlayHT does, to avoid using a stale one
      const expiry = Math.floor((expires_at_ms - Date.now()) / 1000 - 30);
      // only cache when the remaining lifetime is a positive number of seconds;
      // a zero, negative or NaN expiry would make the SET ... EX call fail
      if (expiry > 0) {
        await client.set(key, inference_address, 'EX', expiry);
      }
    }
  } catch (err) {
    logger.info({err}, 'synth PlayHT returned error for authentication version 3.0');
    stats.increment('tts.count', ['vendor:playht', 'accepted:no']);
    throw err;
  }
}
/* default to using the streaming interface, unless disabled by env var OR we want just a cache file */
if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
let params = '';
@@ -769,6 +795,7 @@ const synthPlayHT = async(logger, {
params += ',vendor=playht';
params += `,voice=${voice}`;
params += `,voice_engine=${voice_engine}`;
params += `,synthesize_url=${synthesizeUrl}`;
params += ',write_cache_file=1';
if (opts.quality) params += `,quality=${opts.quality}`;
if (opts.speed) params += `,speed=${opts.speed}`;
@@ -778,6 +805,8 @@ const synthPlayHT = async(logger, {
if (opts.voice_guidance) params += `,voice_guidance=${opts.voice_guidance}`;
if (opts.style_guidance) params += `,style_guidance=${opts.style_guidance}`;
if (opts.text_guidance) params += `,text_guidance=${opts.text_guidance}`;
if (opts.top_p) params += `,top_p=${opts.top_p}`;
if (opts.repetition_penalty) params += `,repetition_penalty=${opts.repetition_penalty}`;
params += '}';
return {
@@ -788,14 +817,18 @@ const synthPlayHT = async(logger, {
}
try {
const post = bent('https://api.play.ht', 'POST', 'buffer', {
const post = bent('POST', 'buffer', {
...(voice_engine !== 'Play3.0' && {
'AUTHORIZATION': api_key,
'X-USER-ID': user_id,
}),
'Accept': 'audio/mpeg',
'Content-Type': 'application/json'
});
const mp3 = await post('/api/v2/tts/stream', {
const mp3 = await post(synthesizeUrl, {
text,
...(voice_engine === 'Play3.0' && { language }),
voice,
voice_engine,
output_format: 'mp3',

View File

@@ -98,6 +98,11 @@ function makeAwsKey(awsAccessKeyId) {
return `aws:${hash.digest('hex')}`;
}
/**
 * Build the cache key used for PlayHT data tied to a given api key.
 * The api key itself is never embedded in the key; its SHA-1 hex digest is used instead.
 *
 * @param {string} apiKey - PlayHT api key
 * @returns {string} key of the form `playht:<sha1 hex digest of apiKey>`
 */
function makePlayhtKey(apiKey) {
  const digest = crypto
    .createHash('sha1')
    .update(apiKey)
    .digest('hex');
  return `playht:${digest}`;
}
function makeVerbioKey(client_id) {
const hash = crypto.createHash('sha1');
hash.update(client_id);
@@ -171,6 +176,7 @@ module.exports = {
makeSynthKey,
makeNuanceKey,
makeIbmKey,
makePlayhtKey,
makeAwsKey,
makeVerbioKey,
getNuanceAccessToken,

View File

@@ -574,9 +574,9 @@ test('Elevenlabs speech synth tests', async(t) => {
t.end(err);
}
client.quit();
})
});
test('PlayHT speech synth tests', async(t) => {
const testPlayHT = async(t, voice_engine) => {
const fn = require('..');
const {synthAudio, client} = fn(opts, logger);
@@ -584,26 +584,26 @@ test('PlayHT speech synth tests', async(t) => {
t.pass('skipping PlayHT speech synth tests since PLAYHT_API_KEY or PLAYHT_USER_ID is/are not provided');
return t.end();
}
const text = 'Hi there and welcome to jambones!';
const text = 'Hi there and welcome to jambones! ' + Date.now();
try {
let opts = await synthAudio(stats, {
const opts = await synthAudio(stats, {
vendor: 'playht',
credentials: {
api_key: process.env.PLAYHT_API_KEY,
user_id: process.env.PLAYHT_USER_ID,
voice_engine: 'PlayHT2.0-turbo',
voice_engine,
options: JSON.stringify({
quality: "medium",
quality: 'medium',
speed: 1,
seed: 1,
temperature: 1,
emotion: "female_happy",
emotion: 'female_happy',
voice_guidance: 3,
style_guidance: 20,
text_guidance: 1,
})
},
language: 'en-US',
language: 'english',
voice: 's3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json',
text,
renderForCaching: true
@@ -615,6 +615,14 @@ test('PlayHT speech synth tests', async(t) => {
t.end(err);
}
client.quit();
};
// Register the shared PlayHT scenario once per voice engine, in the same order as before:
// first the 2.0-turbo engine, then Play3.0.
for (const [title, voiceEngine] of [
  ['PlayHT speech synth tests', 'PlayHT2.0-turbo'],
  ['PlayHT3.0 speech synth tests', 'Play3.0']
]) {
  test(title, async(t) => {
    await testPlayHT(t, voiceEngine);
  });
}
test('rimelabs speech synth tests', async(t) => {