mirror of
https://github.com/jambonz/speech-utils.git
synced 2026-01-25 02:08:26 +00:00
Merge pull request #65 from jambonz/feat/mod_playht_tts
support mod_playht_tts
This commit is contained in:
@@ -85,7 +85,7 @@ async function synthAudio(client, logger, stats, { account_sid,
|
||||
logger = logger || noopLogger;
|
||||
|
||||
assert.ok(['google', 'aws', 'polly', 'microsoft',
|
||||
'wellsaid', 'nuance', 'nvidia', 'ibm', 'elevenlabs', 'whisper', 'deepgram'].includes(vendor) ||
|
||||
'wellsaid', 'nuance', 'nvidia', 'ibm', 'elevenlabs', 'whisper', 'deepgram', 'playht'].includes(vendor) ||
|
||||
vendor.startsWith('custom'),
|
||||
`synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid, not ${vendor}`);
|
||||
if ('google' === vendor) {
|
||||
@@ -123,6 +123,11 @@ async function synthAudio(client, logger, stats, { account_sid,
|
||||
assert.ok(voice, 'synthAudio requires voice when elevenlabs is used');
|
||||
assert.ok(credentials.api_key, 'synthAudio requires api_key when elevenlabs is used');
|
||||
assert.ok(credentials.model_id, 'synthAudio requires model_id when elevenlabs is used');
|
||||
} else if ('playht' === vendor) {
|
||||
assert.ok(voice, 'synthAudio requires voice when playht is used');
|
||||
assert.ok(credentials.api_key, 'synthAudio requires api_key when playht is used');
|
||||
assert.ok(credentials.user_id, 'synthAudio requires user_id when playht is used');
|
||||
assert.ok(credentials.voice_engine, 'synthAudio requires voice_engine when playht is used');
|
||||
} else if ('whisper' === vendor) {
|
||||
assert.ok(voice, 'synthAudio requires voice when whisper is used');
|
||||
assert.ok(credentials.model_id, 'synthAudio requires model when whisper is used');
|
||||
@@ -206,6 +211,12 @@ async function synthAudio(client, logger, stats, { account_sid,
|
||||
});
|
||||
if (audioBuffer?.filePath) return audioBuffer;
|
||||
break;
|
||||
case 'playht':
|
||||
audioBuffer = await synthPlayHT(logger, {
|
||||
credentials, options, stats, language, voice, text, renderForCaching, disableTtsStreaming, filePath
|
||||
});
|
||||
if (audioBuffer?.filePath) return audioBuffer;
|
||||
break;
|
||||
case 'whisper':
|
||||
audioBuffer = await synthWhisper(logger, {
|
||||
credentials, stats, voice, text, renderForCaching, disableTtsStreaming});
|
||||
@@ -682,6 +693,61 @@ const synthElevenlabs = async(logger, {
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Synthesize speech using PlayHT.
 *
 * Unless streaming is disabled (env var JAMBONES_DISABLE_TTS_STREAMING,
 * `disableTtsStreaming`) or a cache file was requested (`renderForCaching`),
 * no HTTP request is made here: a `say:{...}text` string carrying the
 * credentials and tuning options is returned instead. Otherwise the audio is
 * fetched from the PlayHT HTTP API and returned as a buffer.
 *
 * @param {object} logger - logger exposing `info`
 * @param {object} params
 * @param {object} params.credentials - `api_key`, `user_id`, `voice_engine` and
 *   an optional `options` JSON string holding default tuning options
 * @param {object} [params.options] - per-request tuning options; when non-empty
 *   they replace (not merge with) the options stored in the credentials
 * @param {object} params.stats - stats client exposing `increment`
 * @param {string} params.voice - PlayHT voice identifier
 * @param {string} params.text - text to synthesize
 * @param {boolean} [params.renderForCaching] - force the HTTP path
 * @param {boolean} [params.disableTtsStreaming] - force the HTTP path
 * @returns {Promise<Buffer|{filePath: string, servedFromCache: boolean, rtt: number}>}
 * @throws rethrows any error raised by the HTTP request after logging it and
 *   incrementing `tts.count` with `accepted:no`
 */
const synthPlayHT = async(logger, {
  credentials, options, stats, voice, text, renderForCaching, disableTtsStreaming
}) => {
  const {api_key, user_id, voice_engine, options: credOpts} = credentials;
  const hasRequestOptions = !!options && Object.keys(options).length !== 0;
  const opts = hasRequestOptions ? options : JSON.parse(credOpts || '{}');

  /* default to using the streaming interface, unless disabled by env var OR we want just a cache file */
  if (!process.env.JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
    const params = [
      `api_key=${api_key}`,
      `user_id=${user_id}`,
      'vendor=playht',
      `voice=${voice}`,
      `voice_engine=${voice_engine}`,
      'write_cache_file=1'
    ];

    /* each optional setting is emitted under its own name; `seed` was previously sent as `style` */
    const optionalKeys = [
      'quality', 'speed', 'seed', 'temperature', 'emotion',
      'voice_guidance', 'style_guidance', 'text_guidance'
    ];
    for (const key of optionalKeys) {
      if (opts[key]) params.push(`${key}=${opts[key]}`);
    }

    return {
      /* line breaks in the text are flattened to spaces */
      filePath: `say:{${params.join(',')}}${text.replace(/\n/g, ' ').replace(/\r/g, ' ')}`,
      servedFromCache: false,
      rtt: 0
    };
  }

  try {
    const post = bent('https://api.play.ht', 'POST', 'buffer', {
      'AUTHORIZATION': api_key,
      'X-USER-ID': user_id,
      'Accept': 'audio/mpeg',
      'Content-Type': 'application/json'
    });
    /* opts is spread last so caller-supplied settings override the defaults above it */
    const mp3 = await post('/api/v2/tts/stream', {
      text,
      voice,
      voice_engine,
      output_format: 'mp3',
      sample_rate: 8000,
      ...opts
    });
    return mp3;
  } catch (err) {
    logger.info({err}, 'synth PlayHT returned error');
    stats.increment('tts.count', ['vendor:playht', 'accepted:no']);
    throw err;
  }
};
|
||||
|
||||
const synthWhisper = async(logger, {credentials, stats, voice, text, renderForCaching, disableTtsStreaming}) => {
|
||||
const {api_key, model_id, baseURL, timeout, speed} = credentials;
|
||||
/* if the env is set to stream then bag out, unless we are specifically rendering to generate a cache file */
|
||||
|
||||
@@ -549,6 +549,47 @@ test('Elevenlabs speech synth tests', async(t) => {
|
||||
client.quit();
|
||||
})
|
||||
|
||||
/* Exercises synthAudio with the playht vendor; skipped unless PlayHT credentials are in the environment. */
test('PlayHT speech synth tests', async(t) => {
  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);

  if (!process.env.PLAYHT_API_KEY || !process.env.PLAYHT_USER_ID) {
    t.pass('skipping PlayHT speech synth tests since PLAYHT_API_KEY or PLAYHT_USER_ID is/are not provided');
    return t.end();
  }
  const text = 'Hi there and welcome to jambones!';
  try {
    /* named `result` so it does not shadow the outer `opts` passed to fn() above */
    const result = await synthAudio(stats, {
      vendor: 'playht',
      credentials: {
        api_key: process.env.PLAYHT_API_KEY,
        user_id: process.env.PLAYHT_USER_ID,
        voice_engine: 'PlayHT2.0-turbo',
        /* tuning options are passed as a JSON string inside the credentials */
        options: JSON.stringify({
          quality: "medium",
          speed: 1,
          seed: 1,
          temperature: 1,
          emotion: "female_happy",
          voice_guidance: 3,
          style_guidance: 20,
          text_guidance: 1,
        })
      },
      language: 'en-US',
      voice: 's3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json',
      text,
      renderForCaching: true
    });
    t.ok(!result.servedFromCache, `successfully synthesized playht audio to ${result.filePath}`);

  } catch (err) {
    console.error(JSON.stringify(err));
    t.end(err);
  }
  client.quit();
})
|
||||
|
||||
test('whisper speech synth tests', async(t) => {
|
||||
const fn = require('..');
|
||||
const {synthAudio, client} = fn(opts, logger);
|
||||
|
||||
Reference in New Issue
Block a user