mirror of
https://github.com/jambonz/speech-utils.git
synced 2025-12-19 03:37:49 +00:00
add elevenlabs
This commit is contained in:
@@ -82,7 +82,8 @@ async function synthAudio(client, logger, stats, { account_sid,
|
||||
let rtt;
|
||||
logger = logger || noopLogger;
|
||||
|
||||
assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'ibm'].includes(vendor) ||
|
||||
assert.ok(['google', 'aws', 'polly', 'microsoft',
|
||||
'wellsaid', 'nuance', 'nvidia', 'ibm', 'elevenlabs'].includes(vendor) ||
|
||||
vendor.startsWith('custom'),
|
||||
`synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid, not ${vendor}`);
|
||||
if ('google' === vendor) {
|
||||
@@ -183,6 +184,9 @@ async function synthAudio(client, logger, stats, { account_sid,
|
||||
case 'wellsaid':
|
||||
audioBuffer = await synthWellSaid(logger, {credentials, stats, language, voice, text, filePath});
|
||||
break;
|
||||
case 'elevenlabs':
|
||||
audioBuffer = await synthElevenlabs(logger, {credentials, stats, language, voice, text, filePath});
|
||||
break;
|
||||
case vendor.startsWith('custom') ? vendor : 'cant_match_value':
|
||||
({ audioBuffer, filePath } = await synthCustomVendor(logger,
|
||||
{credentials, stats, language, voice, text, filePath}));
|
||||
@@ -568,6 +572,29 @@ const synthCustomVendor = async(logger, {credentials, stats, language, voice, te
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Synthesize speech through the ElevenLabs text-to-speech REST API.
 *
 * POSTs `text` to `/v1/text-to-speech/{voice}` on https://api.elevenlabs.io,
 * asking for `audio/mpeg`, and resolves with the response body as decoded by
 * bent's 'buffer' mode.
 *
 * @param {object} logger - logger exposing `info(obj, msg)`
 * @param {object} opts
 * @param {object} opts.credentials - `{api_key, model_id}`; `api_key` is sent
 *   in the `xi-api-key` header and `model_id` in the request body
 * @param {object} [opts.stats] - accepted but not used by this function
 * @param {string} [opts.language] - accepted but not used by this function
 * @param {string} opts.voice - voice identifier placed in the request path
 * @param {string} opts.text - text to synthesize
 * @returns {Promise<Buffer>} the audio returned by the service
 * @throws rethrows whatever the HTTP request throws, after logging it
 */
const synthElevenlabs = async(logger, {credentials, stats, language, voice, text}) => {
  const {api_key, model_id} = credentials;
  try {
    const post = bent('https://api.elevenlabs.io', 'POST', 'buffer', {
      'xi-api-key': api_key,
      'Accept': 'audio/mpeg',
      'Content-Type': 'application/json'
    });
    // Encode the voice id so characters such as '/', '?' or spaces cannot
    // alter the request path.
    const mp3 = await post(`/v1/text-to-speech/${encodeURIComponent(voice)}`, {
      text,
      model_id,
      voice_settings: {
        stability: 0.5,
        similarity_boost: 0.5
      }
    });
    return mp3;
  } catch (err) {
    logger.info({err}, 'synthElevenlabs returned error');
    throw err;
  }
};
|
||||
|
||||
const getFileExtFromMime = (mime) => {
|
||||
switch (mime) {
|
||||
case 'audio/wav':
|
||||
|
||||
@@ -411,6 +411,35 @@ test('Custom Vendor speech synth tests', async(t) => {
|
||||
client.quit();
|
||||
});
|
||||
|
||||
// Integration test: synthesizes a short prompt with the 'elevenlabs' vendor.
// Skipped unless ELEVENLABS_API_KEY, ELEVENLABS_VOICE_ID and
// ELEVENLABS_MODEL_ID are all set in the environment.
test('Elevenlabs speech synth tests', async(t) => {
  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);

  if (!process.env.ELEVENLABS_API_KEY || !process.env.ELEVENLABS_VOICE_ID || !process.env.ELEVENLABS_MODEL_ID) {
    t.pass('skipping Elevenlabs speech synth tests since ELEVENLABS_* env vars not provided');
    // Release the client created above; the skip path would otherwise leave it open.
    client.quit();
    return t.end();
  }
  const text = `<speak> Hi there and welcome to jambones! jambones is the <sub alias="seapass">CPaaS</sub> designed with the needs of communication service providers in mind. This is an example of simple text-to-speech, but there is so much more you can do. Try us out!</speak>`;
  try {
    // Named `result` so it does not shadow the outer `opts` passed to fn().
    const result = await synthAudio(stats, {
      vendor: 'elevenlabs',
      credentials: {
        api_key: process.env.ELEVENLABS_API_KEY,
        model_id: process.env.ELEVENLABS_MODEL_ID
      },
      language: 'en-US',
      voice: process.env.ELEVENLABS_VOICE_ID,
      text,
    });
    t.ok(!result.servedFromCache, `successfully synthesized eleven audio to ${result.filePath}`);

  } catch (err) {
    console.error(JSON.stringify(err));
    t.end(err);
  }
  client.quit();
});
|
||||
|
||||
test('TTS Cache tests', async(t) => {
|
||||
const fn = require('..');
|
||||
const {purgeTtsCache, getTtsSize, client} = fn(opts, logger);
|
||||
|
||||
Reference in New Issue
Block a user