add elevenlabs

This commit is contained in:
Quan HL
2023-10-12 14:22:15 +07:00
parent b5daeff047
commit ea153e9833
2 changed files with 57 additions and 1 deletions

View File

@@ -82,7 +82,8 @@ async function synthAudio(client, logger, stats, { account_sid,
let rtt;
logger = logger || noopLogger;
assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'ibm'].includes(vendor) ||
assert.ok(['google', 'aws', 'polly', 'microsoft',
'wellsaid', 'nuance', 'nvidia', 'ibm', 'elevenlabs'].includes(vendor) ||
vendor.startsWith('custom'),
`synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid, not ${vendor}`);
if ('google' === vendor) {
@@ -183,6 +184,9 @@ async function synthAudio(client, logger, stats, { account_sid,
case 'wellsaid':
audioBuffer = await synthWellSaid(logger, {credentials, stats, language, voice, text, filePath});
break;
case 'elevenlabs':
audioBuffer = await synthElevenlabs(logger, {credentials, stats, language, voice, text, filePath});
break;
case vendor.startsWith('custom') ? vendor : 'cant_match_value':
({ audioBuffer, filePath } = await synthCustomVendor(logger,
{credentials, stats, language, voice, text, filePath}));
@@ -568,6 +572,29 @@ const synthCustomVendor = async(logger, {credentials, stats, language, voice, te
}
};
/**
 * Synthesize speech through the ElevenLabs text-to-speech HTTP API.
 *
 * @param {object} logger - logger exposing `info(obj, msg)`
 * @param {object} opts
 * @param {object} opts.credentials - supplies `api_key` (sent as the `xi-api-key` header)
 *   and `model_id` (forwarded in the request body)
 * @param {object} opts.stats - accepted so the signature matches the call site; not used here
 * @param {string} opts.language - accepted so the signature matches the call site; not sent to the API
 * @param {string} opts.voice - voice id, placed in the request path
 * @param {string} opts.text - text to synthesize
 * @returns {Promise<Buffer>} response body decoded by bent as a buffer (audio/mpeg is requested)
 * @throws rethrows whatever error the HTTP call raised, after logging it
 */
const synthElevenlabs = async(logger, {credentials, stats, language, voice, text}) => {
  const {api_key, model_id} = credentials;
  try {
    const post = bent('https://api.elevenlabs.io', 'POST', 'buffer', {
      'xi-api-key': api_key,
      'Accept': 'audio/mpeg',
      'Content-Type': 'application/json'
    });
    // Encode the voice id so a value containing '/', '?' or '#' cannot alter the request path.
    const mp3 = await post(`/v1/text-to-speech/${encodeURIComponent(voice)}`, {
      text,
      model_id,
      voice_settings: {
        stability: 0.5,
        similarity_boost: 0.5
      }
    });
    return mp3;
  } catch (err) {
    logger.info({err}, 'synthElevenlabs returned error');
    throw err;
  }
};
const getFileExtFromMime = (mime) => {
switch (mime) {
case 'audio/wav':

View File

@@ -411,6 +411,35 @@ test('Custom Vendor speech synth tests', async(t) => {
client.quit();
});
// Integration test for the 'elevenlabs' vendor path of synthAudio.
// It only runs when ELEVENLABS_API_KEY, ELEVENLABS_VOICE_ID and ELEVENLABS_MODEL_ID
// are all set in the environment; otherwise it records a pass and ends early.
test('Elevenlabs speech synth tests', async(t) => {
  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);
  if (!process.env.ELEVENLABS_API_KEY || !process.env.ELEVENLABS_VOICE_ID || !process.env.ELEVENLABS_MODEL_ID) {
    t.pass('skipping Elevenlabs speech synth tests since ELEVENLABS_API_KEY, ' +
      'ELEVENLABS_VOICE_ID or ELEVENLABS_MODEL_ID not provided');
    return t.end();
  }
  const text = `<speak> Hi there and welcome to jambones! jambones is the <sub alias="seapass">CPaaS</sub> designed with the needs of communication service providers in mind. This is an example of simple text-to-speech, but there is so much more you can do. Try us out!</speak>`;
  try {
    // Named `result` so it does not shadow the outer `opts` passed to fn() above.
    const result = await synthAudio(stats, {
      vendor: 'elevenlabs',
      credentials: {
        api_key: process.env.ELEVENLABS_API_KEY,
        model_id: process.env.ELEVENLABS_MODEL_ID
      },
      language: 'en-US',
      voice: process.env.ELEVENLABS_VOICE_ID,
      text,
    });
    t.ok(!result.servedFromCache, `successfully synthesized eleven audio to ${result.filePath}`);
  } catch (err) {
    console.error(JSON.stringify(err));
    t.end(err);
  }
  client.quit();
});
test('TTS Cache tests', async(t) => {
const fn = require('..');
const {purgeTtsCache, getTtsSize, client} = fn(opts, logger);