add eleven labs (#240)

* add eleven labs

* wip

* add voices and languages endpoints
This commit is contained in:
Hoan Luu Huu
2023-10-13 08:27:23 +07:00
committed by GitHub
parent 936a9244ba
commit 4c9af253a3
4 changed files with 155 additions and 6 deletions

View File

@@ -20,7 +20,7 @@ class SpeechCredential extends Model {
return rows;
}
static async isAvailableVendorAndLabel(service_provider_sid, account_sid, vendor, label) {
static async getSpeechCredentialsByVendorAndLabel(service_provider_sid, account_sid, vendor, label) {
let sql;
if (account_sid) {
sql = 'SELECT * FROM speech_credentials WHERE account_sid = ? AND vendor = ? AND label = ?';

View File

@@ -19,8 +19,10 @@ const {
testDeepgramStt,
testSonioxStt,
testIbmTts,
testIbmStt
testIbmStt,
testElevenlabs
} = require('../../utils/speech-utils');
const bent = require('bent');
const {promisePool} = require('../../db');
const validateAdd = async(req) => {
@@ -127,7 +129,8 @@ const encryptCredential = (obj) => {
custom_stt_url,
custom_tts_url,
auth_token = '',
cobalt_server_uri
cobalt_server_uri,
model_id
} = obj;
switch (vendor) {
@@ -202,6 +205,12 @@ const encryptCredential = (obj) => {
const cobaltData = JSON.stringify({cobalt_server_uri});
return encrypt(cobaltData);
case 'elevenlabs':
assert(api_key, 'invalid elevenLabs speech credential: api_key is required');
assert(model_id, 'invalid elevenLabs speech credential: model_id is required');
const elevenlabsData = JSON.stringify({api_key, model_id});
return encrypt(elevenlabsData);
default:
if (vendor.startsWith('custom:')) {
const customData = JSON.stringify({auth_token, custom_stt_url, custom_tts_url});
@@ -236,7 +245,7 @@ router.post('/', async(req, res) => {
// Check if vendor and label is already used for account or SP
if (label) {
const existingSpeech = await SpeechCredential.isAvailableVendorAndLabel(
const existingSpeech = await SpeechCredential.getSpeechCredentialsByVendorAndLabel(
service_provider_sid, account_sid, vendor, label);
if (existingSpeech.length > 0) {
throw new DbErrorUnprocessableRequest(`Label ${label} is already in use for another speech credential`);
@@ -317,6 +326,10 @@ function decryptCredential(obj, credential, logger) {
} else if ('soniox' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = obscureKey(o.api_key);
} else if ('elevenlabs' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = obscureKey(o.api_key);
obj.model_id = o.model_id;
} else if (obj.vendor.startsWith('custom:')) {
const o = JSON.parse(decrypt(credential));
obj.auth_token = obscureKey(o.auth_token);
@@ -454,7 +467,7 @@ router.put('/:sid', async(req, res) => {
custom_stt_endpoint_url,
custom_stt_url,
custom_tts_url,
cobalt_server_uri
cobalt_server_uri,
} = req.body;
const newCred = {
@@ -732,6 +745,18 @@ router.get('/:sid/test', async(req, res) => {
SpeechCredential.sttTestResult(sid, false);
}
}
} else if (cred.vendor === 'elevenlabs') {
const {api_key, model_id} = credential;
if (cred.use_for_tts) {
try {
await testElevenlabs(logger, {api_key, model_id});
results.tts.status = 'ok';
SpeechCredential.ttsTestResult(sid, true);
} catch (err) {
results.tts = {status: 'fail', reason: err.message};
SpeechCredential.ttsTestResult(sid, false);
}
}
}
res.status(200).json(results);
@@ -741,4 +766,81 @@ router.get('/:sid/test', async(req, res) => {
}
});
/**
 * POST /voices
 *
 * Responds with the list of TTS voices for the vendor/label given in the request body.
 * The account and service provider are taken from req.user when set there, otherwise
 * from the request body (and, for the service provider, finally from
 * parseServiceProviderSid(req)).
 * Replies 200 with whatever getTtsVoices resolves to; any error is handed to sysError.
 */
router.post('/voices', async(req, res) => {
  const {logger} = req.app.locals;
  const {vendor, label} = req.body;

  let accountSid = req.user.account_sid;
  if (!accountSid) {
    accountSid = req.body.account_sid;
  }

  let serviceProviderSid = req.user.service_provider_sid;
  if (!serviceProviderSid) {
    serviceProviderSid = req.body.service_provider_sid;
  }
  if (!serviceProviderSid) {
    serviceProviderSid = parseServiceProviderSid(req);
  }

  try {
    const voices = await getTtsVoices(vendor, label, serviceProviderSid, accountSid);
    res.status(200).json(voices);
  } catch (err) {
    sysError(logger, res, err);
  }
});
/**
 * POST /languages
 *
 * Responds with the list of TTS languages for the vendor/label given in the request body.
 * The account and service provider are taken from req.user when set there, otherwise
 * from the request body (and, for the service provider, finally from
 * parseServiceProviderSid(req)).
 * Replies 200 with whatever getTtsLanguages resolves to; any error is handed to sysError.
 */
router.post('/languages', async(req, res) => {
  const {logger} = req.app.locals;
  const {vendor, label} = req.body;

  let accountSid = req.user.account_sid;
  if (!accountSid) {
    accountSid = req.body.account_sid;
  }

  let serviceProviderSid = req.user.service_provider_sid;
  if (!serviceProviderSid) {
    serviceProviderSid = req.body.service_provider_sid;
  }
  if (!serviceProviderSid) {
    serviceProviderSid = parseServiceProviderSid(req);
  }

  try {
    const languages = await getTtsLanguages(vendor, label, serviceProviderSid, accountSid);
    res.status(200).json(languages);
  } catch (err) {
    sysError(logger, res, err);
  }
});
/**
 * List the TTS voices a vendor offers, shaped as {value, name} options.
 *
 * Only 'elevenlabs' is handled; any other vendor resolves to an empty array.
 * The first speech credential matching vendor + label (if any) supplies the API key.
 * When no key is available the voices request is still issued, just without the
 * 'xi-api-key' header.
 *
 * @param {string} vendor - speech vendor name
 * @param {string} label - credential label used for the lookup
 * @param {string} service_provider_sid - passed through to the credential lookup
 * @param {string} account_sid - passed through to the credential lookup
 * @returns {Promise<Array<{value: string, name: string}>>} voice options
 * @throws whatever the credential lookup, decrypt/JSON.parse or the HTTP request throws
 */
const getTtsVoices = async(vendor, label, service_provider_sid, account_sid) => {
  // The model method is named getSpeechCredentialsByVendorAndLabel; the former
  // isAvailableVendorAndLabel name no longer exists on SpeechCredential.
  const rows = await SpeechCredential.getSpeechCredentialsByVendorAndLabel(
    service_provider_sid, account_sid, vendor, label);
  const row = rows && rows.length > 0 ? rows[0] : null;

  if (vendor === 'elevenlabs') {
    // encryptCredential stores {api_key, model_id} as an encrypted JSON blob, so a raw
    // row normally has no plain api_key property.
    // NOTE(review): assumes the blob is exposed as row.credential - confirm against the
    // speech_credentials schema.
    let apiKey = row ? row.api_key : undefined;
    if (row && !apiKey && typeof row.credential === 'string') {
      apiKey = JSON.parse(decrypt(row.credential)).api_key;
    }

    const headers = apiKey ? {'xi-api-key': apiKey} : {};
    const get = bent('https://api.elevenlabs.io', 'GET', 'json', headers);
    const resp = await get('/v1/voices');
    if (!resp || !Array.isArray(resp.voices)) {
      return [];
    }

    return resp.voices.map((v) => {
      // labels (or individual label fields) may be missing; skip those instead of
      // throwing or printing "undefined", and keep the name on a single line.
      const labels = v.labels || {};
      const details = [
        labels.accent,
        labels.description,
        labels.age,
        labels.gender,
        labels['use case']
      ].filter(Boolean);
      return {
        value: v.voice_id,
        name: details.length > 0 ? `${v.name} - ${details.join(', ')}` : v.name
      };
    });
  }
  return [];
};
/**
 * List the TTS languages available for a vendor, shaped as {value, name} options.
 *
 * Only 'elevenlabs' is handled: the languages are those of the model whose model_id
 * matches the one saved in the first speech credential found for vendor + label.
 * Resolves to an empty array for any other vendor, when no usable credential exists,
 * or when the configured model is not in the vendor's model list.
 *
 * @param {string} vendor - speech vendor name
 * @param {string} label - credential label used for the lookup
 * @param {string} service_provider_sid - passed through to the credential lookup
 * @param {string} account_sid - passed through to the credential lookup
 * @returns {Promise<Array<{value: string, name: string}>>} language options
 * @throws whatever the credential lookup, decrypt/JSON.parse or the HTTP request throws
 */
const getTtsLanguages = async(vendor, label, service_provider_sid, account_sid) => {
  // The model method is named getSpeechCredentialsByVendorAndLabel; the former
  // isAvailableVendorAndLabel name no longer exists on SpeechCredential.
  const rows = await SpeechCredential.getSpeechCredentialsByVendorAndLabel(
    service_provider_sid, account_sid, vendor, label);
  const row = rows && rows.length > 0 ? rows[0] : null;

  // Always resolve to an array so the route never serialises `undefined`.
  if (vendor !== 'elevenlabs' || !row) {
    return [];
  }

  // encryptCredential stores {api_key, model_id} as an encrypted JSON blob, so a raw
  // row normally has no plain api_key/model_id properties.
  // NOTE(review): assumes the blob is exposed as row.credential - confirm against the
  // speech_credentials schema.
  let {api_key, model_id} = row;
  if (!api_key && typeof row.credential === 'string') {
    ({api_key, model_id} = JSON.parse(decrypt(row.credential)));
  }
  if (!api_key) {
    return [];
  }

  const get = bent('https://api.elevenlabs.io', 'GET', 'json', {
    'xi-api-key': api_key
  });
  const models = await get('/v1/models');
  if (!Array.isArray(models) || models.length === 0) {
    return [];
  }

  const model = models.find((m) => m.model_id === model_id);
  if (!model || !Array.isArray(model.languages)) {
    return [];
  }
  return model.languages.map((l) => ({value: l.language_id, name: l.name}));
};
module.exports = router;

View File

@@ -203,6 +203,29 @@ const testWellSaidTts = async(logger, credentials) => {
}
};
/**
 * Check an ElevenLabs credential by synthesizing the word "Hello".
 *
 * Posts a text-to-speech request to api.elevenlabs.io with the given key and model,
 * asking for audio/mpeg, and resolves with the response body as a buffer.
 *
 * @param {object} logger - logger exposing info(obj, msg)
 * @param {object} credentials
 * @param {string} credentials.api_key - sent as the 'xi-api-key' header
 * @param {string} credentials.model_id - model to synthesize with
 * @returns {Promise<Buffer>} the synthesized audio
 * @throws rethrows any request failure after logging it
 */
const testElevenlabs = async(logger, credentials) => {
  const {api_key, model_id} = credentials;
  // Fixed voice used only for this probe.
  // NOTE(review): presumably one of ElevenLabs' stock voices - confirm it stays available.
  const testVoiceId = '21m00Tcm4TlvDq8ikWAM';
  try {
    const post = bent('https://api.elevenlabs.io', 'POST', 'buffer', {
      'xi-api-key': api_key,
      'Accept': 'audio/mpeg',
      'Content-Type': 'application/json'
    });
    // `return await` keeps a rejected request inside this try so it gets logged.
    return await post(`/v1/text-to-speech/${testVoiceId}`, {
      text: 'Hello',
      model_id,
      voice_settings: {
        stability: 0.5,
        similarity_boost: 0.5
      }
    });
  } catch (err) {
    logger.info({err}, 'testElevenlabs returned error');
    throw err;
  }
};
const testIbmTts = async(logger, getTtsVoices, credentials) => {
const {tts_api_key, tts_region} = credentials;
const voices = await getTtsVoices({vendor: 'ibm', credentials: {tts_api_key, tts_region}});
@@ -251,5 +274,6 @@ module.exports = {
testDeepgramStt,
testIbmTts,
testIbmStt,
testSonioxStt
testSonioxStt,
testElevenlabs
};