mirror of
https://github.com/jambonz/jambonz-api-server.git
synced 2025-12-19 05:47:46 +00:00
@@ -14,7 +14,8 @@ const {decryptCredential, testWhisper, testDeepgramTTS,
|
||||
testSpeechmaticsStt,
|
||||
testCartesia,
|
||||
testVoxistStt,
|
||||
testOpenAiStt} = require('../../utils/speech-utils');
|
||||
testOpenAiStt,
|
||||
testInworld} = require('../../utils/speech-utils');
|
||||
const {DbErrorUnprocessableRequest, DbErrorForbidden, DbErrorBadRequest} = require('../../utils/errors');
|
||||
const {
|
||||
testGoogleTts,
|
||||
@@ -283,6 +284,12 @@ const encryptCredential = (obj) => {
|
||||
const rimelabsData = JSON.stringify({api_key, model_id, options});
|
||||
return encrypt(rimelabsData);
|
||||
|
||||
case 'inworld':
|
||||
assert(api_key, 'invalid inworld speech credential: api_key is required');
|
||||
assert(model_id, 'invalid inworld speech credential: model_id is required');
|
||||
const inworldData = JSON.stringify({api_key, model_id, options});
|
||||
return encrypt(inworldData);
|
||||
|
||||
case 'assemblyai':
|
||||
assert(api_key, 'invalid assemblyai speech credential: api_key is required');
|
||||
const assemblyaiData = JSON.stringify({api_key});
|
||||
@@ -874,6 +881,17 @@ router.get('/:sid/test', async(req, res) => {
|
||||
SpeechCredential.ttsTestResult(sid, false);
|
||||
}
|
||||
}
|
||||
} else if (cred.vendor === 'inworld') {
|
||||
if (cred.use_for_tts) {
|
||||
try {
|
||||
await testInworld(logger, synthAudio, credential);
|
||||
results.tts.status = 'ok';
|
||||
SpeechCredential.ttsTestResult(sid, true);
|
||||
} catch (err) {
|
||||
results.tts = {status: 'fail', reason: err.message};
|
||||
SpeechCredential.ttsTestResult(sid, false);
|
||||
}
|
||||
}
|
||||
} else if (cred.vendor === 'rimelabs') {
|
||||
if (cred.use_for_tts) {
|
||||
try {
|
||||
|
||||
118
lib/utils/speech-data/tts-inworld.js
Normal file
118
lib/utils/speech-data/tts-inworld.js
Normal file
@@ -0,0 +1,118 @@
|
||||
module.exports = [
|
||||
{
|
||||
value: 'en',
|
||||
name: 'English',
|
||||
voices: [
|
||||
{ name: 'Alex', value: 'Alex' },
|
||||
{ name: 'Ashley', value: 'Ashley' },
|
||||
{ name: 'Craig', value: 'Craig' },
|
||||
{ name: 'Deborah', value: 'Deborah' },
|
||||
{ name: 'Dennis', value: 'Dennis' },
|
||||
{ name: 'Edward', value: 'Edward' },
|
||||
{ name: 'Elizabeth', value: 'Elizabeth' },
|
||||
{ name: 'Hades', value: 'Hades' },
|
||||
{ name: 'Julia', value: 'Julia' },
|
||||
{ name: 'Pixie', value: 'Pixie' },
|
||||
{ name: 'Mark', value: 'Mark' },
|
||||
{ name: 'Olivia', value: 'Olivia' },
|
||||
{ name: 'Priya', value: 'Priya' },
|
||||
{ name: 'Ronald', value: 'Ronald' },
|
||||
{ name: 'Sarah', value: 'Sarah' },
|
||||
{ name: 'Shaun', value: 'Shaun' },
|
||||
{ name: 'Theodore', value: 'Theodore' },
|
||||
{ name: 'Timothy', value: 'Timothy' },
|
||||
{ name: 'Wendy', value: 'Wendy' },
|
||||
{ name: 'Dominus', value: 'Dominus' },
|
||||
],
|
||||
},
|
||||
{
|
||||
value: 'zh',
|
||||
name: 'Chinese',
|
||||
voices: [
|
||||
{ name: 'Yichen', value: 'Yichen' },
|
||||
{ name: 'Xiaoyin', value: 'Xiaoyin' },
|
||||
{ name: 'Xinyi', value: 'Xinyi' },
|
||||
{ name: 'Jing', value: 'Jing' },
|
||||
],
|
||||
},
|
||||
{
|
||||
value: 'nl',
|
||||
name: 'Dutch',
|
||||
voices: [
|
||||
{ name: 'Erik', value: 'Erik' },
|
||||
{ name: 'Katrien', value: 'Katrien' },
|
||||
{ name: 'Lennart', value: 'Lennart' },
|
||||
{ name: 'Lore', value: 'Lore' },
|
||||
],
|
||||
},
|
||||
{
|
||||
value: 'fr',
|
||||
name: 'French',
|
||||
voices: [
|
||||
{ name: 'Alain', value: 'Alain' },
|
||||
{ name: 'Hélène', value: 'Hélène' },
|
||||
{ name: 'Mathieu', value: 'Mathieu' },
|
||||
{ name: 'Étienne', value: 'Étienne' },
|
||||
],
|
||||
},
|
||||
{
|
||||
value: 'de',
|
||||
name: 'German',
|
||||
voices: [
|
||||
{ name: 'Johanna', value: 'Johanna' },
|
||||
{ name: 'Josef', value: 'Josef' },
|
||||
],
|
||||
},
|
||||
{
|
||||
value: 'it',
|
||||
name: 'Italian',
|
||||
voices: [
|
||||
{ name: 'Gianni', value: 'Gianni' },
|
||||
{ name: 'Orietta', value: 'Orietta' },
|
||||
],
|
||||
},
|
||||
{
|
||||
value: 'ja',
|
||||
name: 'Japanese',
|
||||
voices: [
|
||||
{ name: 'Asuka', value: 'Asuka' },
|
||||
{ name: 'Satoshi', value: 'Satoshi' },
|
||||
],
|
||||
},
|
||||
{
|
||||
value: 'ko',
|
||||
name: 'Korean',
|
||||
voices: [
|
||||
{ name: 'Hyunwoo', value: 'Hyunwoo' },
|
||||
{ name: 'Minji', value: 'Minji' },
|
||||
{ name: 'Seojun', value: 'Seojun' },
|
||||
{ name: 'Yoona', value: 'Yoona' },
|
||||
],
|
||||
},
|
||||
{
|
||||
value: 'pl',
|
||||
name: 'Polish',
|
||||
voices: [
|
||||
{ name: 'Szymon', value: 'Szymon' },
|
||||
{ name: 'Wojciech', value: 'Wojciech' },
|
||||
],
|
||||
},
|
||||
{
|
||||
value: 'pt',
|
||||
name: 'Portuguese',
|
||||
voices: [
|
||||
{ name: 'Heitor', value: 'Heitor' },
|
||||
{ name: 'Maitê', value: 'Maitê' },
|
||||
],
|
||||
},
|
||||
{
|
||||
value: 'es',
|
||||
name: 'Spanish',
|
||||
voices: [
|
||||
{ name: 'Diego', value: 'Diego' },
|
||||
{ name: 'Lupita', value: 'Lupita' },
|
||||
{ name: 'Miguel', value: 'Miguel' },
|
||||
{ name: 'Rafael', value: 'Rafael' },
|
||||
],
|
||||
},
|
||||
];
|
||||
5
lib/utils/speech-data/tts-model-inworld.js
Normal file
5
lib/utils/speech-data/tts-model-inworld.js
Normal file
@@ -0,0 +1,5 @@
|
||||
module.exports = [
|
||||
{ name: 'Llama Inworld TTS', value: 'inworld-tts-1' },
|
||||
{ name: 'Llama Inworld TTS Max', value: 'inworld-tts-1-max' },
|
||||
];
|
||||
|
||||
@@ -19,6 +19,7 @@ const TtsElevenlabsLanguagesVoices = require('./speech-data/tts-elevenlabs');
|
||||
const TtsWhisperLanguagesVoices = require('./speech-data/tts-whisper');
|
||||
const TtsPlayHtLanguagesVoices = require('./speech-data/tts-playht');
|
||||
const TtsVerbioLanguagesVoices = require('./speech-data/tts-verbio');
|
||||
const TtsInworldLanguagesVoices = require('./speech-data/tts-inworld');
|
||||
const ttsCartesia = require('./speech-data/tts-cartesia');
|
||||
|
||||
const TtsModelDeepgram = require('./speech-data/tts-model-deepgram');
|
||||
@@ -28,6 +29,7 @@ const TtsModelWhisper = require('./speech-data/tts-model-whisper');
|
||||
const TtsModelPlayHT = require('./speech-data/tts-model-playht');
|
||||
const ttsLanguagesPlayHt = require('./speech-data/tts-languages-playht');
|
||||
const TtsModelRimelabs = require('./speech-data/tts-model-rimelabs');
|
||||
const TtsModelInworld = require('./speech-data/tts-model-inworld');
|
||||
const TtsModelCartesia = require('./speech-data/tts-model-cartesia');
|
||||
const TtsModelOpenai = require('./speech-data/tts-model-openai');
|
||||
|
||||
@@ -382,6 +384,28 @@ const testRimelabs = async(logger, synthAudio, credentials) => {
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Verify an Inworld TTS credential by synthesizing a short English phrase.
 *
 * @param {object} logger - logger exposing `info(obj, msg)`; used only to
 *   record a synthesis failure.
 * @param {Function} synthAudio - async synthesizer invoked as
 *   `synthAudio(stats, options)`.
 * @param {object} credentials - Inworld credential, forwarded untouched to
 *   `synthAudio`.
 * @returns {Promise<void>} resolves when synthesis succeeds.
 * @throws rethrows whatever `synthAudio` rejects with, after logging it.
 */
const testInworld = async(logger, synthAudio, credentials) => {
  try {
    await synthAudio(
      {
        // no-op stats collector: a credential test should not emit metrics
        increment: () => {},
        histogram: () => {}
      },
      {
        vendor: 'inworld',
        credentials,
        language: 'en',
        voice: 'Ashley',
        text: 'Hi there and welcome to jambones!',
        renderForCaching: true
      }
    );
  } catch (err) {
    logger.info({err}, 'synth inworld returned error');
    throw err;
  }
};
|
||||
|
||||
const testWhisper = async(logger, synthAudio, credentials) => {
|
||||
try {
|
||||
await synthAudio({increment: () => {}, histogram: () => {}},
|
||||
@@ -683,6 +707,11 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
|
||||
obj.model_id = o.model_id;
|
||||
obj.stt_model_id = o.stt_model_id;
|
||||
obj.options = o.options;
|
||||
} else if ('inworld' === obj.vendor) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
|
||||
obj.model_id = o.model_id;
|
||||
obj.options = o.options;
|
||||
} else if ('rimelabs' === obj.vendor) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
|
||||
@@ -765,6 +794,8 @@ async function getLanguagesAndVoicesForVendor(logger, vendor, credential, getTts
|
||||
return await getLanguagesVoicesForPlayHT(credential, getTtsVoices, logger);
|
||||
case 'rimelabs':
|
||||
return await getLanguagesVoicesForRimelabs(credential, getTtsVoices, logger);
|
||||
case 'inworld':
|
||||
return await getLanguagesVoicesForInworld(credential, getTtsVoices, logger);
|
||||
case 'assemblyai':
|
||||
return await getLanguagesVoicesForAssemblyAI(credential, getTtsVoices, logger);
|
||||
case 'voxist':
|
||||
@@ -1130,6 +1161,46 @@ async function getLanguagesVoicesForRimelabs(credential) {
|
||||
return tranform(ttsVoices, undefined, TtsModelRimelabs);
|
||||
}
|
||||
|
||||
/**
 * Build the language/voice/model listing for Inworld TTS.
 *
 * Without an API key the bundled static voice list is used. With a key the
 * voices are fetched from the Inworld voices endpoint and regrouped by
 * language code, keeping the order in which languages first appear.
 *
 * @param {object} [credential] - decrypted credential; only `api_key` is read.
 * @returns {Promise<*>} whatever `tranform` produces for the TTS voices and
 *   the Inworld model list.
 * @throws {Error} when the voices endpoint answers with a non-2xx status.
 */
async function getLanguagesVoicesForInworld(credential) {
  const api_key = credential ? credential.api_key : null;
  if (!api_key) {
    return tranform(TtsInworldLanguagesVoices, undefined, TtsModelInworld);
  }

  const response = await fetch('https://api.inworld.ai/tts/v1/voices', {
    headers: {
      'Accept': 'application/json',
      'Authorization': `Basic ${api_key}`
    }
  });
  if (!response.ok) {
    throw new Error(`failed to list voices (HTTP ${response.status})`);
  }

  const data = await response.json();
  // tolerate a payload without a voices array instead of failing with a TypeError
  const voices = data && Array.isArray(data.voices) ? data.voices : [];

  // language code -> language entry; a Map keeps first-seen order and
  // avoids rescanning the accumulated list for every voice/language pair
  const byLanguage = new Map();
  for (const voice of voices) {
    const languages = Array.isArray(voice.languages) ? voice.languages : [];
    for (const languageCode of languages) {
      let language = byLanguage.get(languageCode);
      if (!language) {
        language = {
          value: languageCode,
          name: capitalizeFirst(languageCode),
          voices: []
        };
        byLanguage.set(languageCode, language);
      }
      language.voices.push({
        name: voice.displayName || capitalizeFirst(voice.voiceId),
        value: voice.voiceId
      });
    }
  }

  return tranform(Array.from(byLanguage.values()), undefined, TtsModelInworld);
}
|
||||
|
||||
/**
 * Build the language listing for AssemblyAI, which is offered for STT only.
 *
 * @param {object} [credential] - accepted for signature parity with the
 *   other vendor helpers; not read.
 * @returns {Promise<*>} whatever `tranform` produces for the static
 *   AssemblyAI STT language list (no TTS voices are passed).
 */
async function getLanguagesVoicesForAssemblyAI(credential) {
  return tranform(undefined, SttAssemblyaiLanguagesVoices);
}
|
||||
@@ -1442,6 +1513,7 @@ module.exports = {
|
||||
testElevenlabs,
|
||||
testPlayHT,
|
||||
testRimelabs,
|
||||
testInworld,
|
||||
testAssemblyStt,
|
||||
testDeepgramTTS,
|
||||
getSpeechCredential,
|
||||
|
||||
8
package-lock.json
generated
8
package-lock.json
generated
@@ -20,7 +20,7 @@
|
||||
"@jambonz/lamejs": "^1.2.2",
|
||||
"@jambonz/mw-registrar": "^0.2.7",
|
||||
"@jambonz/realtimedb-helpers": "^0.8.14",
|
||||
"@jambonz/speech-utils": "^0.2.10",
|
||||
"@jambonz/speech-utils": "^0.2.13",
|
||||
"@jambonz/time-series": "^0.2.8",
|
||||
"@jambonz/verb-specifications": "^0.0.104",
|
||||
"@soniox/soniox-node": "^1.2.2",
|
||||
@@ -4155,9 +4155,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@jambonz/speech-utils": {
|
||||
"version": "0.2.12",
|
||||
"resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.2.12.tgz",
|
||||
"integrity": "sha512-1xik/ZRUtPE2SOztxweGI+RTXUbiUXRShJ8G/l7VJJBkSWbfKKerYIRfHicAPumHicaUrbqSzZ6hr0eghv80KA==",
|
||||
"version": "0.2.13",
|
||||
"resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.2.13.tgz",
|
||||
"integrity": "sha512-8ISTWTfz3fWtPmzPDsZG8zgnf6pTjLA1WasMAF/d/ktGswqVsbhoPcDh5ZyZ7BsEqOMLMIv2Hn0ESmrBuMn5kw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"23": "^0.0.0",
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
"@jambonz/lamejs": "^1.2.2",
|
||||
"@jambonz/mw-registrar": "^0.2.7",
|
||||
"@jambonz/realtimedb-helpers": "^0.8.14",
|
||||
"@jambonz/speech-utils": "^0.2.10",
|
||||
"@jambonz/speech-utils": "^0.2.13",
|
||||
"@jambonz/time-series": "^0.2.8",
|
||||
"@jambonz/verb-specifications": "^0.0.104",
|
||||
"@soniox/soniox-node": "^1.2.2",
|
||||
|
||||
@@ -717,6 +717,28 @@ test('speech credentials tests', async(t) => {
|
||||
t.ok(result.statusCode === 204, 'successfully deleted speech credential for rimelabs');
|
||||
|
||||
|
||||
result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
|
||||
resolveWithFullResponse: true,
|
||||
auth: authUser,
|
||||
json: true,
|
||||
body: {
|
||||
vendor: 'inworld',
|
||||
use_for_stt: false,
|
||||
use_for_tts: true,
|
||||
api_key: 'asdasdasdasddsadasda',
|
||||
model_id: 'inworld-tts-1',
|
||||
}
|
||||
});
|
||||
t.ok(result.statusCode === 201, 'successfully added speech credential for inworld');
|
||||
const inworld_sid = result.body.sid;
|
||||
|
||||
/* delete the credential */
|
||||
result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${inworld_sid}`, {
|
||||
auth: authUser,
|
||||
resolveWithFullResponse: true,
|
||||
});
|
||||
t.ok(result.statusCode === 204, 'successfully deleted speech credential for inworld');
|
||||
|
||||
/* add a credential for custom voices google */
|
||||
result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
|
||||
resolveWithFullResponse: true,
|
||||
|
||||
Reference in New Issue
Block a user