support inworld tts (#472)

* support inworld tts

* inworld tts voices
This commit is contained in:
Hoan Luu Huu
2025-06-27 22:12:00 +07:00
committed by GitHub
parent 0842793aea
commit 5421f1421f
7 changed files with 241 additions and 6 deletions

View File

@@ -14,7 +14,8 @@ const {decryptCredential, testWhisper, testDeepgramTTS,
testSpeechmaticsStt,
testCartesia,
testVoxistStt,
testOpenAiStt} = require('../../utils/speech-utils');
testOpenAiStt,
testInworld} = require('../../utils/speech-utils');
const {DbErrorUnprocessableRequest, DbErrorForbidden, DbErrorBadRequest} = require('../../utils/errors');
const {
testGoogleTts,
@@ -283,6 +284,12 @@ const encryptCredential = (obj) => {
const rimelabsData = JSON.stringify({api_key, model_id, options});
return encrypt(rimelabsData);
case 'inworld':
assert(api_key, 'invalid inworld speech credential: api_key is required');
assert(model_id, 'invalid inworld speech credential: model_id is required');
const inworldData = JSON.stringify({api_key, model_id, options});
return encrypt(inworldData);
case 'assemblyai':
assert(api_key, 'invalid assemblyai speech credential: api_key is required');
const assemblyaiData = JSON.stringify({api_key});
@@ -874,6 +881,17 @@ router.get('/:sid/test', async(req, res) => {
SpeechCredential.ttsTestResult(sid, false);
}
}
} else if (cred.vendor === 'inworld') {
if (cred.use_for_tts) {
try {
await testInworld(logger, synthAudio, credential);
results.tts.status = 'ok';
SpeechCredential.ttsTestResult(sid, true);
} catch (err) {
results.tts = {status: 'fail', reason: err.message};
SpeechCredential.ttsTestResult(sid, false);
}
}
} else if (cred.vendor === 'rimelabs') {
if (cred.use_for_tts) {
try {

View File

@@ -0,0 +1,118 @@
module.exports = [
{
value: 'en',
name: 'English',
voices: [
{ name: 'Alex', value: 'Alex' },
{ name: 'Ashley', value: 'Ashley' },
{ name: 'Craig', value: 'Craig' },
{ name: 'Deborah', value: 'Deborah' },
{ name: 'Dennis', value: 'Dennis' },
{ name: 'Edward', value: 'Edward' },
{ name: 'Elizabeth', value: 'Elizabeth' },
{ name: 'Hades', value: 'Hades' },
{ name: 'Julia', value: 'Julia' },
{ name: 'Pixie', value: 'Pixie' },
{ name: 'Mark', value: 'Mark' },
{ name: 'Olivia', value: 'Olivia' },
{ name: 'Priya', value: 'Priya' },
{ name: 'Ronald', value: 'Ronald' },
{ name: 'Sarah', value: 'Sarah' },
{ name: 'Shaun', value: 'Shaun' },
{ name: 'Theodore', value: 'Theodore' },
{ name: 'Timothy', value: 'Timothy' },
{ name: 'Wendy', value: 'Wendy' },
{ name: 'Dominus', value: 'Dominus' },
],
},
{
value: 'zh',
name: 'Chinese',
voices: [
{ name: 'Yichen', value: 'Yichen' },
{ name: 'Xiaoyin', value: 'Xiaoyin' },
{ name: 'Xinyi', value: 'Xinyi' },
{ name: 'Jing', value: 'Jing' },
],
},
{
value: 'nl',
name: 'Dutch',
voices: [
{ name: 'Erik', value: 'Erik' },
{ name: 'Katrien', value: 'Katrien' },
{ name: 'Lennart', value: 'Lennart' },
{ name: 'Lore', value: 'Lore' },
],
},
{
value: 'fr',
name: 'French',
voices: [
{ name: 'Alain', value: 'Alain' },
{ name: 'Hélène', value: 'Hélène' },
{ name: 'Mathieu', value: 'Mathieu' },
{ name: 'Étienne', value: 'Étienne' },
],
},
{
value: 'de',
name: 'German',
voices: [
{ name: 'Johanna', value: 'Johanna' },
{ name: 'Josef', value: 'Josef' },
],
},
{
value: 'it',
name: 'Italian',
voices: [
{ name: 'Gianni', value: 'Gianni' },
{ name: 'Orietta', value: 'Orietta' },
],
},
{
value: 'ja',
name: 'Japanese',
voices: [
{ name: 'Asuka', value: 'Asuka' },
{ name: 'Satoshi', value: 'Satoshi' },
],
},
{
value: 'ko',
name: 'Korean',
voices: [
{ name: 'Hyunwoo', value: 'Hyunwoo' },
{ name: 'Minji', value: 'Minji' },
{ name: 'Seojun', value: 'Seojun' },
{ name: 'Yoona', value: 'Yoona' },
],
},
{
value: 'pl',
name: 'Polish',
voices: [
{ name: 'Szymon', value: 'Szymon' },
{ name: 'Wojciech', value: 'Wojciech' },
],
},
{
value: 'pt',
name: 'Portuguese',
voices: [
{ name: 'Heitor', value: 'Heitor' },
{ name: 'Maitê', value: 'Maitê' },
],
},
{
value: 'es',
name: 'Spanish',
voices: [
{ name: 'Diego', value: 'Diego' },
{ name: 'Lupita', value: 'Lupita' },
{ name: 'Miguel', value: 'Miguel' },
{ name: 'Rafael', value: 'Rafael' },
],
},
];

View File

@@ -0,0 +1,5 @@
module.exports = [
{ name: 'Llama Inworld TTS', value: 'inworld-tts-1' },
{ name: 'Llama Inworld TTS Max', value: 'inworld-tts-1-max' },
];

View File

@@ -19,6 +19,7 @@ const TtsElevenlabsLanguagesVoices = require('./speech-data/tts-elevenlabs');
const TtsWhisperLanguagesVoices = require('./speech-data/tts-whisper');
const TtsPlayHtLanguagesVoices = require('./speech-data/tts-playht');
const TtsVerbioLanguagesVoices = require('./speech-data/tts-verbio');
const TtsInworldLanguagesVoices = require('./speech-data/tts-inworld');
const ttsCartesia = require('./speech-data/tts-cartesia');
const TtsModelDeepgram = require('./speech-data/tts-model-deepgram');
@@ -28,6 +29,7 @@ const TtsModelWhisper = require('./speech-data/tts-model-whisper');
const TtsModelPlayHT = require('./speech-data/tts-model-playht');
const ttsLanguagesPlayHt = require('./speech-data/tts-languages-playht');
const TtsModelRimelabs = require('./speech-data/tts-model-rimelabs');
const TtsModelInworld = require('./speech-data/tts-model-inworld');
const TtsModelCartesia = require('./speech-data/tts-model-cartesia');
const TtsModelOpenai = require('./speech-data/tts-model-openai');
@@ -382,6 +384,28 @@ const testRimelabs = async(logger, synthAudio, credentials) => {
}
};
/**
 * Exercise an Inworld TTS credential by synthesizing one fixed English phrase.
 *
 * @param {object} logger - logger exposing `info`; only used when synthesis fails
 * @param {Function} synthAudio - synthesizer, awaited as `synthAudio(stats, opts)`
 * @param {object} credentials - passed through to `synthAudio` unchanged
 * @returns {Promise<void>} resolves once synthesis completes
 * @throws rethrows the error raised by `synthAudio` after logging it
 */
const testInworld = async(logger, synthAudio, credentials) => {
  // Stats hooks are no-ops: nothing is recorded for this call.
  const statsStub = {
    increment: () => {},
    histogram: () => {}
  };
  const synthRequest = {
    vendor: 'inworld',
    credentials,
    language: 'en',
    voice: 'Ashley',
    text: 'Hi there and welcome to jambones!',
    renderForCaching: true
  };

  try {
    await synthAudio(statsStub, synthRequest);
  } catch (err) {
    logger.info({err}, 'synth inworld returned error');
    throw err;
  }
};
const testWhisper = async(logger, synthAudio, credentials) => {
try {
await synthAudio({increment: () => {}, histogram: () => {}},
@@ -683,6 +707,11 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
obj.model_id = o.model_id;
obj.stt_model_id = o.stt_model_id;
obj.options = o.options;
} else if ('inworld' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
obj.model_id = o.model_id;
obj.options = o.options;
} else if ('rimelabs' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
@@ -765,6 +794,8 @@ async function getLanguagesAndVoicesForVendor(logger, vendor, credential, getTts
return await getLanguagesVoicesForPlayHT(credential, getTtsVoices, logger);
case 'rimelabs':
return await getLanguagesVoicesForRimelabs(credential, getTtsVoices, logger);
case 'inworld':
return await getLanguagesVoicesForInworld(credential, getTtsVoices, logger);
case 'assemblyai':
return await getLanguagesVoicesForAssemblyAI(credential, getTtsVoices, logger);
case 'voxist':
@@ -1130,6 +1161,46 @@ async function getLanguagesVoicesForRimelabs(credential) {
return tranform(ttsVoices, undefined, TtsModelRimelabs);
}
/**
 * Build the language/voice/model listing for the Inworld TTS vendor.
 *
 * Without an api_key the bundled static voice list is used. With one, voices
 * are fetched from the Inworld voices endpoint and grouped under every
 * language code each voice lists, in first-seen order.
 *
 * @param {object} [credential] - speech credential; only `api_key` is read
 * @returns {Promise<*>} the result of `tranform(ttsVoices, undefined, TtsModelInworld)`
 * @throws {Error} when the HTTP response is not ok, or the payload has no `voices` array
 */
async function getLanguagesVoicesForInworld(credential) {
  const api_key = credential ? credential.api_key : null;
  if (!api_key) {
    return tranform(TtsInworldLanguagesVoices, undefined, TtsModelInworld);
  }

  const response = await fetch('https://api.inworld.ai/tts/v1/voices', {
    headers: {
      'Accept': 'application/json',
      // NOTE(review): the key is sent verbatim as the Basic credential --
      // presumably it is stored already base64-encoded; confirm.
      'Authorization': `Basic ${api_key}`
    }
  });
  if (!response.ok) {
    throw new Error(`failed to list inworld voices: HTTP ${response.status}`);
  }

  const data = await response.json();
  if (!data || !Array.isArray(data.voices)) {
    // Fail with a descriptive error instead of a TypeError on `.reduce`.
    throw new Error('failed to list inworld voices: unexpected response payload');
  }

  // Map keeps insertion order, so languages come out in first-seen order
  // while each lookup stays O(1) instead of rescanning the accumulator.
  const byLanguage = new Map();
  for (const voice of data.voices) {
    // A voice without a language list cannot be placed under any language.
    if (!voice || !Array.isArray(voice.languages)) continue;

    for (const languageCode of voice.languages) {
      // A fresh entry per language so groups never share one object.
      const voiceEntry = {
        name: voice.displayName || capitalizeFirst(voice.voiceId),
        value: voice.voiceId
      };
      const existingLanguage = byLanguage.get(languageCode);
      if (existingLanguage) {
        existingLanguage.voices.push(voiceEntry);
      } else {
        byLanguage.set(languageCode, {
          value: languageCode,
          name: capitalizeFirst(languageCode),
          voices: [voiceEntry]
        });
      }
    }
  }

  return tranform([...byLanguage.values()], undefined, TtsModelInworld);
}
/**
 * Produce the AssemblyAI listing from the bundled language table.
 *
 * @param {object} [credential] - accepted for signature parity; not read here
 * @returns {Promise<*>} the result of `tranform(undefined, SttAssemblyaiLanguagesVoices)`
 */
async function getLanguagesVoicesForAssemblyAI(credential) {
  // First slot is left undefined; only the second carries data.
  // NOTE(review): the slots look like (tts, stt) judging by the identifier names -- confirm against tranform.
  const firstSlot = undefined;
  const listing = tranform(firstSlot, SttAssemblyaiLanguagesVoices);
  return listing;
}
@@ -1442,6 +1513,7 @@ module.exports = {
testElevenlabs,
testPlayHT,
testRimelabs,
testInworld,
testAssemblyStt,
testDeepgramTTS,
getSpeechCredential,