support resemble TTS (#488)

* support resemble TTS

* wip

* wip

* update speech utils version

* update resemble voice list
This commit is contained in:
Hoan Luu Huu
2025-08-13 19:18:08 +07:00
committed by GitHub
parent 2b66a121a0
commit fd9dc77a58
6 changed files with 766 additions and 51 deletions

View File

@@ -15,7 +15,8 @@ const {decryptCredential, testWhisper, testDeepgramTTS,
testCartesia,
testVoxistStt,
testOpenAiStt,
testInworld} = require('../../utils/speech-utils');
testInworld,
testResembleTTS} = require('../../utils/speech-utils');
const {DbErrorUnprocessableRequest, DbErrorForbidden, DbErrorBadRequest} = require('../../utils/errors');
const {
testGoogleTts,
@@ -132,6 +133,8 @@ const encryptCredential = (obj) => {
deepgram_stt_use_tls,
deepgram_tts_uri,
playht_tts_uri,
resemble_tts_uri,
resemble_tts_use_tls,
use_custom_tts,
custom_tts_endpoint,
custom_tts_endpoint_url,
@@ -226,6 +229,15 @@ const encryptCredential = (obj) => {
deepgram_stt_use_tls, deepgram_tts_uri, model_id});
return encrypt(deepgramData);
case 'resemble':
assert(api_key, 'invalid resemble speech credential: api_key is required');
const resembleData = JSON.stringify({
api_key,
...(resemble_tts_uri && {resemble_tts_uri}),
...(resemble_tts_use_tls && {resemble_tts_use_tls})
});
return encrypt(resembleData);
case 'deepgramriver':
assert(api_key, 'invalid deepgram river speech credential: api_key is required');
const deepgramriverData = JSON.stringify({api_key});
@@ -523,7 +535,9 @@ router.put('/:sid', async(req, res) => {
playht_tts_uri,
engine_version,
service_version,
speechmatics_stt_uri
speechmatics_stt_uri,
resemble_tts_use_tls,
resemble_tts_uri
} = req.body;
const newCred = {
@@ -556,7 +570,9 @@ router.put('/:sid', async(req, res) => {
playht_tts_uri,
engine_version,
service_version,
speechmatics_stt_uri
speechmatics_stt_uri,
resemble_tts_uri,
resemble_tts_use_tls
};
logger.info({o, newCred}, 'updating speech credential with this new credential');
obj.credential = encryptCredential(newCred);
@@ -756,6 +772,17 @@ router.get('/:sid/test', async(req, res) => {
SpeechCredential.sttTestResult(sid, false);
}
}
} else if (cred.vendor === 'resemble') {
if (cred.use_for_tts) {
try {
await testResembleTTS(logger, synthAudio, credential);
results.tts.status = 'ok';
SpeechCredential.ttsTestResult(sid, true);
} catch (err) {
results.tts = {status: 'fail', reason: err.message};
SpeechCredential.ttsTestResult(sid, false);
}
}
} else if (cred.vendor === 'deepgram') {
const {api_key} = credential;
if (cred.use_for_tts) {

View File

@@ -0,0 +1,438 @@
/**
 * Static catalogue of Resemble TTS voices, grouped by language.
 *
 * Shape: an array of language groups, each of the form
 *   {value: <language code>, name: <display label>, voices: [{name, value}, ...]}
 * where every voice has a human-readable `name` and an 8-character hex `value`
 * (presumably the Resemble voice UUID -- TODO confirm against the Resemble API).
 *
 * Language codes are kept exactly as listed: 'en-gb' / 'en-GB' and
 * 'en-us' / 'en-US' are separate groups that differ only by letter case.
 *
 * NOTE(review): this looks like the predefined list returned when the voice
 * list cannot be fetched from Resemble with the account's credential --
 * confirm against the module that requires this file.
 */
module.exports = [
{
value: 'en-gb',
name: 'En-gb',
voices: [
{
name: 'Seth (Legacy) (professional) - Resemble Voice',
value: 'a52c4efc',
},
{
name: 'Seth (professional) - Resemble Voice',
value: 'd3e61caf',
},
],
},
{
value: 'en-GB',
name: 'En-GB',
voices: [
{
name: 'Beatrice Pendergast (professional) - Resemble Voice',
value: '00b1fd4e',
},
{
name: 'Ed Smart (professional) - Resemble Voice',
value: '0c755526',
},
{
name: 'Paula J (professional) - Resemble Voice',
value: '33e64cd2',
},
],
},
{
value: 'en-us',
name: 'En-us',
voices: [
{
name: 'David (professional) - Resemble Voice',
value: '5bb13f03',
},
],
},
{
value: 'en-US',
name: 'En-US',
voices: [
{
name: 'Adam Lofbomm (professional) - Resemble Voice',
value: '4e228dba',
},
{
name: 'Alex (professional) - Resemble Voice',
value: '41b99669',
},
{
name: 'Amelia (professional) - Resemble Voice',
value: 'ecbe5d97',
},
{
name: 'Andrew (rapid) - Resemble Marketplace',
value: 'd2f26a3e',
},
{
name: 'Annika (professional) - Resemble Voice',
value: 'b27f3cc0',
},
{
name: 'Arthur (professional) - Resemble Voice',
value: '9de11312',
},
{
name: 'Ash (professional) - Resemble Voice',
value: 'ee322483',
},
{
name: 'Aurora (professional) - Resemble Voice',
value: 'a72d9fca',
},
{
name: 'Austin (professional) - Resemble Voice',
value: '82a67e58',
},
{
name: 'Beth (Legacy) (professional) - Resemble Voice',
value: '25c7823f',
},
{
name: 'Beth (professional) - Resemble Voice',
value: 'fa66d263',
},
{
name: 'Blade (professional) - Resemble Voice',
value: '8bedd793',
},
{
name: 'Brandy Sky (professional) - Resemble Voice',
value: '79e2f1dc',
},
{
name: 'Brenley (professional) - Resemble Voice',
value: 'e6ec3ca4',
},
{
name: 'Britney (professional) - Resemble Voice',
value: 'e57e23ff',
},
{
name: 'Broadcast Joe (professional) - Resemble Voice',
value: '21e49584',
},
{
name: 'Carl Bishop (Angry) (professional) - Resemble Voice',
value: 'f06cd770',
},
{
name: 'Carl Bishop (Conversational) (professional) - Resemble Voice',
value: '7f40ff35',
},
{
name: 'Carl Bishop (Happy) (professional) - Resemble Voice',
value: '99751e42',
},
{
name: 'Carl Bishop (professional) - Resemble Voice',
value: '01bcc102',
},
{
name: 'Carl Bishop (Scared) (Legacy) (professional) - Resemble Voice',
value: '1dcf0222',
},
{
name: 'Carl Bishop (Scared) (professional) - Resemble Voice',
value: 'eacbc44f',
},
{
name: 'Charles (Legacy) (professional) - Resemble Voice',
value: '4c6d3da5',
},
{
name: 'Charles (professional) - Resemble Voice',
value: 'd79a5198',
},
{
name: 'Charlotte (professional) - Resemble Voice',
value: '96b91cf9',
},
{
name: 'Chris Whiting (professional) - Resemble Voice',
value: '95b7560a',
},
{
name: 'Cliff (professional) - Resemble Voice',
value: 'fcf8490c',
},
{
name: 'Connor (professional) - Resemble Voice',
value: 'a6131acf',
},
{
name: 'Deanna (professional) - Resemble Voice',
value: '0842fdf9',
},
{
name: 'Ember (professional) - Resemble Voice',
value: '55592656',
},
{
name: 'Gene Amore (professional) - Resemble Voice',
value: 'f2ea7aa0',
},
{
name: 'Harry Robinson (professional) - Resemble Voice',
value: '3c36d67d',
},
{
name: 'Helena (professional) - Resemble Voice',
value: 'ac948df2',
},
{
name: 'Hem (professional) - Resemble Voice',
value: 'b6edbe5f',
},
{
name: 'John (professional) - Resemble Voice',
value: 'ac48daeb',
},
{
name: 'Josh (professional) - Resemble Voice',
value: '987c99e9',
},
{
name: 'Julie Hoverson (professional) - Resemble Voice',
value: 'b119524c',
},
{
name: 'Justin (Legacy) (professional) - Resemble Voice',
value: 'b2d1bb75',
},
{
name: 'Justin (Meditative) (Legacy) (professional) - Resemble Voice',
value: '93ce0920',
},
{
name: 'Justin (Meditative) (professional) - Resemble Voice',
value: '2570000e',
},
{
name: 'Justin (professional) - Resemble Voice',
value: '9d513c17',
},
{
name: 'Karl Nordman (professional) - Resemble Voice',
value: 'da67f17e',
},
{
name: 'Kate (professional) - Resemble Voice',
value: '28b4cc5a',
},
{
name: 'Katya (professional) - Resemble Voice',
value: 'c9ee13b4',
},
{
name: 'Ken (professional) - Resemble Voice',
value: '3dbfbf3d',
},
{
name: 'Kessi (professional) - Resemble Voice',
value: '2211cb8c',
},
{
name: 'Little Ari (professional) - Resemble Voice',
value: '805adead',
},
{
name: 'Little Brittle (professional) - Resemble Voice',
value: '8a73f115',
},
{
name: 'Liz (professional) - Resemble Voice',
value: '4884d94a',
},
{
name: 'Lothar (professional) - Resemble Voice',
value: '78671217',
},
{
name: 'Luna (professional) - Resemble Voice',
value: 'ae8223ca',
},
{
name: 'Matt Weller (professional) - Resemble Voice',
value: 'f4da4639',
},
{
name: 'Maureen (Angry) (professional) - Resemble Voice',
value: '482babfc',
},
{
name: 'Maureen (Caring) (professional) - Resemble Voice',
value: 'b15e550f',
},
{
name: 'Maureen (Happy) (professional) - Resemble Voice',
value: '91947e5c',
},
{
name: 'Maureen (professional) - Resemble Voice',
value: '7d94218f',
},
{
name: 'Maureen (Sad) (professional) - Resemble Voice',
value: 'bca7481c',
},
{
name: 'Maureen (Scared) (professional) - Resemble Voice',
value: '251c9439',
},
{
name: 'Mauren (Announcer) (professional) - Resemble Voice',
value: 'e984fb89',
},
{
name: 'Melody (Legacy) (professional) - Resemble Voice',
value: '15be93bd',
},
{
name: 'Melody (professional) - Resemble Voice',
value: '1c49e774',
},
{
name: 'Mike (professional) - Resemble Voice',
value: '3a02dc40',
},
{
name: 'Niki (professional) - Resemble Voice',
value: 'db37643c',
},
{
name: 'Olga (professional) - Resemble Voice',
value: '07c1d6b5',
},
{
name: 'Olivia (Legacy) (professional) - Resemble Voice',
value: '405b58e3',
},
{
name: 'Olivia (professional) - Resemble Voice',
value: 'ef49f972',
},
{
name: 'Orion (professional) - Resemble Voice',
value: 'aa8053cc',
},
{
name: 'Pete (professional) - Resemble Voice',
value: '1864fd63',
},
{
name: 'Primrose (Legacy) (professional) - Resemble Voice',
value: '7c8e47ca',
},
{
name: 'Primrose (professional) - Resemble Voice',
value: '33eecc17',
},
{
name: 'Primrose (Whispering) (Legacy) (professional) - Resemble Voice',
value: 'a56c5c6f',
},
{
name: 'Primrose (Whispering) (professional) - Resemble Voice',
value: '28fcdf76',
},
{
name: 'Primrose (Winded) (Legacy) (professional) - Resemble Voice',
value: '6f9a77a4',
},
{
name: 'Primrose (Winded) (professional) - Resemble Voice',
value: '0097f246',
},
{
name: 'Professor Shaposhnikov (professional) - Resemble Voice',
value: '3f5fb9f1',
},
{
name: 'Radio Nikole (professional) - Resemble Voice',
value: '19eae884',
},
{
name: 'Richard Garifo (professional) - Resemble Voice',
value: '85ba84f2',
},
{
name: 'Rico (professional) - Resemble Voice',
value: '14ca34b3',
},
{
name: 'Robert (professional) - Resemble Voice',
value: '3e907bcc',
},
{
name: 'Rupert (rapid) - Resemble Voice',
value: '28f1626c',
},
{
name: 'Sam (professional) - Resemble Voice',
value: '0f2f9a7e',
},
{
name: 'Samantha (Legacy) (professional) - Resemble Voice',
value: '266bfae9',
},
{
name: 'Samantha (professional) - Resemble Voice',
value: 'e28236ee',
},
{
name: 'Siobhan (professional) - Resemble Voice',
value: 'af72c1ac',
},
{
name: 'Steve (Scared) (professional) - Resemble Voice',
value: 'aaa56e79',
},
{
name: 'Tanja (professional) - Resemble Voice',
value: 'adb84c77',
},
{
name: 'Tanja (Telephonic) (professional) - Resemble Voice',
value: '4f5a470b',
},
{
name: 'Tanja (Warm Word Weaver) (professional) - Resemble Voice',
value: 'abbbc383',
},
{
name: 'Tarkos (professional) - Resemble Voice',
value: '779842bf',
},
{
name: 'Tyler (professional) - Resemble Voice',
value: 'ff225977',
},
{
name: 'Vicky (professional) - Resemble Voice',
value: 'f453b918',
},
{
name: 'Vivian (Legacy) (professional) - Resemble Voice',
value: 'bed1044d',
},
{
name: 'Vivian (professional) - Resemble Voice',
value: '1ff0045f',
},
{
name: 'William (Whispering) (Legacy) (professional) - Resemble Voice',
value: '79eb7953',
},
{
name: 'William (Whispering) (professional) - Resemble Voice',
value: 'e2180df0',
},
{
name: 'Willow (Whispering) (professional) - Resemble Voice',
value: 'f2906c4a',
},
{
name: 'Willow II (Whispering) (professional) - Resemble Voice',
value: 'c815cd7a',
},
],
},
];

View File

@@ -21,6 +21,7 @@ const TtsPlayHtLanguagesVoices = require('./speech-data/tts-playht');
const TtsVerbioLanguagesVoices = require('./speech-data/tts-verbio');
const TtsInworldLanguagesVoices = require('./speech-data/tts-inworld');
const ttsCartesia = require('./speech-data/tts-cartesia');
const TtsResembleLanguagesVoices = require('./speech-data/tts-resemble');
const TtsModelDeepgram = require('./speech-data/tts-model-deepgram');
const TtsLanguagesDeepgram = require('./speech-data/tts-deepgram');
@@ -424,6 +425,24 @@ const testWhisper = async(logger, synthAudio, credentials) => {
}
};
/**
 * Verify a Resemble TTS credential by synthesizing one short fixed phrase.
 *
 * @param {object} logger - logger exposing `info(obj, msg)`
 * @param {Function} synthAudio - async synthesizer called as `synthAudio(stats, options)`
 * @param {object} credentials - Resemble credential, passed through untouched
 * @returns {Promise<void>} resolves when synthesis succeeds
 * @throws rethrows whatever `synthAudio` throws, after logging it
 */
const testResembleTTS = async(logger, synthAudio, credentials) => {
  // The synthesizer expects a stats object; a credential test has nothing to record.
  const noopStats = {
    increment: () => {},
    histogram: () => {}
  };
  const synthOptions = {
    vendor: 'resemble',
    credentials,
    language: 'en-US',
    voice: '3f5fb9f1',
    text: 'Hi there and welcome to jambones!',
    renderForCaching: true
  };

  try {
    await synthAudio(noopStats, synthOptions);
  } catch (err) {
    // Log for operators, then let the caller turn the error into a failed test result.
    logger.info({err}, 'synth resemble returned error');
    throw err;
  }
};
const testDeepgramTTS = async(logger, synthAudio, credentials) => {
try {
await synthAudio({increment: () => {}, histogram: () => {}},
@@ -729,6 +748,11 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
const o = JSON.parse(decrypt(credential));
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
obj.service_version = o.service_version;
} else if ('resemble' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
obj.resemble_tts_uri = o.resemble_tts_uri;
obj.resemble_tts_use_tls = o.resemble_tts_use_tls;
} else if ('voxist' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
@@ -799,6 +823,8 @@ async function getLanguagesAndVoicesForVendor(logger, vendor, credential, getTts
return await getLanguagesVoicesForRimelabs(credential, getTtsVoices, logger);
case 'inworld':
return await getLanguagesVoicesForInworld(credential, getTtsVoices, logger);
case 'resemble':
return await getLanguagesAndVoicesForResemble(credential, getTtsVoices, logger);
case 'assemblyai':
return await getLanguagesVoicesForAssemblyAI(credential, getTtsVoices, logger);
case 'voxist':
@@ -1240,6 +1266,82 @@ async function getLanguagesVoicesForVerbio(credentials, getTtsVoices, logger) {
}
}
/**
 * Build the Resemble TTS language/voice list.
 *
 * When the credential carries an `api_key`, every page of the account's voices
 * is fetched from the Resemble API, voices that are not ready for
 * text-to-speech are dropped, and the rest are grouped by their default
 * language. On any fetch/parse failure (or when no usable credential is
 * supplied) the predefined list is returned instead.
 *
 * @param {object} [credential] - decrypted credential; only `api_key` is read
 * @param {Function} getTtsVoices - unused; kept for signature parity with the other vendors
 * @param {object} logger - logger exposing `info(obj, msg)`
 * @returns {Promise<object>} result of `tranform(...)` applied to the voice list
 */
async function getLanguagesAndVoicesForResemble(credential, getTtsVoices, logger) {
  const apiKey = credential?.api_key;

  // Without a key the request could only fail, so go straight to the predefined list.
  if (apiKey) {
    try {
      const fetchedVoices = [];
      let page = 1;
      let numPages;

      // Walk the paginated listing until the page count reported by the API is reached.
      do {
        const response = await fetch(`https://app.resemble.ai/api/v2/voices?page=${page}&page_size=100`, {
          headers: {
            'Authorization': `Token token=${apiKey}`,
            'Accept': 'application/json'
          }
        });
        if (!response.ok) {
          throw new Error('failed to list voices');
        }

        const data = await response.json();
        if (!data.success) {
          throw new Error('API returned unsuccessful response');
        }

        // Tolerate a page without an `items` array rather than poisoning the list.
        if (Array.isArray(data.items)) {
          fetchedVoices.push(...data.items);
        }
        numPages = data.num_pages;
        page++;
      } while (page <= numPages);

      // Group usable voices by language; Map keeps first-seen order and O(1) lookup.
      const byLanguage = new Map();
      for (const voice of fetchedVoices) {
        const isReady = voice?.status === 'finished' &&
          voice.component_status?.text_to_speech?.status === 'ready';
        if (!isReady) continue;

        const languageCode = voice.default_language || 'en-US';
        let group = byLanguage.get(languageCode);
        if (!group) {
          group = {
            value: languageCode,
            name: capitalizeFirst(languageCode),
            voices: []
          };
          byLanguage.set(languageCode, group);
        }
        group.voices.push({
          name: `${voice.name} (${voice.voice_type}) - ${voice.source}`,
          value: voice.uuid
        });
      }

      // Sort languages, then the voices inside each language, by display name.
      const ttsVoices = [...byLanguage.values()];
      ttsVoices.sort((a, b) => a.name.localeCompare(b.name));
      ttsVoices.forEach((lang) => {
        lang.voices.sort((a, b) => a.name.localeCompare(b.name));
      });

      return tranform(ttsVoices);
    } catch (err) {
      // Pass the error in the merging object so the logger records it.
      logger.info({err}, 'Error while fetching Resemble languages, voices, return predefined values');
    }
  }

  return tranform(TtsResembleLanguagesVoices);
}
function tranform(tts, stt, models, sttModels) {
return {
...(tts && {tts}),
@@ -1528,5 +1630,6 @@ module.exports = {
testSpeechmaticsStt,
testCartesia,
testVoxistStt,
testOpenAiStt
testOpenAiStt,
testResembleTTS
};