mirror of
https://github.com/jambonz/jambonz-api-server.git
synced 2026-01-25 02:08:24 +00:00
support tts cartesia (#370)
* support tts cartesia * update speech utils * revert reset password * revert serve-integration
This commit is contained in:
@@ -27,6 +27,7 @@ const TtsModelWhisper = require('./speech-data/tts-model-whisper');
|
||||
const TtsModelPlayHT = require('./speech-data/tts-model-playht');
|
||||
const ttsLanguagesPlayHt = require('./speech-data/tts-languages-playht');
|
||||
const TtsModelRimelabs = require('./speech-data/tts-model-rimelabs');
|
||||
const TtsModelCartesia = require('./speech-data/tts-model-cartesia');
|
||||
|
||||
const SttGoogleLanguagesVoices = require('./speech-data/stt-google');
|
||||
const SttAwsLanguagesVoices = require('./speech-data/stt-aws');
|
||||
@@ -40,6 +41,8 @@ const SttSonioxLanguagesVoices = require('./speech-data/stt-soniox');
|
||||
const SttSpeechmaticsLanguagesVoices = require('./speech-data/stt-speechmatics');
|
||||
const SttAssemblyaiLanguagesVoices = require('./speech-data/stt-assemblyai');
|
||||
const SttVerbioLanguagesVoices = require('./speech-data/stt-verbio');
|
||||
const ttsCartesia = require('./speech-data/tts-cartesia');
|
||||
const ttsModelCartesia = require('./speech-data/tts-model-cartesia');
|
||||
|
||||
|
||||
const testSonioxStt = async(logger, credentials) => {
|
||||
@@ -606,6 +609,11 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
|
||||
obj.user_id = o.user_id;
|
||||
obj.voice_engine = o.voice_engine;
|
||||
obj.options = o.options;
|
||||
} else if ('cartesia' === obj.vendor) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
|
||||
obj.model_id = o.model_id;
|
||||
obj.options = o.options;
|
||||
} else if ('rimelabs' === obj.vendor) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
|
||||
@@ -688,6 +696,8 @@ async function getLanguagesAndVoicesForVendor(logger, vendor, credential, getTts
|
||||
return await getLanguagesVoicesForVerbio(credential, getTtsVoices, logger);
|
||||
case 'speechmatics':
|
||||
return await getLanguagesVoicesForSpeechmatics(credential, getTtsVoices, logger);
|
||||
case 'cartesia':
|
||||
return await getLanguagesVoicesForCartesia(credential, getTtsVoices, logger);
|
||||
default:
|
||||
logger.info(`invalid vendor ${vendor}, return empty result`);
|
||||
throw new Error(`Invalid vendor ${vendor}`);
|
||||
@@ -1143,6 +1153,95 @@ function parseVerbioLanguagesVoices(data) {
|
||||
}, []);
|
||||
}
|
||||
|
||||
/**
 * Request the voice list from the Cartesia REST API.
 *
 * Issues `GET https://api.cartesia.ai/voices` through `bent`, authenticating
 * with the credential's `api_key` and pinning the API version header.
 *
 * @param {object} [credential] - Cartesia credential; only `api_key` is read.
 * @returns {Promise<*>} the parsed JSON response, or `undefined` when no
 *   credential is supplied (no request is made in that case).
 */
const fetchCartesiaVoices = async(credential) => {
  // Nothing to authenticate with: skip the request and resolve to undefined.
  if (!credential) return;

  const headers = {
    'X-API-Key': credential.api_key,
    'Cartesia-Version': '2024-06-10',
    'Accept': 'application/json'
  };
  const request = bent('https://api.cartesia.ai', 'GET', 'json', headers);

  return await request('/voices');
};
|
||||
|
||||
/**
 * Exercise a Cartesia credential end to end.
 *
 * First synthesizes a short fixed English phrase through the supplied
 * `synthAudio` function, then fetches the account's voice list. Any failure
 * in either step is logged at info level and rethrown unchanged.
 *
 * @param {object} logger - logger exposing `info(obj, msg)`.
 * @param {Function} synthAudio - async synthesizer called as
 *   `synthAudio(stats, options)`.
 * @param {object} credentials - Cartesia credential forwarded to both steps.
 * @returns {Promise<void>} resolves when both steps succeed.
 * @throws whatever `synthAudio` or `fetchCartesiaVoices` rejects with.
 */
const testCartesia = async(logger, synthAudio, credentials) => {
  // First argument for synthAudio: an object whose increment/histogram
  // methods do nothing (presumably a stand-in for a stats collector).
  const noopStats = {
    increment: () => {},
    histogram: () => {}
  };
  const synthOptions = {
    vendor: 'cartesia',
    credentials,
    language: 'en',
    voice: '694f9389-aac1-45b6-b726-9d9369183238',
    text: 'Hi there and welcome to jambones!',
    renderForCaching: true
  };

  try {
    await synthAudio(noopStats, synthOptions);
    // Test if Cartesia can fetch voices
    await fetchCartesiaVoices(credentials);
  } catch (err) {
    logger.info({err}, 'synth cartesia returned error');
    throw err;
  }
};
|
||||
|
||||
/**
 * Build the Cartesia language/voice catalogue.
 *
 * With a credential, the account's voices are fetched from Cartesia, limited
 * to the languages listed for the credential's `model_id` in
 * `ttsModelCartesia`, and grouped by language code in first-seen order.
 * Without a credential the static `ttsCartesia` list is used instead. Either
 * way the result is handed to the file's `tranform` helper together with the
 * Cartesia model list.
 *
 * The dispatcher calls this with extra arguments (`getTtsVoices`, `logger`);
 * they are not used here.
 *
 * @param {object} [credential] - Cartesia credential; `model_id` selects the
 *   language filter and the whole object is forwarded to
 *   `fetchCartesiaVoices`.
 * @returns {Promise<*>} whatever `tranform(ttsVoices, undefined, TtsModelCartesia)`
 *   returns.
 * @throws whatever `fetchCartesiaVoices` rejects with.
 */
async function getLanguagesVoicesForCartesia(credential) {
  // No credential: nothing to query, serve the bundled static list.
  if (!credential) {
    return tranform(ttsCartesia, undefined, TtsModelCartesia);
  }

  // Display names for the language codes Cartesia reports on its voices.
  const languageNames = {
    en: 'English',
    fr: 'French',
    de: 'German',
    es: 'Spanish',
    pt: 'Portuguese',
    zh: 'Chinese',
    ja: 'Japanese',
    hi: 'Hindi',
    it: 'Italian',
    ko: 'Korean',
    nl: 'Dutch',
    pl: 'Polish',
    ru: 'Russian',
    sv: 'Swedish',
    tr: 'Turkish',
  };

  // A credential may carry a model_id that is missing or not in the local
  // model table. Destructuring the lookup result directly would throw in that
  // case, so an unknown model simply means "do not filter by language".
  const model = ttsModelCartesia.find((m) => m.value === credential.model_id);
  const supported = model && Array.isArray(model.languages) ?
    new Set(model.languages) :
    null;

  const voices = await fetchCartesiaVoices(credential);

  // Map keeps insertion order, so languages appear in the order their first
  // voice is encountered, and lookup per voice is O(1).
  const byLanguage = new Map();
  for (const voice of voices) {
    const code = voice.language;
    if (supported && !supported.has(code)) continue;

    let entry = byLanguage.get(code);
    if (!entry) {
      entry = {
        value: code,
        // Fall back to the raw code so the entry never has an undefined name.
        name: languageNames[code] || code,
        voices: []
      };
      byLanguage.set(code, entry);
    }

    entry.voices.push({
      value: `${voice.id}`,
      // Only append the description when the voice actually has one.
      name: voice.description ? `${voice.name} - ${voice.description}` : `${voice.name}`
    });
  }

  return tranform([...byLanguage.values()], undefined, TtsModelCartesia);
}
|
||||
|
||||
module.exports = {
|
||||
testGoogleTts,
|
||||
testGoogleStt,
|
||||
@@ -1169,5 +1268,6 @@ module.exports = {
|
||||
testVerbioTts,
|
||||
testVerbioStt,
|
||||
getLanguagesAndVoicesForVendor,
|
||||
testSpeechmaticsStt
|
||||
testSpeechmaticsStt,
|
||||
testCartesia
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user