mirror of
https://github.com/jambonz/jambonz-api-server.git
synced 2025-12-19 05:47:46 +00:00
support playht3.0 languages (#357)
* support playht3.0 languages * update speech utils version
This commit is contained in:
@@ -875,7 +875,7 @@ router.get('/:sid/test', async(req, res) => {
|
||||
router.get('/speech/supportedLanguagesAndVoices', async(req, res) => {
|
||||
const {logger, getTtsVoices} = req.app.locals;
|
||||
try {
|
||||
const {vendor, label} = req.query;
|
||||
const {vendor, label, create_new} = req.query;
|
||||
if (!vendor) {
|
||||
throw new DbErrorBadRequest('vendor is required');
|
||||
}
|
||||
@@ -883,7 +883,7 @@ router.get('/speech/supportedLanguagesAndVoices', async(req, res) => {
|
||||
const service_provider_sid = req.user.service_provider_sid ||
|
||||
req.body.service_provider_sid || parseServiceProviderSid(req);
|
||||
|
||||
const credentials = await SpeechCredential.getSpeechCredentialsByVendorAndLabel(
|
||||
const credentials = create_new ? null : await SpeechCredential.getSpeechCredentialsByVendorAndLabel(
|
||||
service_provider_sid, account_sid, vendor, label);
|
||||
const tmp = credentials && credentials.length > 0 ? credentials[0] : null;
|
||||
const cred = tmp ? JSON.parse(decrypt(tmp.credential)) : null;
|
||||
|
||||
152
lib/utils/speech-data/tts-languages-playht.js
Normal file
152
lib/utils/speech-data/tts-languages-playht.js
Normal file
@@ -0,0 +1,152 @@
|
||||
// languages.js
|
||||
|
||||
module.exports = [
|
||||
{
|
||||
name: 'English',
|
||||
value: 'english'
|
||||
},
|
||||
{
|
||||
name: 'Mandarin',
|
||||
value: 'mandarin'
|
||||
},
|
||||
{
|
||||
name: 'Hindi',
|
||||
value: 'hindi'
|
||||
},
|
||||
{
|
||||
name: 'Japanese',
|
||||
value: 'japanese'
|
||||
},
|
||||
{
|
||||
name: 'Korean',
|
||||
value: 'korean'
|
||||
},
|
||||
{
|
||||
name: 'Arabic',
|
||||
value: 'arabic'
|
||||
},
|
||||
{
|
||||
name: 'Spanish',
|
||||
value: 'spanish'
|
||||
},
|
||||
{
|
||||
name: 'French',
|
||||
value: 'french'
|
||||
},
|
||||
{
|
||||
name: 'Italian',
|
||||
value: 'italian'
|
||||
},
|
||||
{
|
||||
name: 'Portuguese',
|
||||
value: 'portuguese'
|
||||
},
|
||||
{
|
||||
name: 'German',
|
||||
value: 'german'
|
||||
},
|
||||
{
|
||||
name: 'Dutch',
|
||||
value: 'dutch'
|
||||
},
|
||||
{
|
||||
name: 'Swedish',
|
||||
value: 'swedish'
|
||||
},
|
||||
{
|
||||
name: 'Czech',
|
||||
value: 'czech'
|
||||
},
|
||||
{
|
||||
name: 'Polish',
|
||||
value: 'polish'
|
||||
},
|
||||
{
|
||||
name: 'Russian',
|
||||
value: 'russian'
|
||||
},
|
||||
{
|
||||
name: 'Bulgarian',
|
||||
value: 'bulgarian'
|
||||
},
|
||||
{
|
||||
name: 'Hebrew',
|
||||
value: 'hebrew'
|
||||
},
|
||||
{
|
||||
name: 'Greek',
|
||||
value: 'greek'
|
||||
},
|
||||
{
|
||||
name: 'Turkish',
|
||||
value: 'turkish'
|
||||
},
|
||||
{
|
||||
name: 'Afrikaans',
|
||||
value: 'afrikaans'
|
||||
},
|
||||
{
|
||||
name: 'Xhosa',
|
||||
value: 'xhosa'
|
||||
},
|
||||
{
|
||||
name: 'Tagalog',
|
||||
value: 'tagalog'
|
||||
},
|
||||
{
|
||||
name: 'Malay',
|
||||
value: 'malay'
|
||||
},
|
||||
{
|
||||
name: 'Indonesian',
|
||||
value: 'indonesian'
|
||||
},
|
||||
{
|
||||
name: 'Bengali',
|
||||
value: 'bengali'
|
||||
},
|
||||
{
|
||||
name: 'Serbian',
|
||||
value: 'serbian'
|
||||
},
|
||||
{
|
||||
name: 'Thai',
|
||||
value: 'thai'
|
||||
},
|
||||
{
|
||||
name: 'Urdu',
|
||||
value: 'urdu'
|
||||
},
|
||||
{
|
||||
name: 'Croatian',
|
||||
value: 'croatian'
|
||||
},
|
||||
{
|
||||
name: 'Hungarian',
|
||||
value: 'hungarian'
|
||||
},
|
||||
{
|
||||
name: 'Danish',
|
||||
value: 'danish'
|
||||
},
|
||||
{
|
||||
name: 'Amharic',
|
||||
value: 'amharic'
|
||||
},
|
||||
{
|
||||
name: 'Albanian',
|
||||
value: 'albanian'
|
||||
},
|
||||
{
|
||||
name: 'Catalan',
|
||||
value: 'catalan'
|
||||
},
|
||||
{
|
||||
name: 'Ukrainian',
|
||||
value: 'ukrainian'
|
||||
},
|
||||
{
|
||||
name: 'Galician',
|
||||
value: 'galician'
|
||||
}
|
||||
];
|
||||
@@ -25,6 +25,7 @@ const TtsModelDeepgram = require('./speech-data/tts-model-deepgram');
|
||||
const TtsModelElevenLabs = require('./speech-data/tts-model-elevenlabs');
|
||||
const TtsModelWhisper = require('./speech-data/tts-model-whisper');
|
||||
const TtsModelPlayHT = require('./speech-data/tts-model-playht');
|
||||
const ttsLanguagesPlayHt = require('./speech-data/tts-languages-playht');
|
||||
const TtsModelRimelabs = require('./speech-data/tts-model-rimelabs');
|
||||
|
||||
const SttGoogleLanguagesVoices = require('./speech-data/stt-google');
|
||||
@@ -40,6 +41,7 @@ const SttSpeechmaticsLanguagesVoices = require('./speech-data/stt-speechmatics')
|
||||
const SttAssemblyaiLanguagesVoices = require('./speech-data/stt-assemblyai');
|
||||
const SttVerbioLanguagesVoices = require('./speech-data/stt-verbio');
|
||||
|
||||
|
||||
const testSonioxStt = async(logger, credentials) => {
|
||||
const api_key = credentials;
|
||||
const soniox = new SpeechClient(api_key);
|
||||
@@ -869,6 +871,7 @@ const fetchLayHTVoices = async(credential) => {
|
||||
|
||||
async function getLanguagesVoicesForPlayHT(credential) {
|
||||
if (credential) {
|
||||
const {voice_engine} = credential;
|
||||
const [cloned_voice, voices] = await fetchLayHTVoices(credential);
|
||||
const list_voices = [...cloned_voice, ...voices];
|
||||
|
||||
@@ -876,38 +879,57 @@ async function getLanguagesVoicesForPlayHT(credential) {
|
||||
let name = `${d.name} -${concat(d.accent)}${concat(d.age)}${concat(d.gender)}${concat(d.loudness)}` +
|
||||
`${concat(d.style)}${concat(d.tempo)}${concat(d.texture)}` ;
|
||||
name = name.endsWith(',') ? name.trim().slice(0, -1) : name;
|
||||
name += !d.language_code ? ' - Custom Voice' : '';
|
||||
|
||||
return {
|
||||
value: `${d.id}`,
|
||||
name
|
||||
};
|
||||
};
|
||||
|
||||
const ttsVoices = list_voices.reduce((acc, voice) => {
|
||||
// Play3.0 support all voice for PlayHT2.0*
|
||||
const filteredVoiceEngine = credential.voice_engine === 'Play3.0' ?
|
||||
`${credential.voice_engine}_PlayHT2.0_PlayHT2.0-turbo` : credential.voice_engine;
|
||||
if (!filteredVoiceEngine.includes(voice.voice_engine)) {
|
||||
const buildPlay30Payload = () => {
|
||||
// PlayHT3.0 can play different languages with differrent voice.
|
||||
// all voices will be added to english language by default and orther langauges will get voices from english.
|
||||
const ttsVoices = ttsLanguagesPlayHt.map((l) => ({
|
||||
...l,
|
||||
voices: l.value === 'english' ? list_voices.map((v) => buildVoice(v)) : []
|
||||
}));
|
||||
return tranform(ttsVoices, undefined, TtsModelPlayHT);
|
||||
};
|
||||
|
||||
const buildPayload = () => {
|
||||
const ttsVoices = list_voices.reduce((acc, voice) => {
|
||||
if (!voice_engine.includes(voice.voice_engine)) {
|
||||
return acc;
|
||||
}
|
||||
const languageCode = voice.language_code;
|
||||
// custom voice does not have language code
|
||||
if (!languageCode) {
|
||||
voice.language_code = 'en';
|
||||
voice.language = 'Custom-English';
|
||||
}
|
||||
const existingLanguage = acc.find((lang) => lang.value === languageCode);
|
||||
if (existingLanguage) {
|
||||
existingLanguage.voices.push(buildVoice(voice));
|
||||
} else {
|
||||
acc.push({
|
||||
value: voice.language_code,
|
||||
name: voice.language,
|
||||
voices: [buildVoice(voice)]
|
||||
});
|
||||
}
|
||||
return acc;
|
||||
}
|
||||
const languageCode = voice.language_code;
|
||||
// custom voice does not have language code
|
||||
if (!languageCode) {
|
||||
voice.language_code = 'en';
|
||||
voice.language = 'Custom-English';
|
||||
}
|
||||
const existingLanguage = acc.find((lang) => lang.value === languageCode);
|
||||
if (existingLanguage) {
|
||||
existingLanguage.voices.push(buildVoice(voice));
|
||||
} else {
|
||||
acc.push({
|
||||
value: voice.language_code,
|
||||
name: voice.language,
|
||||
voices: [buildVoice(voice)]
|
||||
});
|
||||
}
|
||||
return acc;
|
||||
}, []);
|
||||
return tranform(ttsVoices, undefined, TtsModelPlayHT);
|
||||
}, []);
|
||||
return tranform(ttsVoices, undefined, TtsModelPlayHT);
|
||||
};
|
||||
|
||||
switch (voice_engine) {
|
||||
case 'Play3.0':
|
||||
return buildPlay30Payload();
|
||||
|
||||
default:
|
||||
return buildPayload();
|
||||
}
|
||||
}
|
||||
return tranform(TtsPlayHtLanguagesVoices, undefined, TtsModelPlayHT);
|
||||
}
|
||||
|
||||
8
package-lock.json
generated
8
package-lock.json
generated
@@ -19,7 +19,7 @@
|
||||
"@jambonz/lamejs": "^1.2.2",
|
||||
"@jambonz/mw-registrar": "^0.2.7",
|
||||
"@jambonz/realtimedb-helpers": "^0.8.10",
|
||||
"@jambonz/speech-utils": "^0.1.18",
|
||||
"@jambonz/speech-utils": "^0.1.19",
|
||||
"@jambonz/time-series": "^0.2.8",
|
||||
"@jambonz/verb-specifications": "^0.0.72",
|
||||
"@soniox/soniox-node": "^1.2.2",
|
||||
@@ -2224,9 +2224,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@jambonz/speech-utils": {
|
||||
"version": "0.1.18",
|
||||
"resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.1.18.tgz",
|
||||
"integrity": "sha512-GlcPvUIKcyiiH4cfUPXyYZtP1HIIdFbrqYUmeTmeBaOuZUrJ0xW+TAp/pbysh54vgPnAfcS43Y3ciULx0S3IjQ==",
|
||||
"version": "0.1.19",
|
||||
"resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.1.19.tgz",
|
||||
"integrity": "sha512-3R4o1zJYnUIqMsT2GmYAItcAnhEz6NVTNqk5aHSAvBLFSMpqdeEPrTPaKVmQAPS8aIY7crhbJmJx8L3i2xDzDg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@aws-sdk/client-polly": "^3.496.0",
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
"@jambonz/lamejs": "^1.2.2",
|
||||
"@jambonz/mw-registrar": "^0.2.7",
|
||||
"@jambonz/realtimedb-helpers": "^0.8.10",
|
||||
"@jambonz/speech-utils": "^0.1.18",
|
||||
"@jambonz/speech-utils": "^0.1.19",
|
||||
"@jambonz/time-series": "^0.2.8",
|
||||
"@jambonz/verb-specifications": "^0.0.72",
|
||||
"@soniox/soniox-node": "^1.2.2",
|
||||
|
||||
Reference in New Issue
Block a user