Support Google Gemini TTS (#534)

* Support Google Gemini TTS

* wip

* wip

* wip

* wip

* wip

* support speech utils
This commit is contained in:
Hoan Luu Huu
2026-01-22 20:24:05 +07:00
committed by GitHub
parent 8181d56a48
commit 27addfa543
5 changed files with 706 additions and 24 deletions
+13 -3
View File
@@ -173,14 +173,23 @@ const encryptCredential = (obj) => {
switch (vendor) {
case 'google':
assert(service_key, 'invalid json key: service_key is required');
let modified_service_key = service_key;
try {
const o = JSON.parse(service_key);
// support google gemini tts
if (model_id) {
o.model_id = model_id;
} else {
delete o.model_id;
}
assert(o.client_email && o.private_key, 'invalid google service account key');
modified_service_key = JSON.stringify(o);
}
catch (err) {
assert(false, 'invalid google service account key - not JSON');
}
return encrypt(service_key);
return encrypt(modified_service_key);
case 'aws':
// AWS polly can work for 3 types of credentials:
@@ -582,7 +591,7 @@ router.put('/:sid', async(req, res) => {
custom_tts_url,
custom_tts_streaming_url,
cobalt_server_uri,
model_id,
model_id: model_id !== undefined ? model_id : o.model_id,
stt_model_id,
voice_engine,
options,
@@ -596,7 +605,8 @@ router.put('/:sid', async(req, res) => {
resemble_tts_uri,
resemble_tts_use_tls,
api_uri,
houndify_server_uri
houndify_server_uri,
...(vendor === 'google' && {service_key: JSON.stringify(o)})
};
logger.info({o, newCred}, 'updating speech credential with this new credential');
obj.credential = encryptCredential(newCred);
+5 -3
View File
@@ -779,6 +779,7 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
o.private_key.slice(key_header.length, o.private_key.length)}`
};
obj.service_key = JSON.stringify(obscured);
obj.model_id = o.model_id || null;
}
else if ('aws' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
@@ -1524,11 +1525,11 @@ function parseGooglelanguagesVoices(data) {
return data.reduce((acc, voice) => {
const languageCode = voice.languageCodes[0];
const existingLanguage = acc.find((lang) => lang.value === languageCode);
if (existingLanguage) {
existingLanguage.voices.push({
value: voice.name,
name: `${voice.name.substring(languageCode.length + 1, voice.name.length)} (${voice.ssmlGender})`
name: `${voice.name.startsWith(languageCode) ?
voice.name.substring(languageCode.length + 1, voice.name.length) : voice.name} (${voice.ssmlGender})`
});
} else {
acc.push({
@@ -1536,7 +1537,8 @@ function parseGooglelanguagesVoices(data) {
name: SttGoogleLanguagesVoices.find((lang) => lang.value === languageCode)?.name || languageCode,
voices: [{
value: voice.name,
name: `${voice.name.substring(languageCode.length + 1, voice.name.length)} (${voice.ssmlGender})`
name: `${voice.name.startsWith(languageCode) ?
voice.name.substring(languageCode.length + 1, voice.name.length) : voice.name} (${voice.ssmlGender})`
}]
});
}