mirror of
https://github.com/jambonz/speech-utils.git
synced 2026-01-25 02:08:26 +00:00
wip
This commit is contained in:
@@ -417,7 +417,7 @@ const synthGoogle = async(logger, {
|
|||||||
}) => {
|
}) => {
|
||||||
const client = new ttsGoogle.TextToSpeechClient(credentials);
|
const client = new ttsGoogle.TextToSpeechClient(credentials);
|
||||||
|
|
||||||
const isGemini = credentials.use_gemini_tts;
|
const isGemini = !!model;
|
||||||
const isVoiceCloning = typeof voice === 'object' && voice.voice_cloning_key;
|
const isVoiceCloning = typeof voice === 'object' && voice.voice_cloning_key;
|
||||||
|
|
||||||
// Build input based on voice type
|
// Build input based on voice type
|
||||||
|
|||||||
@@ -179,7 +179,7 @@ test('Google Gemini TTS synth tests', async(t) => {
|
|||||||
const creds = JSON.parse(str);
|
const creds = JSON.parse(str);
|
||||||
const geminiModel = process.env.GCP_GEMINI_TTS_MODEL || 'gemini-2.5-flash-tts';
|
const geminiModel = process.env.GCP_GEMINI_TTS_MODEL || 'gemini-2.5-flash-tts';
|
||||||
|
|
||||||
// Test basic Gemini TTS synthesis
|
// Test Gemini TTS with model and instructions (both required for Gemini)
|
||||||
let result = await synthAudio(stats, {
|
let result = await synthAudio(stats, {
|
||||||
vendor: 'google',
|
vendor: 'google',
|
||||||
credentials: {
|
credentials: {
|
||||||
@@ -187,17 +187,17 @@ test('Google Gemini TTS synth tests', async(t) => {
|
|||||||
client_email: creds.client_email,
|
client_email: creds.client_email,
|
||||||
private_key: creds.private_key,
|
private_key: creds.private_key,
|
||||||
},
|
},
|
||||||
use_gemini_tts: true
|
|
||||||
},
|
},
|
||||||
language: 'en-US',
|
language: 'en-US',
|
||||||
voice: 'Kore',
|
voice: 'Kore',
|
||||||
model: geminiModel,
|
model: geminiModel,
|
||||||
text: 'Hello, this is a test of Google Gemini text to speech.',
|
text: 'Hello, this is a test of Google Gemini text to speech.',
|
||||||
|
instructions: 'Speak clearly and naturally.',
|
||||||
});
|
});
|
||||||
t.ok(!result.servedFromCache, `successfully synthesized Google Gemini TTS audio to ${result.filePath}`);
|
t.ok(!result.servedFromCache, `successfully synthesized Google Gemini TTS audio to ${result.filePath}`);
|
||||||
t.ok(result.filePath.endsWith('.r24'), 'Gemini TTS audio file has correct extension');
|
t.ok(result.filePath.endsWith('.wav'), 'Gemini TTS audio file has correct extension');
|
||||||
|
|
||||||
// Test Gemini TTS with instructions (prompt)
|
// Test Gemini TTS with different voice and instructions
|
||||||
result = await synthAudio(stats, {
|
result = await synthAudio(stats, {
|
||||||
vendor: 'google',
|
vendor: 'google',
|
||||||
credentials: {
|
credentials: {
|
||||||
@@ -205,7 +205,6 @@ test('Google Gemini TTS synth tests', async(t) => {
|
|||||||
client_email: creds.client_email,
|
client_email: creds.client_email,
|
||||||
private_key: creds.private_key,
|
private_key: creds.private_key,
|
||||||
},
|
},
|
||||||
use_gemini_tts: true
|
|
||||||
},
|
},
|
||||||
language: 'en-US',
|
language: 'en-US',
|
||||||
voice: 'Charon',
|
voice: 'Charon',
|
||||||
@@ -223,12 +222,12 @@ test('Google Gemini TTS synth tests', async(t) => {
|
|||||||
client_email: creds.client_email,
|
client_email: creds.client_email,
|
||||||
private_key: creds.private_key,
|
private_key: creds.private_key,
|
||||||
},
|
},
|
||||||
use_gemini_tts: true
|
|
||||||
},
|
},
|
||||||
language: 'en-US',
|
language: 'en-US',
|
||||||
voice: 'Kore',
|
voice: 'Kore',
|
||||||
model: geminiModel,
|
model: geminiModel,
|
||||||
text: 'Hello, this is a test of Google Gemini text to speech.',
|
text: 'Hello, this is a test of Google Gemini text to speech.',
|
||||||
|
instructions: 'Speak clearly and naturally.',
|
||||||
});
|
});
|
||||||
t.ok(result.servedFromCache, `successfully retrieved Gemini TTS audio from cache ${result.filePath}`);
|
t.ok(result.servedFromCache, `successfully retrieved Gemini TTS audio from cache ${result.filePath}`);
|
||||||
|
|
||||||
@@ -240,12 +239,12 @@ test('Google Gemini TTS synth tests', async(t) => {
|
|||||||
client_email: creds.client_email,
|
client_email: creds.client_email,
|
||||||
private_key: creds.private_key,
|
private_key: creds.private_key,
|
||||||
},
|
},
|
||||||
use_gemini_tts: true
|
|
||||||
},
|
},
|
||||||
language: 'en-US',
|
language: 'en-US',
|
||||||
voice: 'Leda',
|
voice: 'Leda',
|
||||||
model: geminiModel,
|
model: geminiModel,
|
||||||
text: '<speak>This SSML should be stripped for Gemini TTS.</speak>',
|
text: '<speak>This SSML should be stripped for Gemini TTS.</speak>',
|
||||||
|
instructions: 'Speak naturally.',
|
||||||
disableTtsCache: true
|
disableTtsCache: true
|
||||||
});
|
});
|
||||||
t.ok(!result.servedFromCache, `successfully synthesized Gemini TTS with SSML stripped to ${result.filePath}`);
|
t.ok(!result.servedFromCache, `successfully synthesized Gemini TTS with SSML stripped to ${result.filePath}`);
|
||||||
|
|||||||
Reference in New Issue
Block a user