diff --git a/lib/synth-audio.js b/lib/synth-audio.js index f5e812e..5cf8a19 100644 --- a/lib/synth-audio.js +++ b/lib/synth-audio.js @@ -417,7 +417,7 @@ const synthGoogle = async(logger, { }) => { const client = new ttsGoogle.TextToSpeechClient(credentials); - const isGemini = credentials.use_gemini_tts; + const isGemini = !!model; const isVoiceCloning = typeof voice === 'object' && voice.voice_cloning_key; // Build input based on voice type diff --git a/test/synth.js b/test/synth.js index 43f9da2..b62acc4 100644 --- a/test/synth.js +++ b/test/synth.js @@ -179,7 +179,7 @@ test('Google Gemini TTS synth tests', async(t) => { const creds = JSON.parse(str); const geminiModel = process.env.GCP_GEMINI_TTS_MODEL || 'gemini-2.5-flash-tts'; - // Test basic Gemini TTS synthesis + // Test Gemini TTS with model and instructions (both required for Gemini) let result = await synthAudio(stats, { vendor: 'google', credentials: { @@ -187,17 +187,17 @@ test('Google Gemini TTS synth tests', async(t) => { client_email: creds.client_email, private_key: creds.private_key, }, - use_gemini_tts: true }, language: 'en-US', voice: 'Kore', model: geminiModel, text: 'Hello, this is a test of Google Gemini text to speech.', + instructions: 'Speak clearly and naturally.', }); t.ok(!result.servedFromCache, `successfully synthesized Google Gemini TTS audio to ${result.filePath}`); - t.ok(result.filePath.endsWith('.r24'), 'Gemini TTS audio file has correct extension'); + t.ok(result.filePath.endsWith('.wav'), 'Gemini TTS audio file has correct extension'); - // Test Gemini TTS with instructions (prompt) + // Test Gemini TTS with different voice and instructions result = await synthAudio(stats, { vendor: 'google', credentials: { @@ -205,7 +205,6 @@ test('Google Gemini TTS synth tests', async(t) => { client_email: creds.client_email, private_key: creds.private_key, }, - use_gemini_tts: true }, language: 'en-US', voice: 'Charon', @@ -223,12 +222,12 @@ test('Google Gemini TTS synth tests', async(t) => { client_email: creds.client_email, private_key: creds.private_key, }, - use_gemini_tts: true }, language: 'en-US', voice: 'Kore', model: geminiModel, text: 'Hello, this is a test of Google Gemini text to speech.', + instructions: 'Speak clearly and naturally.', }); t.ok(result.servedFromCache, `successfully retrieved Gemini TTS audio from cache ${result.filePath}`); @@ -240,12 +239,12 @@ test('Google Gemini TTS synth tests', async(t) => { client_email: creds.client_email, private_key: creds.private_key, }, - use_gemini_tts: true }, language: 'en-US', voice: 'Leda', model: geminiModel, text: 'This SSML should be stripped for Gemini TTS.', + instructions: 'Speak naturally.', disableTtsCache: true }); t.ok(!result.servedFromCache, `successfully synthesized Gemini TTS with SSML stripped to ${result.filePath}`);