wip

2026-07-04 19:31:49 +00:00 · 2026-01-14 13:51:32 +07:00
parent 6b2b35acfb
commit 0e33358254
2 changed files with 262 additions and 4 deletions
@@ -256,6 +256,226 @@ test('Google Gemini TTS synth tests', async(t) => {
  client.quit();
 });

+test('Google TTS streaming tests (!JAMBONES_DISABLE_TTS_STREAMING)', async(t) => {
+  // Skip before creating a client or touching process state: bailing out after
+  // fn(opts, logger) has run would return without ever calling client.quit().
+  if (!process.env.GCP_FILE && !process.env.GCP_JSON_KEY) {
+    t.pass('skipping Google TTS streaming tests since neither GCP_FILE nor GCP_JSON_KEY provided');
+    return t.end();
+  }
+
+  // Ensure streaming is enabled (default behavior)
+  delete process.env.JAMBONES_DISABLE_TTS_STREAMING;
+
+  // Clear require cache so the modules below are re-evaluated with the env var unset
+  delete require.cache[require.resolve('../lib/config')];
+  delete require.cache[require.resolve('../lib/synth-audio')];
+  delete require.cache[require.resolve('..')];
+
+  const fn = require('..');
+  const {synthAudio, client} = fn(opts, logger);
+
+  try {
+    const str = process.env.GCP_JSON_KEY || fs.readFileSync(process.env.GCP_FILE);
+    const creds = JSON.parse(str);
+    const geminiModel = process.env.GCP_GEMINI_TTS_MODEL || 'gemini-2.5-flash-tts';
+
+    // Every case below shares vendor, credentials and language, and bypasses the TTS cache.
+    // A fresh options object (credentials included) is built on each call so that no
+    // state can carry over from one synthAudio invocation to the next.
+    const synthGoogle = (params) => synthAudio(stats, Object.assign({
+      vendor: 'google',
+      credentials: {
+        credentials: {
+          client_email: creds.client_email,
+          private_key: creds.private_key,
+        },
+      },
+      language: 'en-US',
+      disableTtsCache: true
+    }, params));
+
+    // Test 1: Standard voice streaming (use_live_api=0)
+    let result = await synthGoogle({
+      voice: 'en-US-Wavenet-D',
+      gender: 'MALE',
+      text: 'This is a test of standard voice streaming.'
+    });
+    t.ok(result.filePath.startsWith('say:'), 'Standard voice returns streaming say: path');
+    t.ok(result.filePath.includes('vendor=google'), 'Standard voice streaming path contains vendor=google');
+    t.ok(result.filePath.includes('use_live_api=0'), 'Standard voice uses use_live_api=0');
+    t.ok(result.filePath.includes('voice=en-US-Wavenet-D'), 'Standard voice streaming path contains voice');
+
+    // Test 2: HD voice streaming (use_live_api=1)
+    result = await synthGoogle({
+      voice: 'en-US-Chirp3-HD-Charon',
+      text: 'This is a test of HD voice streaming.'
+    });
+    t.ok(result.filePath.startsWith('say:'), 'HD voice returns streaming say: path');
+    t.ok(result.filePath.includes('vendor=google'), 'HD voice streaming path contains vendor=google');
+    t.ok(result.filePath.includes('use_live_api=1'), 'HD voice uses use_live_api=1 (Live API)');
+    t.ok(result.filePath.includes('voice=en-US-Chirp3-HD-Charon'), 'HD voice streaming path contains voice');
+
+    // Test 3: Gemini TTS streaming (use_live_api=1)
+    result = await synthGoogle({
+      voice: 'Kore',
+      model: geminiModel,
+      text: 'This is a test of Gemini TTS streaming.',
+      instructions: 'Speak naturally.'
+    });
+    t.ok(result.filePath.startsWith('say:'), 'Gemini TTS returns streaming say: path');
+    t.ok(result.filePath.includes('vendor=google'), 'Gemini TTS streaming path contains vendor=google');
+    t.ok(result.filePath.includes('use_live_api=1'), 'Gemini TTS uses use_live_api=1 (Live API)');
+    t.ok(result.filePath.includes(`model_name=${geminiModel}`), 'Gemini TTS streaming path contains model_name');
+    t.ok(result.filePath.includes('prompt=Speak naturally.'), 'Gemini TTS streaming path contains prompt');
+
+    // Test 4: Gemini TTS with SSML stripping in streaming mode
+    result = await synthGoogle({
+      voice: 'Leda',
+      model: geminiModel,
+      text: '<speak>This SSML should be stripped.</speak>',
+      instructions: 'Speak naturally.'
+    });
+    t.ok(result.filePath.startsWith('say:'), 'Gemini TTS with SSML returns streaming say: path');
+    t.ok(!result.filePath.includes('<speak>'), 'SSML tags are stripped from streaming path');
+    t.ok(result.filePath.includes('This SSML should be stripped.'), 'Text content is preserved after SSML stripping');
+
+    // Test 5: Gemini TTS with a prompt (passed via options.prompt) containing a comma
+    result = await synthGoogle({
+      voice: 'Kore',
+      model: geminiModel,
+      text: 'Testing special characters in prompt.',
+      options: { prompt: 'Speak in a warm, friendly tone' }
+    });
+    t.ok(result.filePath.startsWith('say:'), 'Gemini TTS with special chars returns streaming say: path');
+    // Commas in prompt should be replaced with semicolons
+    t.ok(result.filePath.includes('prompt=Speak in a warm; friendly tone'), 'Commas in prompt are escaped to semicolons');
+
+  } catch (err) {
+    console.error(err);
+    t.end(err);
+  }
+  client.quit();
+});
+
+test('Google TTS non-streaming tests (JAMBONES_DISABLE_TTS_STREAMING=true)', async(t) => {
+  // Skip before changing any process state. Bailing out after the env var is set and
+  // the modules are re-required would leave modules that were loaded with streaming
+  // disabled in the require cache for later tests, and would never call client.quit().
+  if (!process.env.GCP_FILE && !process.env.GCP_JSON_KEY) {
+    t.pass('skipping Google TTS non-streaming tests since neither GCP_FILE nor GCP_JSON_KEY provided');
+    return t.end();
+  }
+
+  // Set the flag that disables TTS streaming
+  process.env.JAMBONES_DISABLE_TTS_STREAMING = 'true';
+
+  // Clear require cache so the modules below are re-evaluated with the env var set
+  delete require.cache[require.resolve('../lib/config')];
+  delete require.cache[require.resolve('../lib/synth-audio')];
+  delete require.cache[require.resolve('..')];
+
+  const fn = require('..');
+  const {synthAudio, client} = fn(opts, logger);
+
+  try {
+    const str = process.env.GCP_JSON_KEY || fs.readFileSync(process.env.GCP_FILE);
+    const creds = JSON.parse(str);
+    const geminiModel = process.env.GCP_GEMINI_TTS_MODEL || 'gemini-2.5-flash-tts';
+
+    // Every case below shares vendor, credentials and language, and bypasses the TTS cache.
+    // A fresh options object (credentials included) is built on each call so that no
+    // state can carry over from one synthAudio invocation to the next.
+    const synthGoogle = (params) => synthAudio(stats, Object.assign({
+      vendor: 'google',
+      credentials: {
+        credentials: {
+          client_email: creds.client_email,
+          private_key: creds.private_key,
+        },
+      },
+      language: 'en-US',
+      disableTtsCache: true
+    }, params));
+
+    // Test 1: Standard voice falls back to non-streaming API
+    let result = await synthGoogle({
+      voice: 'en-US-Wavenet-D',
+      gender: 'MALE',
+      text: 'This is a test with streaming disabled.'
+    });
+    t.ok(!result.filePath.startsWith('say:'), 'Standard voice does NOT return streaming say: path when disabled');
+    t.ok(result.filePath.endsWith('.mp3'), 'Standard voice returns mp3 file path');
+
+    // Test 2: HD voice falls back to non-streaming API
+    result = await synthGoogle({
+      voice: 'en-US-Chirp3-HD-Charon',
+      text: 'This is a test of HD voice with streaming disabled.'
+    });
+    t.ok(!result.filePath.startsWith('say:'), 'HD voice does NOT return streaming say: path when disabled');
+    t.ok(result.filePath.endsWith('.mp3'), 'HD voice returns mp3 file path');
+
+    // Test 3: Gemini TTS falls back to non-streaming API
+    result = await synthGoogle({
+      voice: 'Kore',
+      model: geminiModel,
+      text: 'This is a test of Gemini TTS with streaming disabled.',
+      instructions: 'Speak naturally.'
+    });
+    t.ok(!result.filePath.startsWith('say:'), 'Gemini TTS does NOT return streaming say: path when disabled');
+    t.ok(result.filePath.endsWith('.mp3'), 'Gemini TTS returns mp3 file path');
+
+  } catch (err) {
+    console.error(err);
+    t.end(err);
+  } finally {
+    // Clean up: unset the flag and drop the modules loaded while it was set, so that
+    // the next require('..') re-evaluates them with default behavior
+    delete process.env.JAMBONES_DISABLE_TTS_STREAMING;
+    delete require.cache[require.resolve('../lib/config')];
+    delete require.cache[require.resolve('../lib/synth-audio')];
+    delete require.cache[require.resolve('..')];
+  }
+  client.quit();
+});
+
 test('AWS speech synth tests', async(t) => {
  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);