Merge pull request #135 from jambonz/feat/gemini_tts

support gemini tts
This commit is contained in:
Dave Horton
2026-01-17 21:39:28 -05:00
committed by GitHub
4 changed files with 1559 additions and 367 deletions

View File

@@ -204,7 +204,10 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
const startAt = process.hrtime(); const startAt = process.hrtime();
switch (vendor) { switch (vendor) {
case 'google': case 'google':
audioData = await synthGoogle(logger, {credentials, stats, language, voice, gender, key, text}); audioData = await synthGoogle(logger, {
credentials, stats, language, voice, gender, key, text, model, options, instructions,
renderForCaching, disableTtsStreaming, disableTtsCache
});
break; break;
case 'aws': case 'aws':
case 'polly': case 'polly':
@@ -409,72 +412,124 @@ const synthPolly = async(createHash, retrieveHash, logger,
} }
}; };
const synthGoogle = async(logger, {credentials, stats, language, voice, gender, text}) => {
const client = new ttsGoogle.TextToSpeechClient(credentials);
// If google custom voice cloning is used.
// At this time 31 Oct 2024, google node sdk has not support voice cloning yet.
if (typeof voice === 'object' && voice.voice_cloning_key) {
try {
const accessToken = await client.auth.getAccessToken();
const projectId = await client.getProjectId();
const post = bent('https://texttospeech.googleapis.com', 'POST', 'json', { const synthGoogle = async(logger, {
'Authorization': `Bearer ${accessToken}`, credentials, stats, language, voice, gender, key, text, model, options, instructions,
'x-goog-user-project': projectId, renderForCaching, disableTtsStreaming, disableTtsCache
'Content-Type': 'application/json; charset=utf-8' }) => {
}); const isGemini = !!model;
const isVoiceCloning = typeof voice === 'object' && voice.voice_cloning_key;
// HD voices have pattern like en-US-Chirp3-HD-Charon
const isHDVoice = typeof voice === 'string' && voice.includes('-HD-');
const payload = { // Streaming support for Google TTS (Gemini, HD voices, and standard voices)
input: { // Voice cloning does not support streaming
text if (!isVoiceCloning && !JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
}, // Strip SSML tags for Gemini TTS (it doesn't support SSML)
voice: { let inputText = text;
language_code: language, if (isGemini && text.startsWith('<speak>')) {
voice_clone: { inputText = text.replace(/<[^>]*>/g, '').trim();
voice_cloning_key: voice.voice_cloning_key logger.info('synthGoogle: Gemini TTS does not support SSML, stripped tags from input');
}
},
audioConfig: {
// Cloning voice at this time still in v1 beta version, and it support LINEAR16 in Wav format, 24.000Hz
audioEncoding: 'LINEAR16',
sample_rate_hertz: 24000
}
};
const wav = await post('/v1beta1/text:synthesize', payload);
return {
audioContent: Buffer.from(wav.audioContent, 'base64'),
extension: 'wav',
sampleRate: 24000
};
} catch (err) {
logger.info({err: await err.text()}, 'synthGoogle returned error');
throw err;
} }
let params = '{';
params += `credentials=${Buffer.from(JSON.stringify(credentials.credentials)).toString('base64')}`;
params += `,playback_id=${key}`;
params += ',vendor=google';
params += `,voice=${voice}`;
params += `,language_code=${language || 'en-US'}`;
params += `,write_cache_file=${disableTtsCache ? 0 : 1}`;
const useLiveApi = options?.useLiveApi ?? isHDVoice;
const useGeminiTts = options?.useGeminiTts ?? isGemini;
params += `,use_live_api=${useLiveApi ? 1 : 0}`;
params += `,use_gemini_tts=${useGeminiTts ? 1 : 0}`;
if (model) params += `,model_name=${model}`;
if (gender) params += `,gender=${gender}`;
// comma is used to separate parameters in freeswitch tts module
const prompt = options?.prompt || instructions;
if (prompt) params += `,prompt=${prompt.replace(/\n/g, ' ').replace(/,/g, ';')}`;
params += '}';
return {
filePath: `say:${params}${(isGemini ? inputText : text).replace(/\n/g, ' ')}`,
servedFromCache: false,
rtt: 0
};
} }
const opts = { const client = new ttsGoogle.TextToSpeechClient(credentials);
voice: {
...(typeof voice === 'string' && {name: voice}), // Build input based on voice type
...(typeof voice === 'object' && {customVoice: voice}), let input;
if (isGemini) {
// Gemini TTS does not support SSML - strip tags if present
let inputText = text;
if (text.startsWith('<speak>')) {
inputText = text.replace(/<[^>]*>/g, '').trim();
logger.info('synthGoogle: Gemini TTS does not support SSML, stripped tags from input');
}
// Use instructions as prompt for Gemini TTS style control, options.prompt can override
const prompt = options?.prompt || instructions;
input = {
text: inputText,
...(prompt && { prompt })
};
} else {
input = text.startsWith('<speak>') ? { ssml: text } : { text };
}
// Build voice selection params based on voice type
let voiceParams;
if (isGemini) {
voiceParams = {
languageCode: language || 'en-US',
name: voice,
modelName: model
};
} else if (isVoiceCloning) {
voiceParams = {
languageCode: language,
voiceClone: {
voiceCloningKey: voice.voice_cloning_key
}
};
} else {
voiceParams = {
...(typeof voice === 'string' && { name: voice }),
...(typeof voice === 'object' && { customVoice: voice }),
languageCode: language, languageCode: language,
ssmlGender: gender || 'SSML_VOICE_GENDER_UNSPECIFIED' ssmlGender: gender || 'SSML_VOICE_GENDER_UNSPECIFIED'
}, };
audioConfig: {audioEncoding: 'MP3'} }
};
Object.assign(opts, {input: text.startsWith('<speak>') ? {ssml: text} : {text}}); // Build audio config based on voice type
let audioConfig;
let extension;
let sampleRate;
if (isVoiceCloning) {
audioConfig = { audioEncoding: 'LINEAR16', sampleRateHertz: 24000 };
extension = 'wav';
sampleRate = 24000;
} else {
audioConfig = { audioEncoding: 'MP3' };
extension = 'mp3';
sampleRate = 8000;
}
const opts = { input, voice: voiceParams, audioConfig };
try { try {
const responses = await client.synthesizeSpeech(opts); logger.debug({ opts }, 'synthGoogle: request');
const [response] = await client.synthesizeSpeech(opts);
stats.increment('tts.count', ['vendor:google', 'accepted:yes']); stats.increment('tts.count', ['vendor:google', 'accepted:yes']);
client.close(); client.close();
return { return {
audioContent: responses[0].audioContent, audioContent: response.audioContent,
extension: 'mp3', extension,
sampleRate: 8000 sampleRate
}; };
} catch (err) { } catch (err) {
console.error(err); logger.info({ err, opts }, 'synthAudio: Error synthesizing speech using google');
logger.info({err, opts}, 'synthAudio: Error synthesizing speech using google');
stats.increment('tts.count', ['vendor:google', 'accepted:no']); stats.increment('tts.count', ['vendor:google', 'accepted:no']);
client && client.close(); client && client.close();
throw err; throw err;

1376
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -30,7 +30,7 @@
"@aws-sdk/client-polly": "^3.496.0", "@aws-sdk/client-polly": "^3.496.0",
"@aws-sdk/client-sts": "^3.496.0", "@aws-sdk/client-sts": "^3.496.0",
"@cartesia/cartesia-js": "^2.2.7", "@cartesia/cartesia-js": "^2.2.7",
"@google-cloud/text-to-speech": "^5.5.0", "@google-cloud/text-to-speech": "^6.4.0",
"@grpc/grpc-js": "^1.9.14", "@grpc/grpc-js": "^1.9.14",
"@jambonz/realtimedb-helpers": "^0.8.7", "@jambonz/realtimedb-helpers": "^0.8.7",
"bent": "^7.3.12", "bent": "^7.3.12",

View File

@@ -41,6 +41,7 @@ test('Google speech synth tests', async(t) => {
gender: 'FEMALE', gender: 'FEMALE',
text: 'This is a test. This is only a test', text: 'This is a test. This is only a test',
salt: 'foo.bar', salt: 'foo.bar',
renderForCaching: true,
}); });
t.ok(!opts.servedFromCache, `successfully synthesized google audio to ${opts.filePath}`); t.ok(!opts.servedFromCache, `successfully synthesized google audio to ${opts.filePath}`);
@@ -55,6 +56,7 @@ test('Google speech synth tests', async(t) => {
language: 'en-GB', language: 'en-GB',
gender: 'FEMALE', gender: 'FEMALE',
text: 'This is a test. This is only a test', text: 'This is a test. This is only a test',
renderForCaching: true,
}); });
t.ok(opts.servedFromCache, `successfully retrieved cached google audio from ${opts.filePath}`); t.ok(opts.servedFromCache, `successfully retrieved cached google audio from ${opts.filePath}`);
@@ -78,6 +80,7 @@ test('Google speech synth tests', async(t) => {
language: 'en-GB', language: 'en-GB',
gender: 'FEMALE', gender: 'FEMALE',
text: 'This is a test. This is only a test', text: 'This is a test. This is only a test',
renderForCaching: true,
}); });
t.ok(!opts.servedFromCache, `successfully synthesized google audio regardless of current cache to ${opts.filePath}`); t.ok(!opts.servedFromCache, `successfully synthesized google audio regardless of current cache to ${opts.filePath}`);
} catch (err) { } catch (err) {
@@ -114,7 +117,8 @@ GCP_CUSTOM_VOICE_FILE nor GCP_CUSTOM_VOICE_JSON_KEY provided, GCP_CUSTOM_VOICE_M
voice: { voice: {
reportedUsage: 'REALTIME', reportedUsage: 'REALTIME',
model: process.env.GCP_CUSTOM_VOICE_MODEL model: process.env.GCP_CUSTOM_VOICE_MODEL
} },
renderForCaching: true,
}); });
t.ok(!opts.servedFromCache, `successfully synthesized google custom voice audio to ${opts.filePath}`); t.ok(!opts.servedFromCache, `successfully synthesized google custom voice audio to ${opts.filePath}`);
} catch (err) { } catch (err) {
@@ -132,7 +136,7 @@ test('Google speech voice cloning synth tests', async(t) => {
!process.env.GCP_CUSTOM_VOICE_JSON_KEY || !process.env.GCP_CUSTOM_VOICE_JSON_KEY ||
!process.env.GCP_VOICE_CLONING_FILE && !process.env.GCP_VOICE_CLONING_FILE &&
!process.env.GCP_VOICE_CLONING_JSON_KEY) { !process.env.GCP_VOICE_CLONING_JSON_KEY) {
t.pass(`skipping google speech synth tests since neither t.pass(`skipping google speech synth tests since neither
GCP_CUSTOM_VOICE_FILE nor GCP_CUSTOM_VOICE_JSON_KEY provided, GCP_CUSTOM_VOICE_FILE nor GCP_CUSTOM_VOICE_JSON_KEY provided,
GCP_VOICE_CLONING_FILE nor GCP_VOICE_CLONING_JSON_KEY is not provided`); GCP_VOICE_CLONING_FILE nor GCP_VOICE_CLONING_JSON_KEY is not provided`);
return t.end(); return t.end();
@@ -156,7 +160,8 @@ GCP_VOICE_CLONING_FILE nor GCP_VOICE_CLONING_JSON_KEY is not provided`);
text: 'This is a test. This is only a test. This is a test. This is only a test. This is a test. This is only a test', text: 'This is a test. This is only a test. This is a test. This is only a test. This is a test. This is only a test',
voice: { voice: {
voice_cloning_key voice_cloning_key
} },
renderForCaching: true,
}); });
t.ok(!opts.servedFromCache, `successfully synthesized google voice cloning audio to ${opts.filePath}`); t.ok(!opts.servedFromCache, `successfully synthesized google voice cloning audio to ${opts.filePath}`);
} catch (err) { } catch (err) {
@@ -166,6 +171,380 @@ GCP_VOICE_CLONING_FILE nor GCP_VOICE_CLONING_JSON_KEY is not provided`);
client.quit(); client.quit();
}); });
// Integration test for Gemini TTS rendered to an audio file (non-streaming).
// Every request sets renderForCaching: true, which keeps synthGoogle out of
// its streaming ("say:") branch so a real file path is returned.
// Skipped when neither GCP_FILE nor GCP_JSON_KEY is set. The model defaults to
// 'gemini-2.5-flash-tts' and can be overridden with GCP_GEMINI_TTS_MODEL.
test('Google Gemini TTS synth tests', async(t) => {
  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);

  try {
    if (!process.env.GCP_FILE && !process.env.GCP_JSON_KEY) {
      t.pass('skipping Google Gemini TTS synth tests since neither GCP_FILE nor GCP_JSON_KEY provided');
      // the finally block below still closes the client on this early exit
      return t.end();
    }

    const str = process.env.GCP_JSON_KEY || fs.readFileSync(process.env.GCP_FILE);
    const creds = JSON.parse(str);
    const geminiModel = process.env.GCP_GEMINI_TTS_MODEL || 'gemini-2.5-flash-tts';

    // Builds a fresh request object per call so no state is shared between
    // synthAudio invocations; `overrides` supplies voice/text/instructions.
    const geminiRequest = (overrides) => ({
      vendor: 'google',
      credentials: {
        credentials: {
          client_email: creds.client_email,
          private_key: creds.private_key,
        },
      },
      language: 'en-US',
      model: geminiModel,
      renderForCaching: true,
      ...overrides,
    });

    // Test Gemini TTS with model and instructions (both required for Gemini)
    let result = await synthAudio(stats, geminiRequest({
      voice: 'Kore',
      text: 'Hello, this is a test of Google Gemini text to speech.',
      instructions: 'Speak clearly and naturally.',
    }));
    t.ok(!result.servedFromCache, `successfully synthesized Google Gemini TTS audio to ${result.filePath}`);
    t.ok(result.filePath.endsWith('.mp3'), 'Gemini TTS audio file has correct extension');

    // Test Gemini TTS with different voice and instructions
    result = await synthAudio(stats, geminiRequest({
      voice: 'Charon',
      text: 'Welcome to our service. How can I help you today?',
      instructions: 'Speak in a warm, friendly and professional tone.',
    }));
    t.ok(!result.servedFromCache, `successfully synthesized Gemini TTS with instructions to ${result.filePath}`);

    // Test cache retrieval: identical parameters to the first request
    result = await synthAudio(stats, geminiRequest({
      voice: 'Kore',
      text: 'Hello, this is a test of Google Gemini text to speech.',
      instructions: 'Speak clearly and naturally.',
    }));
    t.ok(result.servedFromCache, `successfully retrieved Gemini TTS audio from cache ${result.filePath}`);

    // Test SSML stripping (Gemini doesn't support SSML)
    result = await synthAudio(stats, geminiRequest({
      voice: 'Leda',
      text: '<speak>This SSML should be stripped for Gemini TTS.</speak>',
      instructions: 'Speak naturally.',
      disableTtsCache: true,
    }));
    t.ok(!result.servedFromCache, `successfully synthesized Gemini TTS with SSML stripped to ${result.filePath}`);
  } catch (err) {
    console.error(err);
    t.end(err);
  } finally {
    // Close the client on every exit path (skip, success, failure)
    client.quit();
  }
});
// Tests for the Google TTS streaming path, where synthAudio returns a
// FreeSWITCH-style "say:{param=value,...}text" string instead of a file path.
// Streaming is the default, so JAMBONES_DISABLE_TTS_STREAMING is removed from
// the environment and the affected modules are reloaded before the test runs.
// Skipped when neither GCP_FILE nor GCP_JSON_KEY is set.
test('Google TTS streaming tests (!JAMBONES_DISABLE_TTS_STREAMING)', async(t) => {
  // Ensure streaming is enabled (default behavior)
  delete process.env.JAMBONES_DISABLE_TTS_STREAMING;

  // Clear require cache to reload config with new env var
  delete require.cache[require.resolve('../lib/config')];
  delete require.cache[require.resolve('../lib/synth-audio')];
  delete require.cache[require.resolve('..')];

  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);

  try {
    if (!process.env.GCP_FILE && !process.env.GCP_JSON_KEY) {
      t.pass('skipping Google TTS streaming tests since neither GCP_FILE nor GCP_JSON_KEY provided');
      // the finally block below still closes the client on this early exit
      return t.end();
    }

    const str = process.env.GCP_JSON_KEY || fs.readFileSync(process.env.GCP_FILE);
    const creds = JSON.parse(str);
    const geminiModel = process.env.GCP_GEMINI_TTS_MODEL || 'gemini-2.5-flash-tts';

    // Builds a fresh request object per call; `overrides` supplies the
    // voice/text plus any model, instructions or options for that case.
    const streamingRequest = (overrides) => ({
      vendor: 'google',
      credentials: {
        credentials: {
          client_email: creds.client_email,
          private_key: creds.private_key,
        },
      },
      language: 'en-US',
      disableTtsCache: true,
      ...overrides,
    });

    // Test 1: Standard voice streaming (use_live_api=0)
    let result = await synthAudio(stats, streamingRequest({
      voice: 'en-US-Wavenet-D',
      gender: 'MALE',
      text: 'This is a test of standard voice streaming.',
    }));
    t.ok(result.filePath.startsWith('say:'), 'Standard voice returns streaming say: path');
    t.ok(result.filePath.includes('vendor=google'), 'Standard voice streaming path contains vendor=google');
    t.ok(result.filePath.includes('use_live_api=0'), 'Standard voice uses use_live_api=0');
    t.ok(result.filePath.includes('use_gemini_tts=0'), 'Standard voice uses use_gemini_tts=0');
    t.ok(result.filePath.includes('voice=en-US-Wavenet-D'), 'Standard voice streaming path contains voice');
    // Verify credentials are base64 encoded (no raw JSON braces that would break FreeSWitch parsing)
    t.ok(result.filePath.includes('credentials='), 'Standard voice streaming path contains credentials');
    t.ok(!result.filePath.includes('credentials={'), 'Credentials are not raw JSON (base64 encoded)');

    // Test 2: HD voice streaming (use_live_api=1)
    result = await synthAudio(stats, streamingRequest({
      voice: 'en-US-Chirp3-HD-Charon',
      text: 'This is a test of HD voice streaming.',
    }));
    t.ok(result.filePath.startsWith('say:'), 'HD voice returns streaming say: path');
    t.ok(result.filePath.includes('vendor=google'), 'HD voice streaming path contains vendor=google');
    t.ok(result.filePath.includes('use_live_api=1'), 'HD voice uses use_live_api=1 (Live API)');
    t.ok(result.filePath.includes('use_gemini_tts=0'), 'HD voice uses use_gemini_tts=0');
    t.ok(result.filePath.includes('voice=en-US-Chirp3-HD-Charon'), 'HD voice streaming path contains voice');

    // Test 3: Gemini TTS streaming (use_gemini_tts=1, use_live_api=0)
    result = await synthAudio(stats, streamingRequest({
      voice: 'Kore',
      model: geminiModel,
      text: 'This is a test of Gemini TTS streaming.',
      instructions: 'Speak naturally.',
    }));
    t.ok(result.filePath.startsWith('say:'), 'Gemini TTS returns streaming say: path');
    t.ok(result.filePath.includes('vendor=google'), 'Gemini TTS streaming path contains vendor=google');
    t.ok(result.filePath.includes('use_live_api=0'), 'Gemini TTS uses use_live_api=0');
    t.ok(result.filePath.includes('use_gemini_tts=1'), 'Gemini TTS uses use_gemini_tts=1');
    t.ok(result.filePath.includes(`model_name=${geminiModel}`), 'Gemini TTS streaming path contains model_name');
    t.ok(result.filePath.includes('prompt=Speak naturally.'), 'Gemini TTS streaming path contains prompt');

    // Test 4: Gemini TTS with SSML stripping in streaming mode
    result = await synthAudio(stats, streamingRequest({
      voice: 'Leda',
      model: geminiModel,
      text: '<speak>This SSML should be stripped.</speak>',
      instructions: 'Speak naturally.',
    }));
    t.ok(result.filePath.startsWith('say:'), 'Gemini TTS with SSML returns streaming say: path');
    t.ok(!result.filePath.includes('<speak>'), 'SSML tags are stripped from streaming path');
    t.ok(result.filePath.includes('This SSML should be stripped.'), 'Text content is preserved after SSML stripping');

    // Test 5: Gemini TTS with prompt containing special characters
    result = await synthAudio(stats, streamingRequest({
      voice: 'Kore',
      model: geminiModel,
      text: 'Testing special characters in prompt.',
      options: { prompt: 'Speak in a warm, friendly tone' },
    }));
    t.ok(result.filePath.startsWith('say:'), 'Gemini TTS with special chars returns streaming say: path');
    // Commas separate parameters in the say: string, so commas in the prompt
    // should be replaced with semicolons
    t.ok(result.filePath.includes('prompt=Speak in a warm; friendly tone'), 'Commas in prompt are escaped to semicolons');

    // Test 6: options.useLiveApi override (force live api on standard voice)
    result = await synthAudio(stats, streamingRequest({
      voice: 'en-US-Wavenet-D',
      text: 'Testing useLiveApi option override.',
      options: { useLiveApi: true },
    }));
    t.ok(result.filePath.includes('use_live_api=1'), 'options.useLiveApi=true overrides default for standard voice');
    t.ok(result.filePath.includes('use_gemini_tts=0'), 'use_gemini_tts remains 0 for standard voice');

    // Test 7: options.useGeminiTts override (force gemini tts without model)
    result = await synthAudio(stats, streamingRequest({
      voice: 'Kore',
      text: 'Testing useGeminiTts option override.',
      options: { useGeminiTts: true },
    }));
    t.ok(result.filePath.includes('use_gemini_tts=1'), 'options.useGeminiTts=true overrides default');
    t.ok(result.filePath.includes('use_live_api=0'), 'use_live_api remains 0 without HD voice');

    // Test 8: Both options override together
    result = await synthAudio(stats, streamingRequest({
      voice: 'en-US-Wavenet-D',
      text: 'Testing both options override.',
      options: { useLiveApi: true, useGeminiTts: true },
    }));
    t.ok(result.filePath.includes('use_live_api=1'), 'options.useLiveApi=true works with useGeminiTts');
    t.ok(result.filePath.includes('use_gemini_tts=1'), 'options.useGeminiTts=true works with useLiveApi');
  } catch (err) {
    console.error(err);
    t.end(err);
  } finally {
    // Close the client on every exit path (skip, success, failure)
    client.quit();
  }
});
// Tests that Google TTS falls back to file-based synthesis when
// JAMBONES_DISABLE_TTS_STREAMING is set: standard, HD and Gemini voices must
// all return an .mp3 file path rather than a streaming "say:" string.
// The env var is set and the affected modules are reloaded for this test only;
// the finally block undoes both on every exit path, including the early skip.
// Skipped when neither GCP_FILE nor GCP_JSON_KEY is set.
test('Google TTS non-streaming tests (JAMBONES_DISABLE_TTS_STREAMING=true)', async(t) => {
  // Enable streaming disable flag
  process.env.JAMBONES_DISABLE_TTS_STREAMING = 'true';

  // Clear require cache to reload config with new env var
  delete require.cache[require.resolve('../lib/config')];
  delete require.cache[require.resolve('../lib/synth-audio')];
  delete require.cache[require.resolve('..')];

  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);

  try {
    if (!process.env.GCP_FILE && !process.env.GCP_JSON_KEY) {
      t.pass('skipping Google TTS non-streaming tests since neither GCP_FILE nor GCP_JSON_KEY provided');
      // Cleanup (env var, require cache, client) happens in finally, so later
      // tests do not inherit modules loaded with streaming disabled.
      return t.end();
    }

    const str = process.env.GCP_JSON_KEY || fs.readFileSync(process.env.GCP_FILE);
    const creds = JSON.parse(str);
    const geminiModel = process.env.GCP_GEMINI_TTS_MODEL || 'gemini-2.5-flash-tts';

    // Builds a fresh request object per call; `overrides` supplies the
    // voice/text plus any gender, model or instructions for that case.
    const nonStreamingRequest = (overrides) => ({
      vendor: 'google',
      credentials: {
        credentials: {
          client_email: creds.client_email,
          private_key: creds.private_key,
        },
      },
      language: 'en-US',
      disableTtsCache: true,
      ...overrides,
    });

    // Test 1: Standard voice falls back to non-streaming API
    let result = await synthAudio(stats, nonStreamingRequest({
      voice: 'en-US-Wavenet-D',
      gender: 'MALE',
      text: 'This is a test with streaming disabled.',
    }));
    t.ok(!result.filePath.startsWith('say:'), 'Standard voice does NOT return streaming say: path when disabled');
    t.ok(result.filePath.endsWith('.mp3'), 'Standard voice returns mp3 file path');

    // Test 2: HD voice falls back to non-streaming API
    result = await synthAudio(stats, nonStreamingRequest({
      voice: 'en-US-Chirp3-HD-Charon',
      text: 'This is a test of HD voice with streaming disabled.',
    }));
    t.ok(!result.filePath.startsWith('say:'), 'HD voice does NOT return streaming say: path when disabled');
    t.ok(result.filePath.endsWith('.mp3'), 'HD voice returns mp3 file path');

    // Test 3: Gemini TTS falls back to non-streaming API
    result = await synthAudio(stats, nonStreamingRequest({
      voice: 'Kore',
      model: geminiModel,
      text: 'This is a test of Gemini TTS with streaming disabled.',
      instructions: 'Speak naturally.',
    }));
    t.ok(!result.filePath.startsWith('say:'), 'Gemini TTS does NOT return streaming say: path when disabled');
    t.ok(result.filePath.endsWith('.mp3'), 'Gemini TTS returns mp3 file path');
  } catch (err) {
    console.error(err);
    t.end(err);
  } finally {
    // Close the client first so it is released even if cache cleanup throws
    client.quit();

    // Clean up: restore default behavior
    delete process.env.JAMBONES_DISABLE_TTS_STREAMING;
    delete require.cache[require.resolve('../lib/config')];
    delete require.cache[require.resolve('../lib/synth-audio')];
    delete require.cache[require.resolve('..')];
  }
});
test('AWS speech synth tests', async(t) => { test('AWS speech synth tests', async(t) => {
const fn = require('..'); const fn = require('..');
const {synthAudio, client} = fn(opts, logger); const {synthAudio, client} = fn(opts, logger);