diff --git a/lib/synth-audio.js b/lib/synth-audio.js index f114c87..f07e14b 100644 --- a/lib/synth-audio.js +++ b/lib/synth-audio.js @@ -76,7 +76,7 @@ const trimTrailingSilence = (buffer) => { * the synthesized audio, and a variable indicating whether it was served from cache */ async function synthAudio(client, logger, stats, { account_sid, - vendor, language, voice, gender, text, engine, salt, model, credentials, deploymentId, disableTtsCache + vendor, language, voice, gender, text, engine, salt, model, credentials, deploymentId, disableTtsCache, options }) { let audioBuffer; let servedFromCache = false; @@ -194,7 +194,7 @@ async function synthAudio(client, logger, stats, { account_sid, audioBuffer = await synthWellSaid(logger, {credentials, stats, language, voice, text, filePath}); break; case 'elevenlabs': - audioBuffer = await synthElevenlabs(logger, {credentials, stats, language, voice, text, filePath}); + audioBuffer = await synthElevenlabs(logger, {credentials, options, stats, language, voice, text, filePath}); break; case 'whisper': audioBuffer = await synthWhisper(logger, {credentials, stats, voice, text}); @@ -585,21 +585,25 @@ const synthCustomVendor = async(logger, {credentials, stats, language, voice, te } }; -const synthElevenlabs = async(logger, {credentials, stats, language, voice, text}) => { - const {api_key, model_id} = credentials; +const synthElevenlabs = async(logger, {credentials, options, stats, language, voice, text}) => { + const {api_key, model_id, options: credOpts} = credentials; + const opts = !!options && Object.keys(options).length !== 0 ? options : JSON.parse(credOpts || '{}'); + const optimize_streaming_latency = opts.optimize_streaming_latency ? + `?optimize_streaming_latency=${opts.optimize_streaming_latency}` : ''; try { const post = bent('https://api.elevenlabs.io', 'POST', 'buffer', { 'xi-api-key': api_key, 'Accept': 'audio/mpeg', 'Content-Type': 'application/json' }); - const mp3 = await post(`/v1/text-to-speech/${voice}`, { + const mp3 = await post(`/v1/text-to-speech/${voice}${optimize_streaming_latency}`, { text, model_id, voice_settings: { stability: 0.5, similarity_boost: 0.5 - } + }, + ...opts }); return mp3; } catch (err) { diff --git a/test/synth.js b/test/synth.js index 112c168..7032572 100644 --- a/test/synth.js +++ b/test/synth.js @@ -459,7 +459,16 @@ test('Elevenlabs speech synth tests', async(t) => { vendor: 'elevenlabs', credentials: { api_key: process.env.ELEVENLABS_API_KEY, - model_id: process.env.ELEVENLABS_MODEL_ID + model_id: process.env.ELEVENLABS_MODEL_ID, + options: JSON.stringify({ + optimize_streaming_latency: 1, + voice_settings: { + similarity_boost: 1, + stability: 0.8, + style: 1, + use_speaker_boost: true + } + }) }, language: 'en-US', voice: process.env.ELEVENLABS_VOICE_ID,