diff --git a/lib/synth-audio.js b/lib/synth-audio.js index 60aae1a..32b3d31 100644 --- a/lib/synth-audio.js +++ b/lib/synth-audio.js @@ -53,6 +53,8 @@ const EXPIRES = JAMBONES_TTS_CACHE_DURATION_MINS; const OpenAI = require('openai'); const getAwsAuthToken = require('./get-aws-sts-token'); +/* counter for playback id */ +let playbackIdCounter = 0; const trimTrailingSilence = (buffer) => { assert.ok(buffer instanceof Buffer, 'trimTrailingSilence - argument is not a Buffer'); @@ -589,8 +591,8 @@ const synthMicrosoft = async(logger, { } if (!JAMBONES_DISABLE_TTS_STREAMING && !JAMBONES_DISABLE_AZURE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) { - let params = ''; - params += `{api_key=${apiKey}`; + let params = '{'; + params += `api_key=${apiKey}`; params += `,language=${language}`; params += ',vendor=microsoft'; params += `,voice=${voice}`; @@ -853,8 +855,9 @@ const synthElevenlabs = async(logger, { /* default to using the streaming interface, unless disabled by env var OR we want just a cache file */ if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) { - let params = ''; - params += `{api_key=${api_key}`; + let params = '{'; + params += `api_key=${api_key}`; + params += `,playback_id=${playbackIdCounter++}`; params += ',vendor=elevenlabs'; params += `,voice=${voice}`; params += `,model_id=${model_id}`; @@ -943,8 +946,9 @@ const synthPlayHT = async(client, logger, { /* default to using the streaming interface, unless disabled by env var OR we want just a cache file */ if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) { - let params = ''; - params += `{api_key=${api_key}`; + let params = '{'; + params += `api_key=${api_key}`; + params += `,playback_id=${playbackIdCounter++}`; params += `,user_id=${user_id}`; params += ',vendor=playht'; params += `,voice=${voice}`; @@ -1010,8 +1014,9 @@ const synthInworld = async(logger, { /* default to using the streaming interface, unless disabled by env var OR we want just a cache file */ if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) { - let params = ''; - params += `{api_key=${api_key}`; + let params = '{'; + params += `api_key=${api_key}`; + params += `,playback_id=${playbackIdCounter++}`; params += `,model_id=${model_id}`; params += ',vendor=inworld'; params += `,voice=${voice}`; @@ -1074,8 +1079,9 @@ const synthRimelabs = async(logger, { /* default to using the streaming interface, unless disabled by env var OR we want just a cache file */ if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) { - let params = ''; - params += `{api_key=${api_key}`; + let params = '{'; + params += `api_key=${api_key}`; + params += `,playback_id=${playbackIdCounter++}`; params += `,model_id=${model_id}`; params += ',vendor=rimelabs'; params += `,language=${language}`; @@ -1130,8 +1136,9 @@ const synthVerbio = async(client, logger, {credentials, stats, voice, text, rend } const token = await getVerbioAccessToken(client, logger, credentials); if (!process.env.JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) { - let params = ''; - params += `{access_token=${token.access_token}`; + let params = '{'; + params += `access_token=${token.access_token}`; + params += `,playback_id=${playbackIdCounter++}`; params += ',vendor=verbio'; params += `,voice=${voice}`; params += ',write_cache_file=1'; @@ -1173,8 +1180,9 @@ const synthWhisper = async(logger, {credentials, stats, voice, text, instruction const {api_key, model_id, baseURL, timeout, speed} = credentials; /* if the env is set to stream then bag out, unless we are specifically rendering to generate a cache file */ if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) { - let params = ''; - params += `{api_key=${api_key}`; + let params = '{'; + params += `api_key=${api_key}`; + params += `,playback_id=${playbackIdCounter++}`; params += `,model_id=${model_id}`; params += ',vendor=whisper'; params += `,voice=${voice}`; @@ -1219,8 +1227,9 @@ const synthWhisper = async(logger, {credentials, stats, voice, text, instruction const synthDeepgram = async(logger, {credentials, stats, model, text, renderForCaching, disableTtsStreaming}) => { const {api_key, deepgram_tts_uri} = credentials; if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) { - let params = ''; - params += `{api_key=${api_key}`; + let params = '{'; + params += `api_key=${api_key}`; + params += `,playback_id=${playbackIdCounter++}`; params += ',vendor=deepgram'; params += `,voice=${model}`; params += ',write_cache_file=1'; @@ -1262,8 +1271,9 @@ const synthCartesia = async(logger, { const opts = !!options && Object.keys(options).length !== 0 ? options : JSON.parse(credOpts || '{}'); if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) { - let params = ''; - params += `{api_key=${api_key}`; + let params = '{'; + params += `api_key=${api_key}`; + params += `,playback_id=${playbackIdCounter++}`; params += `,model_id=${model_id}`; params += ',vendor=cartesia'; params += `,voice=${voice}`; @@ -1333,8 +1343,9 @@ const synthResemble = async(logger, { /* default to using the streaming interface, unless disabled by env var OR we want just a cache file */ if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) { - let params = ''; - params += `{api_key=${api_key}`; + let params = '{'; + params += `api_key=${api_key}`; + params += `,playback_id=${playbackIdCounter++}`; params += ',vendor=resemble'; params += `,voice=${voice}`; params += ',write_cache_file=1';