diff --git a/lib/synth-audio.js b/lib/synth-audio.js index e97ce7c..7b4dd39 100644 --- a/lib/synth-audio.js +++ b/lib/synth-audio.js @@ -80,7 +80,7 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc logger = logger || noopLogger; assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nvidia', 'elevenlabs', - 'whisper', 'deepgram', 'rimelabs', 'cartesia', 'inworld', 'resemble'].includes(vendor) || + 'whisper', 'deepgram', 'rimelabs', 'cartesia', 'inworld', 'resemble', 'murf'].includes(vendor) || vendor.startsWith('custom'), `synthAudio supported vendors are google, aws, microsoft, nvidia and wellsaid ..etc, not ${vendor}`); if ('google' === vendor) { @@ -127,6 +127,9 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc } else if ('cartesia' === vendor) { assert.ok(credentials.api_key, 'synthAudio requires api_key when cartesia is used'); assert.ok(credentials.model_id, 'synthAudio requires model_id when cartesia is used'); + } else if ('murf' === vendor) { + assert.ok(voice, 'synthAudio requires voice when murf is used'); + assert.ok(credentials.api_key, 'synthAudio requires api_key when murf is used'); } else if (vendor === 'resemble') { assert.ok(voice, 'synthAudio requires voice when resemble is used'); assert.ok(credentials.api_key, 'synthAudio requires api_key when resemble is used'); @@ -211,6 +214,11 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc credentials, options, stats, language, voice, key, text, renderForCaching, disableTtsStreaming, disableTtsCache}); break; + case 'murf': + audioData = await synthMurf(logger, { + credentials, options, stats, language, voice, key, text, renderForCaching, disableTtsStreaming, + disableTtsCache}); + break; case 'whisper': audioData = await synthWhisper(logger, { credentials, stats, voice, key, text, instructions, renderForCaching, disableTtsStreaming, @@ -969,6 +977,72 @@ const synthRimelabs = async(logger, { 
/**
 * Synthesize speech using Murf.
 *
 * When streaming is allowed (JAMBONES_DISABLE_TTS_STREAMING is falsy, the caller did not set
 * disableTtsStreaming, and the audio is not being rendered for caching) no HTTP request is made:
 * a `say:{...}text` descriptor is returned instead. Otherwise the audio is requested from the
 * Murf REST endpoint, asking for mono WAV at 8000 Hz.
 *
 * @param {object} logger - logger exposing info()
 * @param {object} params
 * @param {object} params.credentials - api_key, plus optional model_id, api_uri and options;
 *   options may be a JSON string or an already-parsed object
 * @param {object} [params.options] - per-request options; when non-empty they are used instead of
 *   credentials.options (recognised keys: style, rate, pitch, variation)
 * @param {object} params.stats - stats client exposing increment()
 * @param {string} [params.language] - sent as `language` (streaming) or `locale` (REST)
 * @param {string} params.voice - murf voice identifier
 * @param {string} params.key - sent as playback_id on the streaming path
 * @param {string} params.text - text to speak
 * @param {boolean} [params.renderForCaching] - truthy forces the REST path
 * @param {boolean} [params.disableTtsStreaming] - truthy forces the REST path
 * @param {boolean} [params.disableTtsCache] - truthy sets write_cache_file=0 on the streaming path
 * @returns {Promise<object>} `{filePath, servedFromCache, rtt}` on the streaming path, otherwise
 *   `{audioContent, extension, sampleRate}`
 * @throws {SyntaxError} when credentials.options is a string that is not valid JSON
 * @throws rethrows whatever the HTTP request throws, after counting it as `accepted:no`
 */
const synthMurf = async(logger, {
  credentials, options, stats, language, voice, key, text, renderForCaching, disableTtsStreaming, disableTtsCache
}) => {
  const {api_key, model_id, api_uri, options: credOpts} = credentials;

  /* per-request options win when non-empty; otherwise use the credential's options, which may
     arrive either as a JSON string or as an already-parsed object */
  let opts;
  if (!!options && Object.keys(options).length !== 0) opts = options;
  else if (typeof credOpts === 'string') opts = JSON.parse(credOpts || '{}');
  else opts = credOpts;
  /* anything that is not an object (missing options, JSON null, scalars) carries no settings */
  if (!opts || typeof opts !== 'object') opts = {};

  /* numeric tuning knobs are forwarded whenever set, so that an explicit 0 is not dropped */
  const tuning = ['rate', 'pitch', 'variation']
    .filter((name) => opts[name] !== undefined && opts[name] !== null);

  /* default to using the streaming interface, unless disabled by env var OR we want just a cache file */
  if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
    /* param keys here must match mod_murf_tts's text_param handler */
    const params = [
      `api_key=${api_key}`,
      `playback_id=${key}`,
      'vendor=murf',
      `voice=${voice}`
    ];
    if (model_id) params.push(`model_id=${model_id}`);
    if (language) params.push(`language=${language}`);
    if (api_uri) params.push(`api_uri=${api_uri}`);
    if (opts.style) params.push(`style=${opts.style}`);
    for (const name of tuning) params.push(`${name}=${opts[name]}`);
    params.push(`write_cache_file=${disableTtsCache ? 0 : 1}`);

    return {
      filePath: `say:{${params.join(',')}}${text.replace(/\n/g, ' ').replace(/\r/g, ' ')}`,
      servedFromCache: false,
      rtt: 0
    };
  }

  try {
    const sampleRate = 8000;
    /* no Accept header: murf returns 406 if it doesn't match; the response
       container is selected by the `format` field in the body instead */
    const post = bent(api_uri || 'https://global.api.murf.ai', 'POST', 'buffer', {
      'api-key': api_key,
      'Content-Type': 'application/json'
    });
    /* murf REST schema is documented loosely; field names follow the SDK params
       (voice_id/model/format/sample_rate) plus the websocket voice fields. */
    const body = {text, voice_id: voice};
    if (model_id) body.model = model_id;
    if (language) body.locale = language;
    if (opts.style) body.style = opts.style;
    for (const name of tuning) body[name] = opts[name];
    body.format = 'WAV';
    body.sample_rate = sampleRate;
    body.channel_type = 'MONO';

    const audioContent = await post('/v1/speech/stream', body);
    return {
      audioContent,
      extension: 'wav',
      sampleRate
    };
  } catch (err) {
    logger.info({err}, 'synth murf returned error');
    stats.increment('tts.count', ['vendor:murf', 'accepted:no']);
    throw err;
  }
};