mirror of
https://github.com/jambonz/speech-utils.git
synced 2026-07-04 19:31:49 +00:00
feat(murf): add Murf.ai TTS support to synthAudio (#146)
* feat(murf): add Murf.ai TTS support to synthAudio
Add synthMurf() following the rimelabs/cartesia pattern:
- streaming path returns a say:{vendor=murf,...} filePath consumed by the
FreeSWITCH mod_murf_tts module
- non-streaming path calls POST /v1/speech/stream (api-key header) and returns
WAV audio for cache rendering
Register murf in the supported-vendor assert list and the synth switch.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
* fix(murf): drop Accept: audio/basic header (caused 406 Not Acceptable)
Murf's /v1/speech/stream rejects an unmatched Accept header with 406; the
response container is chosen by the `format` body field instead. Verified a
WAV request now returns 200 (valid RIFF/WAVE).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---------
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+75
-1
@@ -80,7 +80,7 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
|
|||||||
logger = logger || noopLogger;
|
logger = logger || noopLogger;
|
||||||
|
|
||||||
assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nvidia', 'elevenlabs',
|
assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nvidia', 'elevenlabs',
|
||||||
'whisper', 'deepgram', 'rimelabs', 'cartesia', 'inworld', 'resemble'].includes(vendor) ||
|
'whisper', 'deepgram', 'rimelabs', 'cartesia', 'inworld', 'resemble', 'murf'].includes(vendor) ||
|
||||||
vendor.startsWith('custom'),
|
vendor.startsWith('custom'),
|
||||||
`synthAudio supported vendors are google, aws, microsoft, nvidia and wellsaid ..etc, not ${vendor}`);
|
`synthAudio supported vendors are google, aws, microsoft, nvidia and wellsaid ..etc, not ${vendor}`);
|
||||||
if ('google' === vendor) {
|
if ('google' === vendor) {
|
||||||
@@ -127,6 +127,9 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
|
|||||||
} else if ('cartesia' === vendor) {
|
} else if ('cartesia' === vendor) {
|
||||||
assert.ok(credentials.api_key, 'synthAudio requires api_key when cartesia is used');
|
assert.ok(credentials.api_key, 'synthAudio requires api_key when cartesia is used');
|
||||||
assert.ok(credentials.model_id, 'synthAudio requires model_id when cartesia is used');
|
assert.ok(credentials.model_id, 'synthAudio requires model_id when cartesia is used');
|
||||||
|
} else if ('murf' === vendor) {
|
||||||
|
assert.ok(voice, 'synthAudio requires voice when murf is used');
|
||||||
|
assert.ok(credentials.api_key, 'synthAudio requires api_key when murf is used');
|
||||||
} else if (vendor === 'resemble') {
|
} else if (vendor === 'resemble') {
|
||||||
assert.ok(voice, 'synthAudio requires voice when resemble is used');
|
assert.ok(voice, 'synthAudio requires voice when resemble is used');
|
||||||
assert.ok(credentials.api_key, 'synthAudio requires api_key when resemble is used');
|
assert.ok(credentials.api_key, 'synthAudio requires api_key when resemble is used');
|
||||||
@@ -211,6 +214,11 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
|
|||||||
credentials, options, stats, language, voice, key, text, renderForCaching, disableTtsStreaming,
|
credentials, options, stats, language, voice, key, text, renderForCaching, disableTtsStreaming,
|
||||||
disableTtsCache});
|
disableTtsCache});
|
||||||
break;
|
break;
|
||||||
|
case 'murf':
|
||||||
|
audioData = await synthMurf(logger, {
|
||||||
|
credentials, options, stats, language, voice, key, text, renderForCaching, disableTtsStreaming,
|
||||||
|
disableTtsCache});
|
||||||
|
break;
|
||||||
case 'whisper':
|
case 'whisper':
|
||||||
audioData = await synthWhisper(logger, {
|
audioData = await synthWhisper(logger, {
|
||||||
credentials, stats, voice, key, text, instructions, renderForCaching, disableTtsStreaming,
|
credentials, stats, voice, key, text, instructions, renderForCaching, disableTtsStreaming,
|
||||||
@@ -969,6 +977,72 @@ const synthRimelabs = async(logger, {
|
|||||||
throw err;
|
throw err;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Resolve the voice options for a Murf request.
 *
 * Per-request `options` win when they are a non-empty object; otherwise the
 * options stored on the credentials are used. Those may arrive either as a JSON
 * string or as an already-parsed object; both are accepted.
 *
 * @param {object} logger - logger exposing `info()`
 * @param {object} [options] - per-request options
 * @param {string|object} [credOpts] - `credentials.options`
 * @returns {object} resolved options (empty object when none are configured)
 * @throws {SyntaxError} when `credOpts` is a string that is not valid JSON
 */
const parseMurfOptions = (logger, options, credOpts) => {
  if (!!options && Object.keys(options).length !== 0) return options;
  if (!credOpts) return {};
  /* already parsed: JSON.parse would coerce it to "[object Object]" and throw */
  if (typeof credOpts === 'object') return credOpts;
  try {
    /* the JSON literal `null` parses to null; treat it as "no options" */
    return JSON.parse(credOpts) || {};
  } catch (err) {
    logger.info({err}, 'synth murf: credentials options is not valid JSON');
    throw err;
  }
};

/**
 * Pick the optional Murf voice settings that are actually set.
 *
 * `style` is included only when truthy; `rate`, `pitch` and `variation` are
 * included whenever they are neither undefined nor null (so 0 is preserved).
 * Keys are inserted in the order style, rate, pitch, variation.
 *
 * @param {object} opts - resolved options
 * @returns {object} subset of opts holding only the settings that are set
 */
const pickMurfVoiceSettings = (opts) => {
  const settings = {};
  if (opts.style) settings.style = opts.style;
  for (const name of ['rate', 'pitch', 'variation']) {
    const value = opts[name];
    if (value !== undefined && value !== null) settings[name] = value;
  }
  return settings;
};

/**
 * Synthesize speech with Murf.ai.
 *
 * Streaming path (the default): returns a `say:{...}` filePath whose
 * parameters are read by the FreeSWITCH mod_murf_tts module; no HTTP request
 * is made here.
 * Non-streaming path (streaming disabled, or rendering a cache file): POSTs to
 * `/v1/speech/stream` and returns the WAV audio buffer.
 *
 * @param {object} logger - logger exposing `info()`
 * @param {object} params
 * @param {object} params.credentials - `{api_key, model_id, api_uri, options}`
 * @param {object} [params.options] - per-request voice options; overrides `credentials.options`
 * @param {object} params.stats - stats client exposing `increment()`
 * @param {string} [params.language] - sent as `language` (streaming) / `locale` (REST)
 * @param {string} params.voice - Murf voice id
 * @param {string} params.key - cache key, passed to the module as `playback_id`
 * @param {string} params.text - text to synthesize
 * @param {boolean} [params.renderForCaching] - force the non-streaming path
 * @param {boolean} [params.disableTtsStreaming] - force the non-streaming path
 * @param {boolean} [params.disableTtsCache] - when true, `write_cache_file=0` is sent to the module
 * @returns {Promise<object>} `{filePath, servedFromCache, rtt}` when streaming,
 *   otherwise `{audioContent, extension, sampleRate}`
 * @throws rethrows any error from the Murf request, or a SyntaxError for malformed `credentials.options`
 */
const synthMurf = async(logger, {
  credentials, options, stats, language, voice, key, text, renderForCaching, disableTtsStreaming, disableTtsCache
}) => {
  const {api_key, model_id, api_uri, options: credOpts} = credentials;
  const opts = parseMurfOptions(logger, options, credOpts);
  const voiceSettings = pickMurfVoiceSettings(opts);

  /* default to using the streaming interface, unless disabled by env var OR we want just a cache file */
  if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
    /* param keys here must match mod_murf_tts's text_param handler */
    const pairs = [
      `api_key=${api_key}`,
      `playback_id=${key}`,
      'vendor=murf',
      `voice=${voice}`,
      ...(model_id ? [`model_id=${model_id}`] : []),
      ...(language ? [`language=${language}`] : []),
      ...(api_uri ? [`api_uri=${api_uri}`] : []),
      ...Object.entries(voiceSettings).map(([name, value]) => `${name}=${value}`),
      `write_cache_file=${disableTtsCache ? 0 : 1}`
    ];

    return {
      /* each CR / LF becomes one space so the text stays on a single line */
      filePath: `say:{${pairs.join(',')}}${text.replace(/[\r\n]/g, ' ')}`,
      servedFromCache: false,
      rtt: 0
    };
  }

  try {
    const sampleRate = 8000;
    /* no Accept header: murf returns 406 if it doesn't match; the response
       container is selected by the `format` field in the body instead */
    const post = bent(api_uri || 'https://global.api.murf.ai', 'POST', 'buffer', {
      'api-key': api_key,
      'Content-Type': 'application/json'
    });
    /* murf REST schema is documented loosely; field names follow the SDK params
       (voice_id/model/format/sample_rate) plus the websocket voice fields. */
    const audioContent = await post('/v1/speech/stream', {
      text,
      voice_id: voice,
      ...(model_id && {model: model_id}),
      ...(language && {locale: language}),
      ...voiceSettings,
      format: 'WAV',
      sample_rate: sampleRate,
      channel_type: 'MONO'
    });
    return {
      audioContent,
      extension: 'wav',
      sampleRate
    };
  } catch (err) {
    logger.info({err}, 'synth murf returned error');
    stats.increment('tts.count', ['vendor:murf', 'accepted:no']);
    throw err;
  }
};
|
||||||
const synthWhisper = async(logger, {credentials, stats, voice, key, text, instructions,
|
const synthWhisper = async(logger, {credentials, stats, voice, key, text, instructions,
|
||||||
renderForCaching, disableTtsStreaming, disableTtsCache}) => {
|
renderForCaching, disableTtsStreaming, disableTtsCache}) => {
|
||||||
const {api_key, model_id, baseURL, timeout, speed} = credentials;
|
const {api_key, model_id, baseURL, timeout, speed} = credentials;
|
||||||
|
|||||||
Reference in New Issue
Block a user