mirror of
https://github.com/jambonz/speech-utils.git
synced 2026-01-25 02:08:26 +00:00
support mod_rimelabs_tts
This commit is contained in:
@@ -84,10 +84,10 @@ async function synthAudio(client, logger, stats, { account_sid,
|
|||||||
let rtt;
|
let rtt;
|
||||||
logger = logger || noopLogger;
|
logger = logger || noopLogger;
|
||||||
|
|
||||||
assert.ok(['google', 'aws', 'polly', 'microsoft',
|
assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'ibm', 'elevenlabs',
|
||||||
'wellsaid', 'nuance', 'nvidia', 'ibm', 'elevenlabs', 'whisper', 'deepgram', 'playht'].includes(vendor) ||
|
'whisper', 'deepgram', 'playht', 'rimelabs'].includes(vendor) ||
|
||||||
vendor.startsWith('custom'),
|
vendor.startsWith('custom'),
|
||||||
`synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid, not ${vendor}`);
|
`synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid ..etc, not ${vendor}`);
|
||||||
if ('google' === vendor) {
|
if ('google' === vendor) {
|
||||||
assert.ok(language, 'synthAudio requires language when google is used');
|
assert.ok(language, 'synthAudio requires language when google is used');
|
||||||
}
|
}
|
||||||
@@ -123,11 +123,15 @@ async function synthAudio(client, logger, stats, { account_sid,
|
|||||||
assert.ok(voice, 'synthAudio requires voice when elevenlabs is used');
|
assert.ok(voice, 'synthAudio requires voice when elevenlabs is used');
|
||||||
assert.ok(credentials.api_key, 'synthAudio requires api_key when elevenlabs is used');
|
assert.ok(credentials.api_key, 'synthAudio requires api_key when elevenlabs is used');
|
||||||
assert.ok(credentials.model_id, 'synthAudio requires model_id when elevenlabs is used');
|
assert.ok(credentials.model_id, 'synthAudio requires model_id when elevenlabs is used');
|
||||||
} else if ('playht' === vendor) {
|
} else if ('playht' === vendor) {
|
||||||
assert.ok(voice, 'synthAudio requires voice when playht is used');
|
assert.ok(voice, 'synthAudio requires voice when playht is used');
|
||||||
assert.ok(credentials.api_key, 'synthAudio requires api_key when playht is used');
|
assert.ok(credentials.api_key, 'synthAudio requires api_key when playht is used');
|
||||||
assert.ok(credentials.user_id, 'synthAudio requires user_id when playht is used');
|
assert.ok(credentials.user_id, 'synthAudio requires user_id when playht is used');
|
||||||
assert.ok(credentials.voice_engine, 'synthAudio requires voice_engine when playht is used');
|
assert.ok(credentials.voice_engine, 'synthAudio requires voice_engine when playht is used');
|
||||||
|
} else if ('rimelabs' === vendor) {
|
||||||
|
assert.ok(voice, 'synthAudio requires voice when rimelabs is used');
|
||||||
|
assert.ok(credentials.api_key, 'synthAudio requires api_key when rimelabs is used');
|
||||||
|
assert.ok(credentials.model_id, 'synthAudio requires model_id when rimelabs is used');
|
||||||
} else if ('whisper' === vendor) {
|
} else if ('whisper' === vendor) {
|
||||||
assert.ok(voice, 'synthAudio requires voice when whisper is used');
|
assert.ok(voice, 'synthAudio requires voice when whisper is used');
|
||||||
assert.ok(credentials.model_id, 'synthAudio requires model when whisper is used');
|
assert.ok(credentials.model_id, 'synthAudio requires model when whisper is used');
|
||||||
@@ -217,6 +221,12 @@ async function synthAudio(client, logger, stats, { account_sid,
|
|||||||
});
|
});
|
||||||
if (audioBuffer?.filePath) return audioBuffer;
|
if (audioBuffer?.filePath) return audioBuffer;
|
||||||
break;
|
break;
|
||||||
|
case 'rimelabs':
|
||||||
|
audioBuffer = await synthRimelabs(logger, {
|
||||||
|
credentials, options, stats, language, voice, text, renderForCaching, disableTtsStreaming, filePath
|
||||||
|
});
|
||||||
|
if (audioBuffer?.filePath) return audioBuffer;
|
||||||
|
break;
|
||||||
case 'whisper':
|
case 'whisper':
|
||||||
audioBuffer = await synthWhisper(logger, {
|
audioBuffer = await synthWhisper(logger, {
|
||||||
credentials, stats, voice, text, renderForCaching, disableTtsStreaming});
|
credentials, stats, voice, text, renderForCaching, disableTtsStreaming});
|
||||||
@@ -748,6 +758,52 @@ const synthPlayHT = async(logger, {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Synthesize speech with Rime Labs.
 *
 * When streaming is allowed (env var JAMBONES_DISABLE_TTS_STREAMING is unset and neither
 * renderForCaching nor disableTtsStreaming is truthy) no HTTP request is made here: a
 * `say:{...}text` descriptor is returned instead. Otherwise the text is POSTed to the
 * Rime Labs HTTP API and the response body is returned.
 *
 * @param {object} logger - logger exposing `info`
 * @param {object} params
 * @param {object} params.credentials - `api_key`, `model_id` and optional `options`
 *   (either a JSON string or an already-parsed object)
 * @param {object} [params.options] - per-request options; when non-empty they replace credentials.options
 * @param {object} params.stats - stats client exposing `increment`
 * @param {string} params.voice - Rime Labs speaker name
 * @param {string} params.text - text to synthesize
 * @param {boolean} [params.renderForCaching] - truthy to force the HTTP (non-streaming) path
 * @param {boolean} [params.disableTtsStreaming] - truthy to force the HTTP (non-streaming) path
 * @returns {Promise<object>} `{filePath, servedFromCache, rtt}` on the streaming path,
 *   otherwise whatever the POST resolves to (requested as a buffer with Accept: audio/mp3)
 * @throws rethrows any error raised by the HTTP request; a SyntaxError if credentials.options
 *   is a string that is not valid JSON
 */
const synthRimelabs = async(logger, {
  credentials, options, stats, voice, text, renderForCaching, disableTtsStreaming
}) => {
  const {api_key, model_id, options: credOpts} = credentials;

  /* per-request options win when non-empty; otherwise use the options stored with the
     credentials, which may be a JSON string or an object that was parsed upstream */
  let opts;
  if (!!options && Object.keys(options).length !== 0) {
    opts = options;
  } else if (credOpts && typeof credOpts === 'object') {
    opts = credOpts;
  } else {
    opts = JSON.parse(credOpts || '{}');
  }

  /* default to using the streaming interface, unless disabled by env var OR we want just a cache file */
  if (!process.env.JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
    const params = [
      `api_key=${api_key}`,
      `model_id=${model_id}`,
      'vendor=rimelabs',
      `voice=${voice}`,
      'write_cache_file=1',
      /* optional tuning knobs are only emitted when set to a truthy value */
      ...(opts.speedAlpha ? [`speed_alpha=${opts.speedAlpha}`] : []),
      ...(opts.reduceLatency ? [`reduce_latency=${opts.reduceLatency}`] : [])
    ].join(',');

    return {
      /* line breaks are flattened to spaces so the descriptor stays on a single line */
      filePath: `say:{${params}}${text.replace(/\n/g, ' ').replace(/\r/g, ' ')}`,
      servedFromCache: false,
      rtt: 0
    };
  }

  try {
    const post = bent('https://users.rime.ai', 'POST', 'buffer', {
      'Authorization': `Bearer ${api_key}`,
      'Accept': 'audio/mp3',
      'Content-Type': 'application/json'
    });
    /* opts is spread last, so caller-supplied options can override the defaults above it */
    const mp3 = await post('/v1/rime-tts', {
      speaker: voice,
      text,
      modelId: model_id,
      samplingRate: 8000,
      ...opts
    });
    return mp3;
  } catch (err) {
    logger.info({err}, 'synth rimelabs returned error');
    stats.increment('tts.count', ['vendor:rimelabs', 'accepted:no']);
    throw err;
  }
};
|
||||||
|
|
||||||
const synthWhisper = async(logger, {credentials, stats, voice, text, renderForCaching, disableTtsStreaming}) => {
|
const synthWhisper = async(logger, {credentials, stats, voice, text, renderForCaching, disableTtsStreaming}) => {
|
||||||
const {api_key, model_id, baseURL, timeout, speed} = credentials;
|
const {api_key, model_id, baseURL, timeout, speed} = credentials;
|
||||||
/* if the env is set to stream then bag out, unless we are specifically rendering to generate a cache file */
|
/* if the env is set to stream then bag out, unless we are specifically rendering to generate a cache file */
|
||||||
|
|||||||
@@ -581,14 +581,48 @@ test('PlayHT speech synth tests', async(t) => {
|
|||||||
text,
|
text,
|
||||||
renderForCaching: true
|
renderForCaching: true
|
||||||
});
|
});
|
||||||
t.ok(!opts.servedFromCache, `successfully synthesized eleven audio to ${opts.filePath}`);
|
t.ok(!opts.servedFromCache, `successfully playht eleven audio to ${opts.filePath}`);
|
||||||
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error(JSON.stringify(err));
|
console.error(JSON.stringify(err));
|
||||||
t.end(err);
|
t.end(err);
|
||||||
}
|
}
|
||||||
client.quit();
|
client.quit();
|
||||||
})
|
});
|
||||||
|
|
||||||
|
/* Rime Labs synthesis test: calls synthAudio with renderForCaching set and checks the result
   was not served from cache. Skipped unless RIMELABS_API_KEY is set in the environment. */
test('rimelabs speech synth tests', async(t) => {
  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);

  if (!process.env.RIMELABS_API_KEY) {
    t.pass('skipping rimelabs speech synth tests since RIMELABS_API_KEY is not provided');
    /* the client was already created above; quit it here too so the skip path releases it
       just like the normal path does (presumably a redis connection - confirm) */
    client.quit();
    return t.end();
  }
  const text = 'Hi there and welcome to jambones!';
  try {
    /* named `result` rather than `opts` to avoid shadowing the outer `opts` passed to fn() */
    const result = await synthAudio(stats, {
      vendor: 'rimelabs',
      credentials: {
        api_key: process.env.RIMELABS_API_KEY,
        model_id: 'mist',
        /* vendor options travel inside the credentials as a JSON string */
        options: JSON.stringify({
          speedAlpha: 1.0,
          reduceLatency: false
        })
      },
      language: 'en-US',
      voice: 'amber',
      text,
      renderForCaching: true
    });
    t.ok(!result.servedFromCache, `successfully synthesized rimelabs audio to ${result.filePath}`);
  } catch (err) {
    console.error(JSON.stringify(err));
    t.end(err);
  }
  client.quit();
});
|
||||||
|
|
||||||
test('whisper speech synth tests', async(t) => {
|
test('whisper speech synth tests', async(t) => {
|
||||||
const fn = require('..');
|
const fn = require('..');
|
||||||
|
|||||||
Reference in New Issue
Block a user