mirror of
https://github.com/jambonz/speech-utils.git
synced 2025-12-19 03:37:49 +00:00
add synthesize verbio
This commit is contained in:
@@ -22,6 +22,7 @@ const {
|
||||
noopLogger
|
||||
} = require('./utils');
|
||||
const getNuanceAccessToken = require('./get-nuance-access-token');
|
||||
const getVerbioAccessToken = require('./get-verbio-token');
|
||||
const {
|
||||
SynthesisRequest,
|
||||
Voice,
|
||||
@@ -86,7 +87,7 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
|
||||
logger = logger || noopLogger;
|
||||
|
||||
assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'ibm', 'elevenlabs',
|
||||
'whisper', 'deepgram', 'playht', 'rimelabs'].includes(vendor) ||
|
||||
'whisper', 'deepgram', 'playht', 'rimelabs', 'verbio'].includes(vendor) ||
|
||||
vendor.startsWith('custom'),
|
||||
`synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid ..etc, not ${vendor}`);
|
||||
if ('google' === vendor) {
|
||||
@@ -139,6 +140,10 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
|
||||
assert.ok(credentials.api_key, 'synthAudio requires api_key when whisper is used');
|
||||
} else if (vendor.startsWith('custom')) {
|
||||
assert.ok(credentials.custom_tts_url, `synthAudio requires custom_tts_url in credentials when ${vendor} is used`);
|
||||
} else if ('verbio' === vendor) {
|
||||
assert.ok(voice, 'synthAudio requires voice when verbio is used');
|
||||
assert.ok(credentials.client_id, 'synthAudio requires client_id when verbio is used');
|
||||
assert.ok(credentials.client_secret, 'synthAudio requires client_secret when verbio is used');
|
||||
}
|
||||
const key = makeSynthKey({
|
||||
account_sid,
|
||||
@@ -149,7 +154,7 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
|
||||
text
|
||||
});
|
||||
let filePath;
|
||||
if (['nuance', 'nvidia'].includes(vendor) ||
|
||||
if (['nuance', 'nvidia', 'verbio'].includes(vendor) ||
|
||||
(
|
||||
(process.env.JAMBONES_TTS_TRIM_SILENCE || !process.env.JAMBONES_DISABLE_TTS_STREAMING) &&
|
||||
['microsoft', 'azure'].includes(vendor)
|
||||
@@ -234,6 +239,11 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
|
||||
credentials, stats, voice, text, renderForCaching, disableTtsStreaming});
|
||||
if (audioBuffer?.filePath) return audioBuffer;
|
||||
break;
|
||||
case 'verbio':
|
||||
audioBuffer = await synthVerbio(client, logger, {
|
||||
credentials, stats, voice, text, renderForCaching, disableTtsStreaming});
|
||||
if (audioBuffer?.filePath) return audioBuffer;
|
||||
break;
|
||||
case 'deepgram':
|
||||
audioBuffer = await synthDeepgram(logger, {credentials, stats, model, text,
|
||||
renderForCaching, disableTtsStreaming});
|
||||
@@ -819,6 +829,46 @@ const synthRimelabs = async(logger, {
|
||||
throw err;
|
||||
}
|
||||
};
|
||||
/**
 * Synthesize speech using the Verbio Speech Center Text-To-Speech REST API.
 *
 * Two modes are supported:
 *  - streaming: unless JAMBONES_DISABLE_TTS_STREAMING is set, or the caller passed
 *    `renderForCaching` / `disableTtsStreaming`, no audio is fetched here. A
 *    `say:{...}text` descriptor carrying the access token and voice is returned instead.
 *  - rendering: the text is POSTed to `/api/v1/synthesize` requesting 8k pcm16 audio and
 *    the response body returned by the `bent` 'buffer' client is handed back as-is.
 *
 * @param {object} client - passed through to getVerbioAccessToken
 * @param {object} logger - logger exposing `info`
 * @param {object} opts
 * @param {object} opts.credentials - passed through to getVerbioAccessToken
 * @param {object} opts.stats - metrics sink exposing `increment`
 * @param {string} opts.voice - Verbio voice id
 * @param {string} opts.text - text to synthesize (2000 characters at most)
 * @param {boolean} [opts.renderForCaching] - when truthy, always render the audio
 * @param {boolean} [opts.disableTtsStreaming] - when truthy, always render the audio
 * @returns {Promise<object>} `{filePath, servedFromCache, rtt}` in streaming mode,
 *   otherwise the synthesized audio returned by the HTTP client
 * @throws {Error} if the text is too long, or if obtaining the token or synthesizing fails
 */
const synthVerbio = async(client, logger, {credentials, stats, voice, text, renderForCaching, disableTtsStreaming}) => {
  //https://doc.speechcenter.verbio.com/#tag/Text-To-Speech-REST-API
  if (text.length > 2000) {
    throw new Error('Verbio cannot synthesize for the text length larger than 2000 characters');
  }

  try {
    // Fetch the token inside the guarded region so that authentication failures are
    // logged and counted as 'accepted:no' exactly like synthesis failures.
    const token = await getVerbioAccessToken(client, logger, credentials);

    const streaming = !process.env.JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming;
    if (streaming) {
      const params = [
        `access_token=${token.access_token}`,
        'vendor=verbio',
        `voice=${voice}`,
        'write_cache_file=1'
      ].join(',');

      return {
        // newlines are flattened to spaces so the descriptor stays on a single line
        filePath: `say:{${params}}${text.replace(/\n/g, ' ')}`,
        servedFromCache: false,
        rtt: 0
      };
    }

    const post = bent('https://us.rest.speechcenter.verbio.com', 'POST', 'buffer', {
      'Authorization': `Bearer ${token.access_token}`,
      'User-Agent': 'jambonz',
      'Content-Type': 'application/json'
    });
    const audio = await post('/api/v1/synthesize', {
      voice_id: voice,
      output_sample_rate: '8k',
      output_encoding: 'pcm16',
      text
    });
    return audio;
  } catch (err) {
    logger.info({err}, 'synth Verbio returned error');
    stats.increment('tts.count', ['vendor:verbio', 'accepted:no']);
    throw err;
  }
};
|
||||
|
||||
const synthWhisper = async(logger, {credentials, stats, voice, text, renderForCaching, disableTtsStreaming}) => {
|
||||
const {api_key, model_id, baseURL, timeout, speed} = credentials;
|
||||
|
||||
@@ -28,9 +28,7 @@ test('Verbio - get Access key and voices', async(t) => {
|
||||
client_secret: process.env.VERBIO_CLIENT_SECRET
|
||||
};
|
||||
let obj = await getVerbioAccessToken(credentials);
|
||||
t.ok(obj.access_token && !obj.servedFromCache, 'successfully received access token not from cache');
|
||||
obj = await getVerbioAccessToken(credentials);
|
||||
t.ok(obj.access_token && obj.servedFromCache, 'successfully received access token from cache');
|
||||
t.ok(obj.access_token , 'successfully received access token not from cache');
|
||||
const voices = await getTtsVoices({vendor: 'verbio', credentials});
|
||||
t.ok(voices && voices.length != 0, 'successfully received verbio voices');
|
||||
} catch (err) {
|
||||
|
||||
@@ -670,6 +670,40 @@ test('whisper speech synth tests', async(t) => {
|
||||
language: 'en-US',
|
||||
voice: 'alloy',
|
||||
text,
|
||||
renderForCaching: true
|
||||
});
|
||||
t.ok(!opts.servedFromCache, `successfully synthesized whisper audio to ${opts.filePath}`);
|
||||
|
||||
} catch (err) {
|
||||
console.error(JSON.stringify(err));
|
||||
t.end(err);
|
||||
}
|
||||
client.quit();
|
||||
});
|
||||
|
||||
test('Verbio speech synth tests', async(t) => {
|
||||
const fn = require('..');
|
||||
const {synthAudio, client} = fn(opts, logger);
|
||||
|
||||
if (!process.env.VERBIO_CLIENT_ID || !process.env.VERBIO_CLIENT_SECRET) {
|
||||
t.pass('skipping Verbio Synthesize test since no Verbio Keys provided');
|
||||
t.end();
|
||||
client.quit();
|
||||
return;
|
||||
}
|
||||
|
||||
const text = 'Hi there and welcome to jambones!';
|
||||
try {
|
||||
let opts = await synthAudio(stats, {
|
||||
vendor: 'verbio',
|
||||
credentials: {
|
||||
client_id: process.env.VERBIO_CLIENT_ID,
|
||||
client_secret: process.env.VERBIO_CLIENT_SECRET
|
||||
},
|
||||
language: 'en-US',
|
||||
voice: 'tommy_en-us',
|
||||
text,
|
||||
renderForCaching: true
|
||||
});
|
||||
t.ok(!opts.servedFromCache, `successfully synthesized whisper audio to ${opts.filePath}`);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user