Merge pull request #51 from jambonz/feat/deepgram

support deepgram
This commit is contained in:
Dave Horton
2023-12-25 22:10:50 -05:00
committed by GitHub
2 changed files with 52 additions and 3 deletions

View File

@@ -84,7 +84,7 @@ async function synthAudio(client, logger, stats, { account_sid,
logger = logger || noopLogger;
assert.ok(['google', 'aws', 'polly', 'microsoft',
'wellsaid', 'nuance', 'nvidia', 'ibm', 'elevenlabs', 'whisper'].includes(vendor) ||
'wellsaid', 'nuance', 'nvidia', 'ibm', 'elevenlabs', 'whisper', 'deepgram'].includes(vendor) ||
vendor.startsWith('custom'),
`synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid, not ${vendor}`);
if ('google' === vendor) {
@@ -199,6 +199,9 @@ async function synthAudio(client, logger, stats, { account_sid,
case 'whisper':
audioBuffer = await synthWhisper(logger, {credentials, stats, voice, text});
break;
case 'deepgram':
audioBuffer = await synthDeepgram(logger, {credentials, stats, model, text});
break;
case vendor.startsWith('custom') ? vendor : 'cant_match_value':
({ audioBuffer, filePath } = await synthCustomVendor(logger,
{credentials, stats, language, voice, text, filePath}));
@@ -640,8 +643,27 @@ const synthWhisper = async(logger, {credentials, stats, voice, text}) => {
stats.increment('tts.count', ['vendor:openai', 'accepted:no']);
throw err;
}
}
;
};
/**
 * Synthesize speech with Deepgram's text-to-speech HTTP endpoint.
 *
 * POSTs `{text}` as JSON to `/v1/speak` on https://api.beta.deepgram.com,
 * asking for `audio/mpeg`, and resolves with the response body decoded by
 * bent's 'buffer' mode.
 *
 * @param {object} logger - logger exposing `info()`
 * @param {object} params
 * @param {object} params.credentials - must carry `api_key`, sent as `Authorization: Token <api_key>`
 * @param {object} params.stats - metrics sink exposing `increment()`
 * @param {string} params.model - Deepgram voice model, sent as the `model` query parameter
 * @param {string} params.text - text to synthesize
 * @returns {Promise<Buffer>} the audio returned by Deepgram
 * @throws rethrows any error raised while issuing the request, after logging
 *         it and incrementing `tts.count` with `accepted:no`
 */
const synthDeepgram = async(logger, {credentials, stats, model, text}) => {
  const {api_key} = credentials;
  try {
    const post = bent('https://api.beta.deepgram.com', 'POST', 'buffer', {
      'Authorization': `Token ${api_key}`,
      'Accept': 'audio/mpeg',
      'Content-Type': 'application/json'
    });
    // Percent-encode the model so characters such as '&', '=', '#' or spaces
    // cannot break the query string or smuggle in additional parameters.
    // Ordinary model names (letters, digits, '-') are left untouched.
    const mp3 = await post(`/v1/speak?model=${encodeURIComponent(model)}`, {
      text
    });
    return mp3;
  } catch (err) {
    logger.info({err}, 'synth Deepgram returned error');
    stats.increment('tts.count', ['vendor:deepgram', 'accepted:no']);
    throw err;
  }
};
const getFileExtFromMime = (mime) => {
switch (mime) {
case 'audio/wav':

View File

@@ -514,6 +514,33 @@ test('whisper speech synth tests', async(t) => {
client.quit();
})
test('Deepgram speech synth tests', async(t) => {
const fn = require('..');
const {synthAudio, client} = fn(opts, logger);
if (!process.env.DEEPGRAM_API_KEY) {
t.pass('skipping Deepgram speech synth tests since DEEPGRAM_API_KEY');
return t.end();
}
const text = 'Hi there and welcome to jambones!';
try {
let opts = await synthAudio(stats, {
vendor: 'deepgram',
credentials: {
api_key: process.env.DEEPGRAM_API_KEY
},
model: 'alpha-asteria-en-v2',
text,
});
t.ok(!opts.servedFromCache, `successfully synthesized deepgram audio to ${opts.filePath}`);
} catch (err) {
console.error(JSON.stringify(err));
t.end(err);
}
client.quit();
})
test('TTS Cache tests', async(t) => {
const fn = require('..');
const {purgeTtsCache, getTtsSize, client} = fn(opts, logger);