Merge pull request #51 from jambonz/feat/deepgram

support deepgram
2026-07-04 19:31:49 +00:00 · 2023-12-25 22:10:50 -05:00
parent 4ef8538bcd 4cfc730d92
commit 08aae32975
2 changed files with 52 additions and 3 deletions
@@ -84,7 +84,7 @@ async function synthAudio(client, logger, stats, { account_sid,
  logger = logger || noopLogger;

  assert.ok(['google', 'aws', 'polly', 'microsoft',
-    'wellsaid', 'nuance', 'nvidia', 'ibm', 'elevenlabs', 'whisper'].includes(vendor) ||
+    'wellsaid', 'nuance', 'nvidia', 'ibm', 'elevenlabs', 'whisper', 'deepgram'].includes(vendor) ||
  vendor.startsWith('custom'),
  `synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid, not ${vendor}`);
  if ('google' === vendor) {
@@ -199,6 +199,9 @@ async function synthAudio(client, logger, stats, { account_sid,
      case 'whisper':
        audioBuffer = await synthWhisper(logger, {credentials, stats, voice, text});
        break;
+      case 'deepgram':
+        audioBuffer = await synthDeepgram(logger, {credentials, stats, model, text});
+        break;
      case vendor.startsWith('custom') ? vendor : 'cant_match_value':
        ({ audioBuffer, filePath } = await synthCustomVendor(logger,
          {credentials, stats, language, voice, text, filePath}));
@@ -640,8 +643,27 @@ const synthWhisper = async(logger, {credentials, stats, voice, text}) => {
    stats.increment('tts.count', ['vendor:openai', 'accepted:no']);
    throw err;
  }
-}
-;
+};
+
+/**
+ * Synthesize speech by POSTing the text to the Deepgram speak endpoint.
+ *
+ * @param {object} logger - logger exposing an `info` method
+ * @param {object} params
+ * @param {object} params.credentials - must carry the Deepgram `api_key`
+ * @param {object} params.stats - metrics client exposing `increment`
+ * @param {string} params.model - sent to Deepgram as the `model` query parameter
+ * @param {string} params.text - text to be spoken, sent as the JSON request body
+ * @returns {Promise<Buffer>} the response body (audio/mpeg is requested)
+ * @throws rethrows any error from the HTTP request after logging and counting it
+ */
+const synthDeepgram = async(logger, {credentials, stats, model, text}) => {
+  const {api_key} = credentials;
+  try {
+    const post = bent('https://api.beta.deepgram.com', 'POST', 'buffer', {
+      'Authorization': `Token ${api_key}`,
+      'Accept': 'audio/mpeg',
+      'Content-Type': 'application/json'
+    });
+    // Encode the model so reserved characters (&, #, =, spaces) cannot
+    // corrupt or extend the query string; plain model names are unchanged.
+    const mp3 = await post(`/v1/speak?model=${encodeURIComponent(model)}`, {
+      text
+    });
+    return mp3;
+  } catch (err) {
+    logger.info({err}, 'synth Deepgram returned error');
+    stats.increment('tts.count', ['vendor:deepgram', 'accepted:no']);
+    throw err;
+  }
+};
+
 const getFileExtFromMime = (mime) => {
  switch (mime) {
    case 'audio/wav':
@@ -514,6 +514,33 @@ test('whisper speech synth tests', async(t) => {
  client.quit();
 })

+// Integration test: synthesizes a short phrase through Deepgram.
+// Skipped (reported as a pass) when DEEPGRAM_API_KEY is not set.
+test('Deepgram speech synth tests', async(t) => {
+  const fn = require('..');
+  const {synthAudio, client} = fn(opts, logger);
+
+  try {
+    if (!process.env.DEEPGRAM_API_KEY) {
+      t.pass('skipping Deepgram speech synth tests since DEEPGRAM_API_KEY not provided');
+      return t.end();
+    }
+    const text = 'Hi there and welcome to jambones!';
+    // named `result` so it does not shadow the module-level `opts` passed to fn()
+    const result = await synthAudio(stats, {
+      vendor: 'deepgram',
+      credentials: {
+        api_key: process.env.DEEPGRAM_API_KEY
+      },
+      model: 'alpha-asteria-en-v2',
+      text,
+    });
+    t.ok(!result.servedFromCache, `successfully synthesized deepgram audio to ${result.filePath}`);
+
+  } catch (err) {
+    // log the error itself: JSON.stringify drops an Error's message and stack
+    console.error(err);
+    t.end(err);
+  } finally {
+    // release the client on every path, including the early skip return
+    client.quit();
+  }
+})
+
 test('TTS Cache tests', async(t) => {
  const fn = require('..');
  const {purgeTtsCache, getTtsSize, client} = fn(opts, logger);