custom tts vendor

2025-12-19 03:37:49 +00:00 · 2023-03-05 21:01:45 +07:00
parent 3e503996e6
commit d423cbc19a
2 changed files with 76 additions and 2 deletions
--- a/lib/synth-audio.js
+++ b/lib/synth-audio.js
@@ -60,8 +60,9 @@ async function synthAudio(client, logger, stats, {
  let rtt;
  logger = logger || noopLogger;

-  assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'ibm'].includes(vendor),
-    `synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid, not ${vendor}`);
+  assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'ibm'].includes(vendor) ||
+  vendor.startsWith('custom'),
+  `synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid, not ${vendor}`);
  if ('google' === vendor) {
    assert.ok(language, 'synthAudio requires language when google is used');
  }
@@ -91,6 +92,8 @@ async function synthAudio(client, logger, stats, {
    language = 'en-US'; // WellSaid only supports English atm
    assert.ok(voice, 'synthAudio requires voice when wellsaid is used');
    assert.ok(!text.startsWith('<speak'), 'wellsaid does not support SSML tags');
+  } else  if (vendor.startsWith('custom')) {
+    assert.ok(credentials.custom_tts_url, `synthAudio requires custom_tts_url in credentials when ${vendor} is used`);
  }

  const key = makeSynthKey({
@@ -151,6 +154,9 @@ async function synthAudio(client, logger, stats, {
      case 'wellsaid':
        audioBuffer = await synthWellSaid(logger, {credentials, stats, language, voice, text, filePath});
        break;
+      case vendor.startsWith('custom') ? vendor : 'cant_match_value':
+        audioBuffer = await synthCustomVendor(logger, {credentials, stats, language, voice, text});
+        break;
      default:
        assert(`synthAudio: unsupported speech vendor ${vendor}`);
    }
@@ -433,4 +439,31 @@ const synthNvidia = async(client, logger, {credentials, stats, language,  voice,
  });
 };

+
+// Synthesize via a user-hosted TTS endpoint: POSTs JSON {language, format, voice, type, text} to
+// credentials.custom_tts_url with a bearer token and resolves with the response body as a buffer
+// (audio/mpeg is requested, mp3 only). Request failures are logged and rethrown to the caller.
+const synthCustomVendor = async(logger, {credentials, stats, language, voice, text}) => {
+  // credentials may carry no vendor name; default it so the log never reads "Vendor undefined"
+  const {vendor = 'custom', auth_token, custom_tts_url} = credentials;
+  try {
+    const post = bent('POST', 'buffer', {
+      'Authorization': `Bearer ${auth_token}`,
+      'Accept': 'audio/mpeg',
+      'Content-Type': 'application/json'
+    });
+    // return await (not a bare return) so a rejected request is caught and logged below
+    return await post(custom_tts_url, {
+      language,
+      format: 'audio/mpeg',
+      voice,
+      // text beginning with '<speak' is SSML; the prefix also covers tags that carry attributes
+      type: text.startsWith('<speak') ? 'ssml' : 'text',
+      text
+    });
+  } catch (err) {
+    logger.info({err}, `Vendor ${vendor} returned error`);
+    throw err;
+  }
+};
+
 module.exports = synthAudio;
--- a/test/synth.js
+++ b/test/synth.js
@@ -326,6 +326,47 @@ test('IBM watson speech synth tests', async(t) => {
  client.quit();
 });

+test('Custom Vendor speech synth tests', async(t) => {
+  const fn = require('..');
+  const {synthAudio, client} = fn(opts, logger);
+
+  // if (!process.env.CUSTOM_VENDOR_TTS_URL) {
+  //   t.pass('skipping Custom Vendor speech synth tests since CUSTOM_VENDOR_TTS_URL not provided');
+  //   return t.end();
+  // }
+  try {
+    let opts = await synthAudio(stats, {
+      vendor: 'custom:somethingnew',
+      credentials: {
+        use_for_tts: 1,
+        custom_tts_url: process.env.CUSTOM_VENDOR_TTS_URL,
+        auth_token: 'some_jwt_token'
+      },
+      language: 'en-US',
+      voice: 'English-US.Female-1',
+      text: 'This is a test.  This is only a test',
+    });
+    t.ok(!opts.servedFromCache, `successfully synthesized custom vendor audio to ${opts.filePath}`);
+    // same request again, this time with the text wrapped in <speak> tags
+    opts = await synthAudio(stats, {
+      vendor: 'custom:somethingnew',
+      credentials: {
+        use_for_tts: 1,
+        custom_tts_url: process.env.CUSTOM_VENDOR_TTS_URL,
+        auth_token: 'some_jwt_token'
+      },
+      language: 'en-US',
+      voice: 'English-US.Female-1',
+      text: '<speak>This is a test.  This is only a test</speak>',
+    });
+    t.ok(!opts.servedFromCache, `successfully synthesized custom vendor audio to ${opts.filePath}`);
+  } catch (err) {
+    console.error(err);
+    t.end(err);
+  }
+  client.quit();
+});
+
 test('TTS Cache tests', async(t) => {
  const fn = require('..');
  const {purgeTtsCache, client} = fn(opts, logger);