Merge branch 'main' into feat/azure_tts

2026-01-25 02:08:26 +00:00 · 2024-03-30 17:04:01 +07:00
parent f06f96a6f0 8f3e930004
commit 16dd7a2805
4 changed files with 56 additions and 3 deletions
--- a/lib/synth-audio.js
+++ b/lib/synth-audio.js
@@ -635,6 +635,8 @@ const synthElevenlabs = async(logger, {
  if (!process.env.JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
    let params = '';
    params += `{api_key=${api_key}`;
+    params += ',vendor=elevenlabs';
+    params += `,voice=${voice}`;
    params += `,model_id=${model_id}`;
    params += `,optimize_streaming_latency=${opts.optimize_streaming_latency || 2}`;
    params += ',write_cache_file=1';
@@ -683,6 +685,7 @@ const synthWhisper = async(logger, {credentials, stats, voice, text, renderForCa
    let params = '';
    params += `{api_key=${api_key}`;
    params += `,model_id=${model_id}`;
+    params += ',vendor=whisper';
    params += `,voice=${voice}`;
    params += ',write_cache_file=1';
    if (speed) params += `,speed=${speed}`;
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "@jambonz/speech-utils",
-  "version": "0.0.42",
+  "version": "0.0.44",
  "lockfileVersion": 2,
  "requires": true,
  "packages": {
    "": {
      "name": "@jambonz/speech-utils",
-      "version": "0.0.42",
+      "version": "0.0.44",
      "license": "MIT",
      "dependencies": {
        "@aws-sdk/client-polly": "^3.496.0",
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@jambonz/speech-utils",
-  "version": "0.0.42",
+  "version": "0.0.44",
  "description": "TTS-related speech utilities for jambonz",
  "main": "index.js",
  "author": "Dave Horton",
--- a/test/synth.js
+++ b/test/synth.js
@@ -214,6 +214,56 @@ test('Azure speech synth tests', async(t) => {
  client.quit();
 });

+// Verifies that a full SSML document (using mstts:express-as) is synthesized
+// by the microsoft vendor on the first call and served from cache on the second.
+test('Azure SSML tests', async(t) => {
+  const fn = require('..');
+  const {synthAudio, client} = fn(opts, logger);
+
+  try {
+    if (!process.env.MICROSOFT_API_KEY || !process.env.MICROSOFT_REGION) {
+      t.pass('skipping Microsoft speech synth tests since MICROSOFT_API_KEY or MICROSOFT_REGION not provided');
+      return t.end();
+    }
+    const text = `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="en-US">
+    <voice name="en-US-JennyMultilingualNeural">
+    <mstts:express-as style="cheerful" styledegree="2">That'd be just amazing!
+    </mstts:express-as>
+    </voice>
+    </speak>`;
+    // Build a fresh request object per call so the two calls share no state.
+    const request = () => ({
+      vendor: 'microsoft',
+      credentials: {
+        api_key: process.env.MICROSOFT_API_KEY,
+        region: process.env.MICROSOFT_REGION,
+      },
+      language: 'en-US',
+      voice: 'en-US-ChristopherNeural',
+      text,
+    });
+
+    // First call: expected to be freshly synthesized rather than a cache hit.
+    let result = await synthAudio(stats, request());
+    t.ok(!result.servedFromCache, `successfully synthesized microsoft audio to ${result.filePath}`);
+    if (process.env.JAMBONES_HTTP_PROXY_IP && process.env.JAMBONES_HTTP_PROXY_PORT) {
+      t.pass('successfully used proxy to reach microsoft tts service');
+    }
+
+    // Second call with identical parameters: expected to be a cache hit.
+    result = await synthAudio(stats, request());
+    t.ok(result.servedFromCache, `successfully retrieved microsoft audio from cache ${result.filePath}`);
+  } catch (err) {
+    console.error(err);
+    t.end(err);
+  } finally {
+    // Release the client on every path, including the early return taken when
+    // the Microsoft credentials are not configured.
+    client.quit();
+  }
+});
+
+
 test('Azure custom voice speech synth tests', async(t) => {
  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);