wip

2026-01-25 02:08:26 +00:00 · 2023-08-25 13:49:04 +07:00
parent 40f51e7509
commit b1049aad7f
1 changed files with 59 additions and 1 deletions
--- a/lib/synth-audio.js
+++ b/lib/synth-audio.js
@@ -303,7 +303,65 @@ const synthIbm = async(logger, {credentials, stats, voice, text}) => {
  }
 };

-const synthMicrosoft = async(logger, {
+async function synthMicrosoft(logger, {
+  credentials,
+  stats,
+  language,
+  voice,
+  text,
+  filePath
+}) {
+  const synthFunc = process.env.ONPREM_AZURE_SYNTHESIZE_BY_HTTP ?
+  _synthMicrosoftByHttp : _synthMicrosoftBySdk;
+  return await synthFunc(logger, {
+    credentials,
+    stats,
+    language,
+    voice,
+    text,
+    filePath
+  });
+}
+
+async function _synthMicrosoftByHttp (logger, {
+  credentials,
+  stats,
+  language,
+  voice,
+  text,
+  filePath
+}) {
+  const {api_key: apiKey, region, use_custom_tts, custom_tts_endpoint, custom_tts_endpoint_url} = credentials;
+  if (use_custom_tts && !content.startsWith('<speak')) {
+    /**
+     * Note: it seems that to use custom voice ssml is required with the voice attribute
+     * Otherwise sending plain text we get "Voice does not match"
+     */
+    content = `<speak>${text}</speak>`;
+  }
+
+  if (content.startsWith('<speak>')) {
+    /* microsoft enforces some properties and uses voice xml element so if the user did not supply do it for them */
+    const words = content.slice(7, -8).trim().replace(/(\r\n|\n|\r)/gm, ' ');
+    // eslint-disable-next-line max-len
+    content = `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="${language}"><voice name="${voice}">${words}</voice></speak>`;
+    logger.info({content}, 'synthMicrosoft');
+  }
+
+  try {
+    const post = bent('POST', 'buffer', {
+      'X-Microsoft-OutputFormat': 'audio-16khz-128kbitrate-mono-mp3',
+      'Content-Type': 'application/ssml+xml'
+    });
+    const mp3 = await post(custom_tts_endpoint_url, content);
+    return mp3;
+  } catch (err) {
+    logger.info({err}, '_synthMicrosoftByHttp returned error');
+    throw err;
+  }
+}
+
+const _synthMicrosoftBySdk = async(logger, {
  credentials,
  stats,
  language,