fix: split ssml to correct chunks (#225)

* fix: split ssml to correct chunks
* fix: split ssml to correct chunks
* fixed: eslint
* fixed: eslint
* fixed: add comment to testcase
* fixed: review comments
* fixed: review comments
* fixed: review comments
* fixed: review comments
* fixed: review comments

Co-authored-by: Quan HL <quanluuhoang8@gmail.com>
2025-12-19 04:17:44 +00:00 · 2023-01-24 21:48:31 +07:00
parent 8c0044a378
commit 088316d266
6 changed files with 78 additions and 87 deletions
--- a/lib/tasks/say.js
+++ b/lib/tasks/say.js
@@ -1,96 +1,26 @@
 const Task = require('./task');
 const {TaskName, TaskPreconditions} = require('../utils/constants');
 const pollySSMLSplit = require('polly-ssml-split');
 const breakLengthyTextIfNeeded = (logger,  text) => {
  const chunkSize = 1000;
  if (text.length <= chunkSize) return [text];
  const result = [];
  const isSSML = text.startsWith('<speak>');
-  let startPos = 0;
+  if (text.length <= chunkSize || !isSSML) return [text];
-  let charPos = isSSML ? 7 : 0;  // skip <speak>
+  const options = {
-  let tag;
+    // MIN length
-  //logger.debug({isSSML}, `breakLengthyTextIfNeeded: handling text of length ${text.length}`);
+    softLimit: 100,
-  while (startPos + charPos < text.length) {
+    // MAX length, exclude 15 characters <speak></speak>
-    if (isSSML && !tag && text[startPos + charPos] === '<') {
+    hardLimit: chunkSize - 15,
-      const tagStartPos = ++charPos;
+    // Set of extra split characters (Optional property)
-      while (startPos + charPos < text.length) {
+    extraSplitChars: ',;!?',
-        if (text[startPos + charPos] === '>') {
+  };
-          if (text[startPos + charPos - 1] === '\\') tag = null;
+  pollySSMLSplit.configure(options);
-          else if (!tag) tag = text.substring(startPos + tagStartPos, startPos + charPos - 1);
+  try {
-          break;
+    return pollySSMLSplit.split(text);
-        }
+  } catch (err) {
-        if (!tag) {
+    logger.info({err}, 'Error spliting SSML long text');
-          const c = text[startPos + charPos];
+    return [text];
          if (c === ' ') {
            tag = text.substring(startPos + tagStartPos, startPos + charPos);
            //logger.debug(`breakLengthyTextIfNeeded: enter tag ${tag} (space)`);
            break;
          }
        }
        charPos++;
      }
      if (tag) {
        //search for end of tag
        //logger.debug(`breakLengthyTextIfNeeded: searching forward for </${tag}>`);
        const e1 = text.indexOf(`</${tag}>`, startPos + charPos);
        const e2 = text.indexOf('/>', startPos + charPos);
        const tagEndPos = e1 === -1 ? e2 : e2 === -1 ? e1 : Math.min(e1, e2);
        if (tagEndPos === -1) {
          //logger.debug(`breakLengthyTextIfNeeded: exit tag ${tag} not found, exiting`);
        } else {
          //logger.debug(`breakLengthyTextIfNeeded: exit tag ${tag} found at ${tagEndPos}`);
          charPos = tagEndPos + 1;
        }
        tag = null;
      }
      continue;
    }
    if (charPos < chunkSize) {
      charPos++;
      continue;
    }
    // start looking for a good break point
    let chunkIt = false;
    const a = text[startPos + charPos];
    const b = text[startPos + charPos + 1];
    if (/[\.!\?]/.test(a) && /\s/.test(b)) {
      //logger.debug('breakLengthyTextIfNeeded: breaking at sentence end');
      chunkIt = true;
    }
    if (chunkIt) {
      charPos++;
      const chunk = text.substr(startPos, charPos);
      if (isSSML) {
        result.push(0 === startPos ? `${chunk}</speak>` : `<speak>${chunk}</speak>`);
      }
      else result.push(chunk);
      charPos = 0;
      startPos += chunk.length;
      //logger.debug({chunk: result[result.length - 1]},
      //  `breakLengthyTextIfNeeded: chunked; new starting pos ${startPos}`);
    }
    else charPos++;
  }
  // final chunk
  if (startPos < text.length) {
    const chunk = text.substr(startPos);
    if (isSSML) {
      result.push(0 === startPos ? `${chunk}</speak>` : `<speak>${chunk}`);
    }
    else result.push(chunk);
    //logger.debug({chunk: result[result.length - 1]},
    //  `breakLengthyTextIfNeeded: final chunk; starting pos ${startPos} length ${chunk.length}`);
  }
  return result;
 };
 class TaskSay extends Task {
--- a/package-lock.json
+++ b/package-lock.json
@@ -34,6 +34,7 @@
        "moment": "^2.29.4",
        "parse-url": "^8.1.0",
        "pino": "^6.14.0",
        "polly-ssml-split": "^0.1.0",
        "sdp-transform": "^2.14.1",
        "short-uuid": "^4.2.0",
        "to-snake-case": "^1.0.0",
@@ -5956,6 +5957,19 @@
        "node": ">=8"
      }
    },
    "node_modules/polly-ssml-split": {
      "version": "0.1.0",
      "resolved": "https://registry.npmjs.org/polly-ssml-split/-/polly-ssml-split-0.1.0.tgz",
      "integrity": "sha512-vweYqyWC4WwUZPh8cywLeYpj5IswgAXhc+twq8Y6inqFo32JU8YlAZtFmHPhdI456gh3bSwupLaL+6WV9CQuUw==",
      "dependencies": {
        "polly-text-split": "^0.1.4"
      }
    },
    "node_modules/polly-text-split": {
      "version": "0.1.4",
      "resolved": "https://registry.npmjs.org/polly-text-split/-/polly-text-split-0.1.4.tgz",
      "integrity": "sha512-WhYm13sQyPxdn5yWpGi45WFWZOruKBqs+y0iXWVz16y+yV612WjOwqvh4s1j7CgWbid+8rbjiHjxvZwJE1zVFw=="
    },
    "node_modules/prelude-ls": {
      "version": "1.2.1",
      "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz",
@@ -12345,6 +12359,19 @@
        "find-up": "^4.0.0"
      }
    },
    "polly-ssml-split": {
      "version": "0.1.0",
      "resolved": "https://registry.npmjs.org/polly-ssml-split/-/polly-ssml-split-0.1.0.tgz",
      "integrity": "sha512-vweYqyWC4WwUZPh8cywLeYpj5IswgAXhc+twq8Y6inqFo32JU8YlAZtFmHPhdI456gh3bSwupLaL+6WV9CQuUw==",
      "requires": {
        "polly-text-split": "^0.1.4"
      }
    },
    "polly-text-split": {
      "version": "0.1.4",
      "resolved": "https://registry.npmjs.org/polly-text-split/-/polly-text-split-0.1.4.tgz",
      "integrity": "sha512-WhYm13sQyPxdn5yWpGi45WFWZOruKBqs+y0iXWVz16y+yV612WjOwqvh4s1j7CgWbid+8rbjiHjxvZwJE1zVFw=="
    },
    "prelude-ls": {
      "version": "1.2.1",
      "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz",
--- a/package.json
+++ b/package.json
@@ -56,7 +56,8 @@
    "uuid-random": "^1.3.2",
    "verify-aws-sns-signature": "^0.1.0",
    "ws": "^8.9.0",
-    "xml2js": "^0.4.23"
+    "xml2js": "^0.4.23",
    "polly-ssml-split": "^0.1.0"
  },
  "devDependencies": {
    "clear-module": "^4.1.2",
--- a/test/data/bad/bad-say-ssml.json
+++ b/test/data/bad/bad-say-ssml.json
@@ -0,0 +1,9 @@
 {
  "say": {
    "text": "<speak>I already told you <emphasis level=\"strong\">I already told you I already told you I already told you I already told you! I already told you I already told you I already told you I already told you? I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you told I already told you I already told you told I already told you I already told you. I already told you <break time=\"3s\"/> I really like that person!</emphasis> this is another long text.</speak>",
    "synthesizer": {
      "vendor": "google",
      "language": "en-US"
    }
  }
 }
--- a/test/data/good/say-ssml.json
+++ b/test/data/good/say-ssml.json
@@ -0,0 +1,9 @@
 {
  "say": {
    "text": "<speak>I already told you I already told you I already told you I already told you I already told you! I already told you I already told you I already told you I already told you? I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you told I already told you I already told you told I already told you I already told you. I already told you <break time=\"3s\"/> I <emphasis level=\"strong\">really like that person!</emphasis> this is another long text.</speak>",
    "synthesizer": {
      "vendor": "google",
      "language": "en-US"
    }
  }
 }
--- a/test/unit-tests.js
+++ b/test/unit-tests.js
@@ -44,7 +44,22 @@ test('unit tests', (t) => {
  task = makeTask(logger, require('./data/good/say-text-array'));
  t.ok(task.name === 'say', 'parsed say with multiple segments');
  task = makeTask(logger, require('./data/good/say-ssml'));
  // the ssml is more than 1000 chars, so it is split into 3 chunks:
  // the 1st chunk must be longer than 100 chars, so it stops at '?' rather than at the earlier '!'
  // the 2nd chunk is a long text of fewer than 1000 chars, stopping at '.'
  // the 3rd chunk is the rest.
  t.ok(task.text.length === 3 &&
    task.text[0].length === 187 &&
    task.text[1].length === 882 &&
    task.text[2].length === 123, 'parsed say');
  task = makeTask(logger, require('./data/bad/bad-say-ssml'));
  t.ok(task.text.length === 1 &&
    task.text[0].length === 1162, 'parsed bad say');
  const alt = require('./data/good/alternate-syntax');
  const normalize = require('../lib/utils/normalize-jambones');
  normalize(logger, alt).forEach((t) => {