Say length text (#165)

* typo for media bug name in azure and punctuation fix * say: split very long text intelligently * more fixes from testing * update to latest synthAudio
2025-12-20 16:50:39 +00:00 · 2022-09-14 17:17:29 +02:00
parent 876824abde
commit bd49dacac4
4 changed files with 106 additions and 10 deletions
--- a/lib/tasks/say.js
+++ b/lib/tasks/say.js
@@ -1,12 +1,107 @@
 const Task = require('./task');
 const {TaskName, TaskPreconditions} = require('../utils/constants');

+/**
+ * Splits lengthy text for the say task into smaller chunks.
+ *
+ * Text of at most 1000 characters is returned untouched as a one-element array.
+ * Longer text is cut at the first sentence end (`.`, `!` or `?` followed by
+ * whitespace) found once a chunk has reached 1000 characters, so chunks may run
+ * somewhat over that size; the whitespace after the punctuation starts the next
+ * chunk. Text with no suitable sentence end is returned as a single chunk. A
+ * trailing remainder holding only whitespace (and, for SSML, the closing
+ * `</speak>`) is dropped instead of becoming an empty chunk.
+ *
+ * Text starting with `<speak>` is treated as SSML: a chunk is never cut inside
+ * an element, and every chunk is wrapped so it is a complete `<speak>` document.
+ * Nested elements of the same name are not tracked; the first matching closing
+ * tag ends the element.
+ *
+ * @example
+ * // => ['aaa...a.', ' next'] (the first chunk holds 1000 a's and the period)
+ * breakLengthyTextIfNeeded(logger, `${'a'.repeat(1000)}. next`);
+ *
+ * @param {object} logger - unused; kept so the call signature stays stable
+ * @param {string} text - plain text or SSML to be spoken
+ * @returns {string[]} the chunks, in speaking order
+ */
+const breakLengthyTextIfNeeded = (logger, text) => {
+  const chunkSize = 1000;
+  // non-string input is handed back untouched rather than throwing here
+  if (typeof text !== 'string' || text.length <= chunkSize) return [text];
+
+  const openTag = '<speak>';
+  const closeTag = '</speak>';
+  const isSSML = text.startsWith(openTag);
+
+  // add whichever <speak> delimiters the chunk lacks, so none is ever doubled
+  const wrap = (chunk, isFirst) => {
+    const head = isFirst ? '' : openTag;
+    const tail = chunk.trimEnd().endsWith(closeTag) ? '' : closeTag;
+    return `${head}${chunk}${tail}`;
+  };
+
+  // given the index of a '<', return the index just past that whole element
+  const skipElement = (lt) => {
+    const gt = text.indexOf('>', lt + 1);
+    // a stray '<' with no '>' after it is treated as ordinary text
+    if (gt === -1) return lt + 1;
+
+    const inner = text.slice(lt + 1, gt);
+    const name = /^[^\s/!?]+/.exec(inner);
+    // closing tag, comment or declaration: there is no element body to skip
+    if (!name) return gt + 1;
+    // self-closing tag, or a '>' escaped with a backslash
+    if (/[/\\]$/.test(inner)) return gt + 1;
+
+    const closer = `</${name[0]}>`;
+    const closerPos = text.indexOf(closer, gt + 1);
+    // an element that is never closed only has its opening tag skipped
+    return closerPos === -1 ? gt + 1 : closerPos + closer.length;
+  };
+
+  const result = [];
+  let startPos = 0;  // where the current chunk begins
+  let pos = isSSML ? openTag.length : 0;  // scan position; starts after <speak>
+  while (pos < text.length) {
+    if (isSSML && text[pos] === '<') {
+      pos = skipElement(pos);
+      continue;
+    }
+
+    // only look for a break once the current chunk is long enough
+    const longEnough = pos - startPos >= chunkSize;
+    // the last character has no lookahead, so it never counts as a break
+    const sentenceEnd = /[.!?]/.test(text[pos]) && /\s/.test(text[pos + 1] || '');
+    pos++;
+    if (longEnough && sentenceEnd) {
+      // the chunk keeps the punctuation; the whitespace opens the next chunk
+      const chunk = text.slice(startPos, pos);
+      result.push(isSSML ? wrap(chunk, startPos === 0) : chunk);
+      startPos = pos;
+    }
+  }
+
+  // final chunk; dropped when nothing speakable is left after the last cut
+  const rest = text.slice(startPos);
+  // for SSML the closing </speak> on its own does not count as speakable
+  const speakable = isSSML ? rest.replace(closeTag, '') : rest;
+  if (result.length === 0 || speakable.trim().length > 0) {
+    result.push(isSSML ? wrap(rest, startPos === 0) : rest);
+  }
+
+  return result;
+};
+
 class TaskSay extends Task {
  constructor(logger, opts, parentTask) {
    super(logger, opts);
    this.preconditions = TaskPreconditions.Endpoint;

-    this.text = Array.isArray(this.data.text) ? this.data.text : [this.data.text];
+    this.text = (Array.isArray(this.data.text) ? this.data.text : [this.data.text])
+      .map((t) => breakLengthyTextIfNeeded(this.logger, t))
+      .flat();
+
    this.loop = this.data.loop || 1;
    this.earlyMedia = this.data.earlyMedia === true || (parentTask && parentTask.earlyMedia);
    this.synthesizer = this.data.synthesizer || {};
--- a/lib/tasks/transcribe.js
+++ b/lib/tasks/transcribe.js
@@ -173,6 +173,7 @@ class TaskTranscribe extends Task {
        ['diarization', 'GOOGLE_SPEECH_PROFANITY_FILTER']
      ].forEach((arr) => {
        if (this[arr[0]]) opts[arr[1]] = true;
+        else if (this[arr[0]] === false) opts[arr[1]] = false;
      });
      if (this.hints.length > 0) {
        opts.GOOGLE_SPEECH_HINTS = this.hints.join(',');