diff --git a/lib/tasks/say.js b/lib/tasks/say.js index b06e50d6..e13de623 100644 --- a/lib/tasks/say.js +++ b/lib/tasks/say.js @@ -1,96 +1,26 @@ const Task = require('./task'); const {TaskName, TaskPreconditions} = require('../utils/constants'); +const pollySSMLSplit = require('polly-ssml-split'); const breakLengthyTextIfNeeded = (logger, text) => { const chunkSize = 1000; - if (text.length <= chunkSize) return [text]; - - const result = []; const isSSML = text.startsWith(''); - let startPos = 0; - let charPos = isSSML ? 7 : 0; // skip - let tag; - //logger.debug({isSSML}, `breakLengthyTextIfNeeded: handling text of length ${text.length}`); - while (startPos + charPos < text.length) { - if (isSSML && !tag && text[startPos + charPos] === '<') { - const tagStartPos = ++charPos; - while (startPos + charPos < text.length) { - if (text[startPos + charPos] === '>') { - if (text[startPos + charPos - 1] === '\\') tag = null; - else if (!tag) tag = text.substring(startPos + tagStartPos, startPos + charPos - 1); - break; - } - if (!tag) { - const c = text[startPos + charPos]; - if (c === ' ') { - tag = text.substring(startPos + tagStartPos, startPos + charPos); - //logger.debug(`breakLengthyTextIfNeeded: enter tag ${tag} (space)`); - break; - } - } - charPos++; - } - if (tag) { - //search for end of tag - //logger.debug(`breakLengthyTextIfNeeded: searching forward for `); - const e1 = text.indexOf(``, startPos + charPos); - const e2 = text.indexOf('/>', startPos + charPos); - const tagEndPos = e1 === -1 ? e2 : e2 === -1 ? e1 : Math.min(e1, e2); - if (tagEndPos === -1) { - //logger.debug(`breakLengthyTextIfNeeded: exit tag ${tag} not found, exiting`); - } else { - //logger.debug(`breakLengthyTextIfNeeded: exit tag ${tag} found at ${tagEndPos}`); - charPos = tagEndPos + 1; - } - tag = null; - } - continue; - } - - if (charPos < chunkSize) { - charPos++; - continue; - } - - // start looking for a good break point - let chunkIt = false; - const a = text[startPos + charPos]; - const b = text[startPos + charPos + 1]; - if (/[\.!\?]/.test(a) && /\s/.test(b)) { - //logger.debug('breakLengthyTextIfNeeded: breaking at sentence end'); - chunkIt = true; - } - if (chunkIt) { - charPos++; - const chunk = text.substr(startPos, charPos); - if (isSSML) { - result.push(0 === startPos ? `${chunk}` : `${chunk}`); - } - else result.push(chunk); - charPos = 0; - startPos += chunk.length; - - //logger.debug({chunk: result[result.length - 1]}, - // `breakLengthyTextIfNeeded: chunked; new starting pos ${startPos}`); - - } - else charPos++; + if (text.length <= chunkSize || !isSSML) return [text]; + const options = { + // MIN length + softLimit: 100, + // MAX length, exclude 15 characters + hardLimit: chunkSize - 15, + // Set of extra split characters (Optional property) + extraSplitChars: ',;!?', + }; + pollySSMLSplit.configure(options); + try { + return pollySSMLSplit.split(text); + } catch (err) { + logger.info({err}, 'Error spliting SSML long text'); + return [text]; } - - // final chunk - if (startPos < text.length) { - const chunk = text.substr(startPos); - if (isSSML) { - result.push(0 === startPos ? `${chunk}` : `${chunk}`); - } - else result.push(chunk); - - //logger.debug({chunk: result[result.length - 1]}, - // `breakLengthyTextIfNeeded: final chunk; starting pos ${startPos} length ${chunk.length}`); - - } - - return result; }; class TaskSay extends Task { diff --git a/package-lock.json b/package-lock.json index 68a2aca5..5bc7b7d3 100644 --- a/package-lock.json +++ b/package-lock.json @@ -34,6 +34,7 @@ "moment": "^2.29.4", "parse-url": "^8.1.0", "pino": "^6.14.0", + "polly-ssml-split": "^0.1.0", "sdp-transform": "^2.14.1", "short-uuid": "^4.2.0", "to-snake-case": "^1.0.0", @@ -5956,6 +5957,19 @@ "node": ">=8" } }, + "node_modules/polly-ssml-split": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/polly-ssml-split/-/polly-ssml-split-0.1.0.tgz", + "integrity": "sha512-vweYqyWC4WwUZPh8cywLeYpj5IswgAXhc+twq8Y6inqFo32JU8YlAZtFmHPhdI456gh3bSwupLaL+6WV9CQuUw==", + "dependencies": { + "polly-text-split": "^0.1.4" + } + }, + "node_modules/polly-text-split": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/polly-text-split/-/polly-text-split-0.1.4.tgz", + "integrity": "sha512-WhYm13sQyPxdn5yWpGi45WFWZOruKBqs+y0iXWVz16y+yV612WjOwqvh4s1j7CgWbid+8rbjiHjxvZwJE1zVFw==" + }, "node_modules/prelude-ls": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", @@ -12345,6 +12359,19 @@ "find-up": "^4.0.0" } }, + "polly-ssml-split": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/polly-ssml-split/-/polly-ssml-split-0.1.0.tgz", + "integrity": "sha512-vweYqyWC4WwUZPh8cywLeYpj5IswgAXhc+twq8Y6inqFo32JU8YlAZtFmHPhdI456gh3bSwupLaL+6WV9CQuUw==", + "requires": { + "polly-text-split": "^0.1.4" + } + }, + "polly-text-split": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/polly-text-split/-/polly-text-split-0.1.4.tgz", + "integrity": "sha512-WhYm13sQyPxdn5yWpGi45WFWZOruKBqs+y0iXWVz16y+yV612WjOwqvh4s1j7CgWbid+8rbjiHjxvZwJE1zVFw==" + }, "prelude-ls": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", diff --git a/package.json b/package.json index 45e53232..c3af94f6 100644 --- a/package.json +++ b/package.json @@ -56,7 +56,8 @@ "uuid-random": "^1.3.2", "verify-aws-sns-signature": "^0.1.0", "ws": "^8.9.0", - "xml2js": "^0.4.23" + "xml2js": "^0.4.23", + "polly-ssml-split": "^0.1.0" }, "devDependencies": { "clear-module": "^4.1.2", diff --git a/test/data/bad/bad-say-ssml.json b/test/data/bad/bad-say-ssml.json new file mode 100644 index 00000000..226aa7db --- /dev/null +++ b/test/data/bad/bad-say-ssml.json @@ -0,0 +1,9 @@ +{ + "say": { + "text": "I already told you I already told you I already told you I already told you I already told you! I already told you I already told you I already told you I already told you? I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you told I already told you I already told you told I already told you I already told you. I already told you I really like that person! this is another long text.", + "synthesizer": { + "vendor": "google", + "language": "en-US" + } + } +} \ No newline at end of file diff --git a/test/data/good/say-ssml.json b/test/data/good/say-ssml.json new file mode 100644 index 00000000..28a1d670 --- /dev/null +++ b/test/data/good/say-ssml.json @@ -0,0 +1,9 @@ +{ + "say": { + "text": "I already told you I already told you I already told you I already told you I already told you! I already told you I already told you I already told you I already told you? I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you told I already told you I already told you told I already told you I already told you. I already told you I really like that person! this is another long text.", + "synthesizer": { + "vendor": "google", + "language": "en-US" + } + } +} \ No newline at end of file diff --git a/test/unit-tests.js b/test/unit-tests.js index ed18b2b6..c2739432 100644 --- a/test/unit-tests.js +++ b/test/unit-tests.js @@ -44,7 +44,22 @@ test('unit tests', (t) => { task = makeTask(logger, require('./data/good/say-text-array')); t.ok(task.name === 'say', 'parsed say with multiple segments'); + + task = makeTask(logger, require('./data/good/say-ssml')); + // the ssml is more than 1000 chars, + // expecting first chunk is length > 100, stop at ? instead of first . + // 2nd chunk is long text < 1000 char, stop at . + // 3rd chunk is the rest. + t.ok(task.text.length === 3 && + task.text[0].length === 187 && + task.text[1].length === 882 && + task.text[2].length === 123, 'parsed say'); + task = makeTask(logger, require('./data/bad/bad-say-ssml')); + t.ok(task.text.length === 1 && + task.text[0].length === 1162, 'parsed bad say'); + + const alt = require('./data/good/alternate-syntax'); const normalize = require('../lib/utils/normalize-jambones'); normalize(logger, alt).forEach((t) => {