diff --git a/lib/tasks/say.js b/lib/tasks/say.js
index b06e50d6..e13de623 100644
--- a/lib/tasks/say.js
+++ b/lib/tasks/say.js
@@ -1,96 +1,40 @@
const Task = require('./task');
const {TaskName, TaskPreconditions} = require('../utils/constants');
+const pollySSMLSplit = require('polly-ssml-split');
+/**
+ * Break long SSML text into smaller chunks using polly-ssml-split.
+ *
+ * Text of at most 1000 characters, or text that does not start with
+ * `<speak>`, is returned unchanged as a single-element array.
+ *
+ * @param {object} logger - logger used to report a failed split
+ * @param {string} text - plain text or SSML to be spoken
+ * @returns {string[]} the chunks, or `[text]` when no split was made or
+ * the splitter threw
+ */
const breakLengthyTextIfNeeded = (logger, text) => {
const chunkSize = 1000;
- if (text.length <= chunkSize) return [text];
-
- const result = [];
- const isSSML = text.startsWith('');
+  const isSSML = text.startsWith('<speak>');
- let startPos = 0;
- let charPos = isSSML ? 7 : 0; // skip
- let tag;
- //logger.debug({isSSML}, `breakLengthyTextIfNeeded: handling text of length ${text.length}`);
- while (startPos + charPos < text.length) {
- if (isSSML && !tag && text[startPos + charPos] === '<') {
- const tagStartPos = ++charPos;
- while (startPos + charPos < text.length) {
- if (text[startPos + charPos] === '>') {
- if (text[startPos + charPos - 1] === '\\') tag = null;
- else if (!tag) tag = text.substring(startPos + tagStartPos, startPos + charPos - 1);
- break;
- }
- if (!tag) {
- const c = text[startPos + charPos];
- if (c === ' ') {
- tag = text.substring(startPos + tagStartPos, startPos + charPos);
- //logger.debug(`breakLengthyTextIfNeeded: enter tag ${tag} (space)`);
- break;
- }
- }
- charPos++;
- }
- if (tag) {
- //search for end of tag
- //logger.debug(`breakLengthyTextIfNeeded: searching forward for ${tag}>`);
- const e1 = text.indexOf(`${tag}>`, startPos + charPos);
- const e2 = text.indexOf('/>', startPos + charPos);
- const tagEndPos = e1 === -1 ? e2 : e2 === -1 ? e1 : Math.min(e1, e2);
- if (tagEndPos === -1) {
- //logger.debug(`breakLengthyTextIfNeeded: exit tag ${tag} not found, exiting`);
- } else {
- //logger.debug(`breakLengthyTextIfNeeded: exit tag ${tag} found at ${tagEndPos}`);
- charPos = tagEndPos + 1;
- }
- tag = null;
- }
- continue;
- }
-
- if (charPos < chunkSize) {
- charPos++;
- continue;
- }
-
- // start looking for a good break point
- let chunkIt = false;
- const a = text[startPos + charPos];
- const b = text[startPos + charPos + 1];
- if (/[\.!\?]/.test(a) && /\s/.test(b)) {
- //logger.debug('breakLengthyTextIfNeeded: breaking at sentence end');
- chunkIt = true;
- }
- if (chunkIt) {
- charPos++;
- const chunk = text.substr(startPos, charPos);
- if (isSSML) {
- result.push(0 === startPos ? `${chunk}` : `${chunk}`);
- }
- else result.push(chunk);
- charPos = 0;
- startPos += chunk.length;
-
- //logger.debug({chunk: result[result.length - 1]},
- // `breakLengthyTextIfNeeded: chunked; new starting pos ${startPos}`);
-
- }
- else charPos++;
+  // Short or non-SSML text is passed through as a single chunk.
+  if (text.length <= chunkSize || !isSSML) return [text];
+  const options = {
+    // MIN length of a chunk
+    softLimit: 100,
+    // MAX length of a chunk; 15 characters are held back, presumably room
+    // for the <speak></speak> wrapper (7 + 8 chars) -- TODO confirm
+    hardLimit: chunkSize - 15,
+    // Set of extra split characters (Optional property)
+    extraSplitChars: ',;!?',
+  };
+  pollySSMLSplit.configure(options);
+  try {
+    return pollySSMLSplit.split(text);
+  } catch (err) {
+    // Best effort: if the splitter throws, fall back to the unsplit text.
+    logger.info({err}, 'Error splitting SSML long text');
+    return [text];
}
-
- // final chunk
- if (startPos < text.length) {
- const chunk = text.substr(startPos);
- if (isSSML) {
- result.push(0 === startPos ? `${chunk}` : `${chunk}`);
- }
- else result.push(chunk);
-
- //logger.debug({chunk: result[result.length - 1]},
- // `breakLengthyTextIfNeeded: final chunk; starting pos ${startPos} length ${chunk.length}`);
-
- }
-
- return result;
};
class TaskSay extends Task {
diff --git a/package-lock.json b/package-lock.json
index 68a2aca5..5bc7b7d3 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -34,6 +34,7 @@
"moment": "^2.29.4",
"parse-url": "^8.1.0",
"pino": "^6.14.0",
+ "polly-ssml-split": "^0.1.0",
"sdp-transform": "^2.14.1",
"short-uuid": "^4.2.0",
"to-snake-case": "^1.0.0",
@@ -5956,6 +5957,19 @@
"node": ">=8"
}
},
+ "node_modules/polly-ssml-split": {
+ "version": "0.1.0",
+ "resolved": "https://registry.npmjs.org/polly-ssml-split/-/polly-ssml-split-0.1.0.tgz",
+ "integrity": "sha512-vweYqyWC4WwUZPh8cywLeYpj5IswgAXhc+twq8Y6inqFo32JU8YlAZtFmHPhdI456gh3bSwupLaL+6WV9CQuUw==",
+ "dependencies": {
+ "polly-text-split": "^0.1.4"
+ }
+ },
+ "node_modules/polly-text-split": {
+ "version": "0.1.4",
+ "resolved": "https://registry.npmjs.org/polly-text-split/-/polly-text-split-0.1.4.tgz",
+ "integrity": "sha512-WhYm13sQyPxdn5yWpGi45WFWZOruKBqs+y0iXWVz16y+yV612WjOwqvh4s1j7CgWbid+8rbjiHjxvZwJE1zVFw=="
+ },
"node_modules/prelude-ls": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz",
@@ -12345,6 +12359,19 @@
"find-up": "^4.0.0"
}
},
+ "polly-ssml-split": {
+ "version": "0.1.0",
+ "resolved": "https://registry.npmjs.org/polly-ssml-split/-/polly-ssml-split-0.1.0.tgz",
+ "integrity": "sha512-vweYqyWC4WwUZPh8cywLeYpj5IswgAXhc+twq8Y6inqFo32JU8YlAZtFmHPhdI456gh3bSwupLaL+6WV9CQuUw==",
+ "requires": {
+ "polly-text-split": "^0.1.4"
+ }
+ },
+ "polly-text-split": {
+ "version": "0.1.4",
+ "resolved": "https://registry.npmjs.org/polly-text-split/-/polly-text-split-0.1.4.tgz",
+ "integrity": "sha512-WhYm13sQyPxdn5yWpGi45WFWZOruKBqs+y0iXWVz16y+yV612WjOwqvh4s1j7CgWbid+8rbjiHjxvZwJE1zVFw=="
+ },
"prelude-ls": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz",
diff --git a/package.json b/package.json
index 45e53232..c3af94f6 100644
--- a/package.json
+++ b/package.json
@@ -56,7 +56,8 @@
"uuid-random": "^1.3.2",
"verify-aws-sns-signature": "^0.1.0",
"ws": "^8.9.0",
- "xml2js": "^0.4.23"
+ "xml2js": "^0.4.23",
+ "polly-ssml-split": "^0.1.0"
},
"devDependencies": {
"clear-module": "^4.1.2",
diff --git a/test/data/bad/bad-say-ssml.json b/test/data/bad/bad-say-ssml.json
new file mode 100644
index 00000000..226aa7db
--- /dev/null
+++ b/test/data/bad/bad-say-ssml.json
@@ -0,0 +1,9 @@
+{
+ "say": {
+ "text": "I already told you I already told you I already told you I already told you I already told you! I already told you I already told you I already told you I already told you? I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you told I already told you I already told you told I already told you I already told you. I already told you I really like that person! this is another long text.",
+ "synthesizer": {
+ "vendor": "google",
+ "language": "en-US"
+ }
+ }
+}
\ No newline at end of file
diff --git a/test/data/good/say-ssml.json b/test/data/good/say-ssml.json
new file mode 100644
index 00000000..28a1d670
--- /dev/null
+++ b/test/data/good/say-ssml.json
@@ -0,0 +1,9 @@
+{
+ "say": {
+ "text": "I already told you I already told you I already told you I already told you I already told you! I already told you I already told you I already told you I already told you? I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you told I already told you I already told you told I already told you I already told you. I already told you I really like that person! this is another long text.",
+ "synthesizer": {
+ "vendor": "google",
+ "language": "en-US"
+ }
+ }
+}
\ No newline at end of file
diff --git a/test/unit-tests.js b/test/unit-tests.js
index ed18b2b6..c2739432 100644
--- a/test/unit-tests.js
+++ b/test/unit-tests.js
@@ -44,7 +44,22 @@ test('unit tests', (t) => {
task = makeTask(logger, require('./data/good/say-text-array'));
t.ok(task.name === 'say', 'parsed say with multiple segments');
+
+ task = makeTask(logger, require('./data/good/say-ssml'));
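+ // The SSML is longer than 1000 chars, so it is expected to be split in three.
+ // 1st chunk: must be longer than 100 chars, so it ends at the first ? rather than at the earlier !
+ // 2nd chunk: a long run of text shorter than 1000 chars, ending at a .
+ // 3rd chunk: the rest of the text.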
+ t.ok(task.text.length === 3 &&
+ task.text[0].length === 187 &&
+ task.text[1].length === 882 &&
+ task.text[2].length === 123, 'parsed say');
+ task = makeTask(logger, require('./data/bad/bad-say-ssml'));
+ t.ok(task.text.length === 1 &&
+ task.text[0].length === 1162, 'parsed bad say');
+
+
const alt = require('./data/good/alternate-syntax');
const normalize = require('../lib/utils/normalize-jambones');
normalize(logger, alt).forEach((t) => {