fix: split ssml to correct chunks (#225)

* fix: split ssml to correct chunks

* fix: split ssml to correct chunks

* fixed: eslint

* fixed: eslint

* fixed: add comment to testcase

* fixed: review comments

* fixed: review comments

* fixed: review comments

* fixed: review comments

* fixed: review comments

Co-authored-by: Quan HL <quanluuhoang8@gmail.com>
This commit is contained in:
Hoan Luu Huu
2023-01-24 21:48:31 +07:00
committed by GitHub
parent 8c0044a378
commit 088316d266
6 changed files with 78 additions and 87 deletions

View File

@@ -1,96 +1,26 @@
const Task = require('./task'); const Task = require('./task');
const {TaskName, TaskPreconditions} = require('../utils/constants'); const {TaskName, TaskPreconditions} = require('../utils/constants');
const pollySSMLSplit = require('polly-ssml-split');
const breakLengthyTextIfNeeded = (logger, text) => { const breakLengthyTextIfNeeded = (logger, text) => {
const chunkSize = 1000; const chunkSize = 1000;
if (text.length <= chunkSize) return [text];
const result = [];
const isSSML = text.startsWith('<speak>'); const isSSML = text.startsWith('<speak>');
let startPos = 0; if (text.length <= chunkSize || !isSSML) return [text];
let charPos = isSSML ? 7 : 0; // skip <speak> const options = {
let tag; // MIN length
//logger.debug({isSSML}, `breakLengthyTextIfNeeded: handling text of length ${text.length}`); softLimit: 100,
while (startPos + charPos < text.length) { // MAX length, exclude 15 characters <speak></speak>
if (isSSML && !tag && text[startPos + charPos] === '<') { hardLimit: chunkSize - 15,
const tagStartPos = ++charPos; // Set of extra split characters (Optional property)
while (startPos + charPos < text.length) { extraSplitChars: ',;!?',
if (text[startPos + charPos] === '>') { };
if (text[startPos + charPos - 1] === '\\') tag = null; pollySSMLSplit.configure(options);
else if (!tag) tag = text.substring(startPos + tagStartPos, startPos + charPos - 1); try {
break; return pollySSMLSplit.split(text);
} } catch (err) {
if (!tag) { logger.info({err}, 'Error spliting SSML long text');
const c = text[startPos + charPos]; return [text];
if (c === ' ') {
tag = text.substring(startPos + tagStartPos, startPos + charPos);
//logger.debug(`breakLengthyTextIfNeeded: enter tag ${tag} (space)`);
break;
}
}
charPos++;
}
if (tag) {
//search for end of tag
//logger.debug(`breakLengthyTextIfNeeded: searching forward for </${tag}>`);
const e1 = text.indexOf(`</${tag}>`, startPos + charPos);
const e2 = text.indexOf('/>', startPos + charPos);
const tagEndPos = e1 === -1 ? e2 : e2 === -1 ? e1 : Math.min(e1, e2);
if (tagEndPos === -1) {
//logger.debug(`breakLengthyTextIfNeeded: exit tag ${tag} not found, exiting`);
} else {
//logger.debug(`breakLengthyTextIfNeeded: exit tag ${tag} found at ${tagEndPos}`);
charPos = tagEndPos + 1;
}
tag = null;
}
continue;
}
if (charPos < chunkSize) {
charPos++;
continue;
}
// start looking for a good break point
let chunkIt = false;
const a = text[startPos + charPos];
const b = text[startPos + charPos + 1];
if (/[\.!\?]/.test(a) && /\s/.test(b)) {
//logger.debug('breakLengthyTextIfNeeded: breaking at sentence end');
chunkIt = true;
}
if (chunkIt) {
charPos++;
const chunk = text.substr(startPos, charPos);
if (isSSML) {
result.push(0 === startPos ? `${chunk}</speak>` : `<speak>${chunk}</speak>`);
}
else result.push(chunk);
charPos = 0;
startPos += chunk.length;
//logger.debug({chunk: result[result.length - 1]},
// `breakLengthyTextIfNeeded: chunked; new starting pos ${startPos}`);
}
else charPos++;
} }
// final chunk
if (startPos < text.length) {
const chunk = text.substr(startPos);
if (isSSML) {
result.push(0 === startPos ? `${chunk}</speak>` : `<speak>${chunk}`);
}
else result.push(chunk);
//logger.debug({chunk: result[result.length - 1]},
// `breakLengthyTextIfNeeded: final chunk; starting pos ${startPos} length ${chunk.length}`);
}
return result;
}; };
class TaskSay extends Task { class TaskSay extends Task {

27
package-lock.json generated
View File

@@ -34,6 +34,7 @@
"moment": "^2.29.4", "moment": "^2.29.4",
"parse-url": "^8.1.0", "parse-url": "^8.1.0",
"pino": "^6.14.0", "pino": "^6.14.0",
"polly-ssml-split": "^0.1.0",
"sdp-transform": "^2.14.1", "sdp-transform": "^2.14.1",
"short-uuid": "^4.2.0", "short-uuid": "^4.2.0",
"to-snake-case": "^1.0.0", "to-snake-case": "^1.0.0",
@@ -5956,6 +5957,19 @@
"node": ">=8" "node": ">=8"
} }
}, },
"node_modules/polly-ssml-split": {
"version": "0.1.0",
"resolved": "https://registry.npmjs.org/polly-ssml-split/-/polly-ssml-split-0.1.0.tgz",
"integrity": "sha512-vweYqyWC4WwUZPh8cywLeYpj5IswgAXhc+twq8Y6inqFo32JU8YlAZtFmHPhdI456gh3bSwupLaL+6WV9CQuUw==",
"dependencies": {
"polly-text-split": "^0.1.4"
}
},
"node_modules/polly-text-split": {
"version": "0.1.4",
"resolved": "https://registry.npmjs.org/polly-text-split/-/polly-text-split-0.1.4.tgz",
"integrity": "sha512-WhYm13sQyPxdn5yWpGi45WFWZOruKBqs+y0iXWVz16y+yV612WjOwqvh4s1j7CgWbid+8rbjiHjxvZwJE1zVFw=="
},
"node_modules/prelude-ls": { "node_modules/prelude-ls": {
"version": "1.2.1", "version": "1.2.1",
"resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz",
@@ -12345,6 +12359,19 @@
"find-up": "^4.0.0" "find-up": "^4.0.0"
} }
}, },
"polly-ssml-split": {
"version": "0.1.0",
"resolved": "https://registry.npmjs.org/polly-ssml-split/-/polly-ssml-split-0.1.0.tgz",
"integrity": "sha512-vweYqyWC4WwUZPh8cywLeYpj5IswgAXhc+twq8Y6inqFo32JU8YlAZtFmHPhdI456gh3bSwupLaL+6WV9CQuUw==",
"requires": {
"polly-text-split": "^0.1.4"
}
},
"polly-text-split": {
"version": "0.1.4",
"resolved": "https://registry.npmjs.org/polly-text-split/-/polly-text-split-0.1.4.tgz",
"integrity": "sha512-WhYm13sQyPxdn5yWpGi45WFWZOruKBqs+y0iXWVz16y+yV612WjOwqvh4s1j7CgWbid+8rbjiHjxvZwJE1zVFw=="
},
"prelude-ls": { "prelude-ls": {
"version": "1.2.1", "version": "1.2.1",
"resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz",

View File

@@ -56,7 +56,8 @@
"uuid-random": "^1.3.2", "uuid-random": "^1.3.2",
"verify-aws-sns-signature": "^0.1.0", "verify-aws-sns-signature": "^0.1.0",
"ws": "^8.9.0", "ws": "^8.9.0",
"xml2js": "^0.4.23" "xml2js": "^0.4.23",
"polly-ssml-split": "^0.1.0"
}, },
"devDependencies": { "devDependencies": {
"clear-module": "^4.1.2", "clear-module": "^4.1.2",

View File

@@ -0,0 +1,9 @@
{
"say": {
"text": "<speak>I already told you <emphasis level=\"strong\">I already told you I already told you I already told you I already told you! I already told you I already told you I already told you I already told you? I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you told I already told you I already told you told I already told you I already told you. I already told you <break time=\"3s\"/> I really like that person!</emphasis> this is another long text.</speak>",
"synthesizer": {
"vendor": "google",
"language": "en-US"
}
}
}

View File

@@ -0,0 +1,9 @@
{
"say": {
"text": "<speak>I already told you I already told you I already told you I already told you I already told you! I already told you I already told you I already told you I already told you? I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you I already told you told I already told you I already told you told I already told you I already told you. I already told you <break time=\"3s\"/> I <emphasis level=\"strong\">really like that person!</emphasis> this is another long text.</speak>",
"synthesizer": {
"vendor": "google",
"language": "en-US"
}
}
}

View File

@@ -44,7 +44,22 @@ test('unit tests', (t) => {
task = makeTask(logger, require('./data/good/say-text-array')); task = makeTask(logger, require('./data/good/say-text-array'));
t.ok(task.name === 'say', 'parsed say with multiple segments'); t.ok(task.name === 'say', 'parsed say with multiple segments');
task = makeTask(logger, require('./data/good/say-ssml'));
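// The SSML is longer than 1000 characters, so it is expected to be split into 3 chunks:
// 1st chunk must exceed the 100-character soft limit, so it ends at '?' rather than at the earlier '!'.
// 2nd chunk is a long run of text (under 1000 characters) that ends at the '.'.
// 3rd chunk is the remainder.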
t.ok(task.text.length === 3 &&
task.text[0].length === 187 &&
task.text[1].length === 882 &&
task.text[2].length === 123, 'parsed say');
task = makeTask(logger, require('./data/bad/bad-say-ssml'));
t.ok(task.text.length === 1 &&
task.text[0].length === 1162, 'parsed bad say');
const alt = require('./data/good/alternate-syntax'); const alt = require('./data/good/alternate-syntax');
const normalize = require('../lib/utils/normalize-jambones'); const normalize = require('../lib/utils/normalize-jambones');
normalize(logger, alt).forEach((t) => { normalize(logger, alt).forEach((t) => {