mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-20 16:50:39 +00:00
Say length text (#165)
* typo for media bug name in azure and punctuation fix * say: split very long text intelligently * more fixes from testing * update to latest synthAudio
This commit is contained in:
@@ -1,12 +1,107 @@
|
||||
const Task = require('./task');
|
||||
const {TaskName, TaskPreconditions} = require('../utils/constants');
|
||||
|
||||
const breakLengthyTextIfNeeded = (logger, text) => {
|
||||
const chunkSize = 1000;
|
||||
if (text.length <= chunkSize) return [text];
|
||||
|
||||
const result = [];
|
||||
const isSSML = text.startsWith('<speak>');
|
||||
let startPos = 0;
|
||||
let charPos = isSSML ? 7 : 0; // skip <speak>
|
||||
let tag;
|
||||
//logger.debug({isSSML}, `breakLengthyTextIfNeeded: handling text of length ${text.length}`);
|
||||
while (startPos + charPos < text.length) {
|
||||
if (isSSML && !tag && text[startPos + charPos] === '<') {
|
||||
const tagStartPos = ++charPos;
|
||||
while (startPos + charPos < text.length) {
|
||||
if (text[startPos + charPos] === '>') {
|
||||
if (text[startPos + charPos - 1] === '\\') tag = null;
|
||||
else if (!tag) tag = text.substring(startPos + tagStartPos, startPos + charPos - 1);
|
||||
break;
|
||||
}
|
||||
if (!tag) {
|
||||
const c = text[startPos + charPos];
|
||||
if (c === ' ') {
|
||||
tag = text.substring(startPos + tagStartPos, startPos + charPos);
|
||||
//logger.debug(`breakLengthyTextIfNeeded: enter tag ${tag} (space)`);
|
||||
break;
|
||||
}
|
||||
}
|
||||
charPos++;
|
||||
}
|
||||
if (tag) {
|
||||
//search for end of tag
|
||||
//logger.debug(`breakLengthyTextIfNeeded: searching forward for </${tag}>`);
|
||||
const e1 = text.indexOf(`</${tag}>`, startPos + charPos);
|
||||
const e2 = text.indexOf('/>', startPos + charPos);
|
||||
const tagEndPos = e1 === -1 ? e2 : e2 === -1 ? e1 : Math.min(e1, e2);
|
||||
if (tagEndPos === -1) {
|
||||
//logger.debug(`breakLengthyTextIfNeeded: exit tag ${tag} not found, exiting`);
|
||||
} else {
|
||||
//logger.debug(`breakLengthyTextIfNeeded: exit tag ${tag} found at ${tagEndPos}`);
|
||||
charPos = tagEndPos + 1;
|
||||
}
|
||||
tag = null;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (charPos < chunkSize) {
|
||||
charPos++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// start looking for a good break point
|
||||
let chunkIt = false;
|
||||
const a = text[startPos + charPos];
|
||||
const b = text[startPos + charPos + 1];
|
||||
if (/[\.!\?]/.test(a) && /\s/.test(b)) {
|
||||
//logger.debug('breakLengthyTextIfNeeded: breaking at sentence end');
|
||||
chunkIt = true;
|
||||
}
|
||||
if (chunkIt) {
|
||||
charPos++;
|
||||
const chunk = text.substr(startPos, charPos);
|
||||
if (isSSML) {
|
||||
result.push(0 === startPos ? `${chunk}</speak>` : `<speak>${chunk}</speak>`);
|
||||
}
|
||||
else result.push(chunk);
|
||||
charPos = 0;
|
||||
startPos += chunk.length;
|
||||
|
||||
//logger.debug({chunk: result[result.length - 1]},
|
||||
// `breakLengthyTextIfNeeded: chunked; new starting pos ${startPos}`);
|
||||
|
||||
}
|
||||
else charPos++;
|
||||
}
|
||||
|
||||
// final chunk
|
||||
if (startPos < text.length) {
|
||||
const chunk = text.substr(startPos);
|
||||
if (isSSML) {
|
||||
result.push(0 === startPos ? `${chunk}</speak>` : `<speak>${chunk}`);
|
||||
}
|
||||
else result.push(chunk);
|
||||
|
||||
//logger.debug({chunk: result[result.length - 1]},
|
||||
// `breakLengthyTextIfNeeded: final chunk; starting pos ${startPos} length ${chunk.length}`);
|
||||
|
||||
}
|
||||
|
||||
return result;
|
||||
};
|
||||
|
||||
class TaskSay extends Task {
|
||||
constructor(logger, opts, parentTask) {
|
||||
super(logger, opts);
|
||||
this.preconditions = TaskPreconditions.Endpoint;
|
||||
|
||||
this.text = Array.isArray(this.data.text) ? this.data.text : [this.data.text];
|
||||
this.text = (Array.isArray(this.data.text) ? this.data.text : [this.data.text])
|
||||
.map((t) => breakLengthyTextIfNeeded(this.logger, t))
|
||||
.flat();
|
||||
|
||||
this.loop = this.data.loop || 1;
|
||||
this.earlyMedia = this.data.earlyMedia === true || (parentTask && parentTask.earlyMedia);
|
||||
this.synthesizer = this.data.synthesizer || {};
|
||||
|
||||
@@ -173,6 +173,7 @@ class TaskTranscribe extends Task {
|
||||
['diarization', 'GOOGLE_SPEECH_PROFANITY_FILTER']
|
||||
].forEach((arr) => {
|
||||
if (this[arr[0]]) opts[arr[1]] = true;
|
||||
else if (this[arr[0]] === false) opts[arr[1]] = false;
|
||||
});
|
||||
if (this.hints.length > 0) {
|
||||
opts.GOOGLE_SPEECH_HINTS = this.hints.join(',');
|
||||
|
||||
Reference in New Issue
Block a user