mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2026-02-14 18:30:59 +00:00
Feat/tts streaming (#994)
* wip * add TtsStreamingBuffer class to abstract handling of streaming tokens * wip * add throttling support * support background ttsStream (#995) * wip * add TtsStreamingBuffer class to abstract handling of streaming tokens * wip * support background ttsStream * wip --------- Co-authored-by: Dave Horton <daveh@beachdognet.com> * wip * dont send if we have nothing to send * initial testing with cartesia * wip --------- Co-authored-by: Hoan Luu Huu <110280845+xquanluu@users.noreply.github.com>
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
const assert = require('assert');
|
||||
const TtsTask = require('./tts-task');
|
||||
const {TaskName, TaskPreconditions} = require('../utils/constants');
|
||||
const pollySSMLSplit = require('polly-ssml-split');
|
||||
@@ -35,24 +36,40 @@ class TaskSay extends TtsTask {
|
||||
super(logger, opts, parentTask);
|
||||
this.preconditions = TaskPreconditions.Endpoint;
|
||||
|
||||
this.text = (Array.isArray(this.data.text) ? this.data.text : [this.data.text])
|
||||
.map((t) => breakLengthyTextIfNeeded(this.logger, t))
|
||||
.flat();
|
||||
assert.ok((typeof this.data.text === 'string' || Array.isArray(this.data.text)) || this.data.stream === true,
|
||||
'Say: either text or stream:true is required');
|
||||
|
||||
this.loop = this.data.loop || 1;
|
||||
this.isHandledByPrimaryProvider = true;
|
||||
|
||||
if (this.data.stream === true) {
|
||||
this._isStreamingTts = true;
|
||||
this.closeOnStreamEmpty = this.data.closeOnStreamEmpty !== false;
|
||||
}
|
||||
else {
|
||||
this._isStreamingTts = false;
|
||||
this.text = (Array.isArray(this.data.text) ? this.data.text : [this.data.text])
|
||||
.map((t) => breakLengthyTextIfNeeded(this.logger, t))
|
||||
.flat();
|
||||
|
||||
this.loop = this.data.loop || 1;
|
||||
this.isHandledByPrimaryProvider = true;
|
||||
}
|
||||
}
|
||||
|
||||
get name() { return TaskName.Say; }
|
||||
|
||||
get summary() {
|
||||
for (let i = 0; i < this.text.length; i++) {
|
||||
if (this.text[i].startsWith('silence_stream')) continue;
|
||||
return `${this.name}{text=${this.text[i].slice(0, 15)}${this.text[i].length > 15 ? '...' : ''}}`;
|
||||
if (this.isStreamingTts) return `${this.name} streaming`;
|
||||
else {
|
||||
for (let i = 0; i < this.text.length; i++) {
|
||||
if (this.text[i].startsWith('silence_stream')) continue;
|
||||
return `${this.name}{text=${this.text[i].slice(0, 15)}${this.text[i].length > 15 ? '...' : ''}}`;
|
||||
}
|
||||
return `${this.name}{${this.text[0]}}`;
|
||||
}
|
||||
return `${this.name}{${this.text[0]}}`;
|
||||
}
|
||||
|
||||
get isStreamingTts() { return this._isStreamingTts; }
|
||||
|
||||
_validateURL(urlString) {
|
||||
try {
|
||||
new URL(urlString);
|
||||
@@ -63,14 +80,19 @@ class TaskSay extends TtsTask {
|
||||
}
|
||||
|
||||
async exec(cs, obj) {
|
||||
if (this.isStreamingTts && !cs.appIsUsingWebsockets) {
|
||||
throw new Error('Say: streaming say verb requires applications to use the websocket API');
|
||||
}
|
||||
|
||||
try {
|
||||
await this.handling(cs, obj);
|
||||
if (this.isStreamingTts) await this.handlingStreaming(cs, obj);
|
||||
else await this.handling(cs, obj);
|
||||
this.emit('playDone');
|
||||
} catch (error) {
|
||||
if (error instanceof SpeechCredentialError) {
|
||||
// if say failed due to speech credentials, alarm is writtern and error notification is sent
|
||||
// finished this say to move to next task.
|
||||
this.logger.info('Say failed due to SpeechCredentialError, finished!');
|
||||
this.logger.info({error}, 'Say failed due to SpeechCredentialError, finished!');
|
||||
this.emit('playDone');
|
||||
return;
|
||||
}
|
||||
@@ -78,6 +100,35 @@ class TaskSay extends TtsTask {
|
||||
}
|
||||
}
|
||||
|
||||
async handlingStreaming(cs, {ep}) {
|
||||
const {vendor, language, voice, label} = this.getTtsVendorData(cs);
|
||||
const credentials = cs.getSpeechCredentials(vendor, 'tts', label);
|
||||
if (!credentials) {
|
||||
throw new SpeechCredentialError(
|
||||
`No text-to-speech service credentials for ${vendor} with labels: ${label} have been configured`);
|
||||
}
|
||||
|
||||
try {
|
||||
|
||||
await this.setTtsStreamingChannelVars(vendor, language, voice, credentials, ep);
|
||||
|
||||
await cs.startTtsStream();
|
||||
|
||||
cs.requestor?.request('tts:streaming-event', '/streaming-event', {event_type: 'stream_open'})
|
||||
.catch((err) => this.logger.info({err}, 'TaskSay:handlingStreaming - Error sending'));
|
||||
} catch (err) {
|
||||
this.logger.info({err}, 'TaskSay:handlingStreaming - Error setting channel vars');
|
||||
cs.requestor?.request('tts:streaming-event', '/streaming-event', {event_type: 'stream_closed'})
|
||||
.catch((err) => this.logger.info({err}, 'TaskSay:handlingStreaming - Error sending'));
|
||||
|
||||
//TODO: send tts:streaming-event with error?
|
||||
this.notifyTaskDone();
|
||||
}
|
||||
|
||||
await this.awaitTaskDone();
|
||||
this.logger.info('TaskSay:handlingStreaming - done');
|
||||
}
|
||||
|
||||
async handling(cs, {ep}) {
|
||||
const {srf, accountSid:account_sid, callSid:target_sid} = cs;
|
||||
const {writeAlerts, AlertType} = srf.locals;
|
||||
@@ -96,7 +147,7 @@ class TaskSay extends TtsTask {
|
||||
let voice = this.synthesizer.voice && this.synthesizer.voice !== 'default' ?
|
||||
this.synthesizer.voice :
|
||||
cs.speechSynthesisVoice;
|
||||
let label = this.taskInlcudeSynthesizer ? this.synthesizer.label : cs.speechSynthesisLabel;
|
||||
let label = this.taskIncludeSynthesizer ? this.synthesizer.label : cs.speechSynthesisLabel;
|
||||
|
||||
const fallbackVendor = this.synthesizer.fallbackVendor && this.synthesizer.fallbackVendor !== 'default' ?
|
||||
this.synthesizer.fallbackVendor :
|
||||
@@ -107,7 +158,7 @@ class TaskSay extends TtsTask {
|
||||
const fallbackVoice = this.synthesizer.fallbackVoice && this.synthesizer.fallbackVoice !== 'default' ?
|
||||
this.synthesizer.fallbackVoice :
|
||||
cs.fallbackSpeechSynthesisVoice;
|
||||
const fallbackLabel = this.taskInlcudeSynthesizer ?
|
||||
const fallbackLabel = this.taskIncludeSynthesizer ?
|
||||
this.synthesizer.fallbackLabel : cs.fallbackSpeechSynthesisLabel;
|
||||
|
||||
if (cs.hasFallbackTts) {
|
||||
@@ -253,6 +304,7 @@ class TaskSay extends TtsTask {
|
||||
this._playResolve = null;
|
||||
}
|
||||
}
|
||||
this.notifyTaskDone();
|
||||
}
|
||||
|
||||
_addStreamingTtsAttributes(span, evt) {
|
||||
@@ -273,6 +325,13 @@ class TaskSay extends TtsTask {
|
||||
delete attrs['cache_filename']; //no value in adding this to the span
|
||||
span.setAttributes(attrs);
|
||||
}
|
||||
|
||||
notifyTtsStreamIsEmpty() {
|
||||
if (this.isStreamingTts && this.closeOnStreamEmpty) {
|
||||
this.logger.info('TaskSay:notifyTtsStreamIsEmpty - stream is empty, killing task');
|
||||
this.notifyTaskDone();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const spanMapping = {
|
||||
|
||||
Reference in New Issue
Block a user