Feat/tts streaming (#994)

* wip

* add TtsStreamingBuffer class to abstract handling of streaming tokens

* wip

* add throttling support

* support background ttsStream (#995)

* wip

* add TtsStreamingBuffer class to abstract handling of streaming tokens

* wip

* support background ttsStream

* wip

---------

Co-authored-by: Dave Horton <daveh@beachdognet.com>

* wip

* don't send if we have nothing to send

* initial testing with cartesia

* wip

---------

Co-authored-by: Hoan Luu Huu <110280845+xquanluu@users.noreply.github.com>
This commit is contained in:
Dave Horton
2024-12-18 14:44:37 -05:00
committed by GitHub
parent f37e1540ee
commit ba3f46df64
11 changed files with 731 additions and 64 deletions

View File

@@ -9,7 +9,8 @@ const {
KillReason,
RecordState,
AllowedSipRecVerbs,
AllowedConfirmSessionVerbs
AllowedConfirmSessionVerbs,
TtsStreamingEvents
} = require('../utils/constants');
const moment = require('moment');
const assert = require('assert');
@@ -21,6 +22,7 @@ const listTaskNames = require('../utils/summarize-tasks');
const HttpRequestor = require('../utils/http-requestor');
const WsRequestor = require('../utils/ws-requestor');
const ActionHookDelayProcessor = require('../utils/action-hook-delay');
const TtsStreamingBuffer = require('../utils/tts-streaming-buffer');
const {parseUri} = require('drachtio-srf');
const {
JAMBONES_INJECT_CONTENT,
@@ -413,27 +415,24 @@ class CallSession extends Emitter {
get isAdultingCallSession() {
return this.constructor.name === 'AdultingCallSession';
}
/**
* returns true if this session is a ConfirmCallSession
*/
get isConfirmCallSession() {
return this.constructor.name === 'ConfirmCallSession';
}
/**
* returns true if this session is a SipRecCallSession
*/
get isSipRecCallSession() {
return this.constructor.name === 'SipRecCallSession';
}
/**
* returns true if this session is a SmsCallSession
*/
get isSmsCallSession() {
return this.constructor.name === 'SmsCallSession';
}
get isRestCallSession() {
return this.constructor.name === 'RestCallSession';
}
get InboundCallSession() {
return this.constructor.name === 'InboundCallSession';
}
get isNormalCallSession() {
return this.constructor.name === 'InboundCallSession' || this.constructor.name === 'RestCallSession';
}
get is3pccInvite() {
return this.isInboundCallSession && this.req?.body?.length === 0;
@@ -451,6 +450,10 @@ class CallSession extends Emitter {
return this.backgroundTaskManager.isTaskRunning('bargeIn');
}
get isTtsStreamEnabled() {
return this.backgroundTaskManager.isTaskRunning('ttsStream');
}
get isListenEnabled() {
return this.backgroundTaskManager.isTaskRunning('listen');
}
@@ -513,6 +516,10 @@ class CallSession extends Emitter {
this._sipRequestWithinDialogHook = url;
}
get isTtsStreamOpen() {
return this.currentTask?.isStreamingTts ||
this.backgroundTaskManager.getTask('ttsStream')?.isStreamingTts;
}
// Bot Delay (actionHook delayed)
get actionHookDelayEnabled() {
return this._actionHookDelayEnabled;
@@ -587,6 +594,25 @@ class CallSession extends Emitter {
}
}
getTsStreamingVendor() {
let v;
if (this.currentTask?.isStreamingTts) {
const {vendor} = this.currentTask.getTtsVendorData(this);
v = vendor;
}
else if (this.backgroundTaskManager.getTask('ttsStream')?.isStreamingTts) {
const {vendor} = this.backgroundTaskManager.getTask('ttsStream').getTtsVendorData(this);
v = vendor;
}
return v;
}
get appIsUsingWebsockets() {
return this.requestor instanceof WsRequestor;
}
/* end of getters and setters */
async clearOrRestoreActionHookDelayProcessor() {
if (this._actionHookDelayProcessor) {
await this._actionHookDelayProcessor.stop();
@@ -804,6 +830,36 @@ class CallSession extends Emitter {
}
}
async enableBackgroundTtsStream(say) {
try {
if (this.isTtsStreamEnabled) {
this.logger.debug('CallSession:enableBackgroundTtsStream - ttsStream currently enabled, ignoring request');
} else if (this.appIsUsingWebsockets && this.isNormalCallSession) {
await this.backgroundTaskManager.newTask('ttsStream', say);
this.logger.debug('CallSession:enableBackgroundTtsStream - ttsStream enabled');
} else {
this.logger.debug(
'CallSession:enableBackgroundTtsStream - ignoring request as call does not have required conditions');
}
} catch (err) {
this.logger.info({err, say}, 'CallSession:enableBackgroundTtsStream - Error creating background tts stream task');
}
}
disableTtsStream() {
if (this.isTtsStreamEnabled) {
this.backgroundTaskManager.stop('ttsStream');
this.logger.debug('CallSession:disableTtsStream - ttsStream disabled');
}
}
clearTtsStream() {
this.ttsStreamingBuffer?.clear();
}
startTtsStream() {
this.ttsStreamingBuffer?.start();
}
async enableBotMode(gather, autoEnable) {
try {
let task;
@@ -1063,6 +1119,17 @@ class CallSession extends Emitter {
this.inbandDtmfEnabled = voipCarrier?.dtmf_type === 'tones';
}
if (this.isNormalCallSession) {
this.ttsStreamingBuffer = new TtsStreamingBuffer(this);
this.ttsStreamingBuffer.on(TtsStreamingEvents.Empty, this._onTtsStreamingEmpty.bind(this));
this.ttsStreamingBuffer.on(TtsStreamingEvents.Pause, this._onTtsStreamingPause.bind(this));
this.ttsStreamingBuffer.on(TtsStreamingEvents.Resume, this._onTtsStreamingResume.bind(this));
this.ttsStreamingBuffer.on(TtsStreamingEvents.ConnectFailure, this._onTtsStreamingConnectFailure.bind(this));
}
else {
this.logger.info(`CallSession:exec - not a normal call session: ${this.constructor.name}`);
}
while (this.tasks.length && !this.callGone) {
const taskNum = ++this.taskIdx;
const stackNum = this.stackIdx;
@@ -1646,6 +1713,39 @@ Duration=${duration} `
.catch((err) => this.logger.error(err, 'CallSession:_lccLlmUpdate'));
}
async _lccTtsTokens(opts) {
const {id, tokens} = opts;
if (id === undefined) {
this.logger.info({opts}, 'CallSession:_lccTtsTokens - invalid command since id is missing');
return;
}
else if (tokens === undefined) {
this.logger.info({opts}, 'CallSession:_lccTtsTokens - invalid command since id is missing');
return this.requestor.request('tts:tokens-result', '/tokens-result', {
id,
status: 'failed',
reason: 'missing tokens'
}).catch((err) => this.logger.debug({err}, 'CallSession:_notifyTaskStatus - Error sending'));
}
let res;
try {
res = await this.ttsStreamingBuffer?.bufferTokens(tokens);
this.logger.info({id, res}, 'CallSession:_lccTtsTokens - tts:tokens-result');
} catch (err) {
this.logger.info(err, 'CallSession:_lccTtsTokens');
}
this.requestor.request('tts:tokens-result', '/tokens-result', {id, ...res})
.catch((err) => this.logger.debug({err}, 'CallSession:_notifyTaskStatus - Error sending'));
}
_lccTtsFlush(opts) {
this.ttsStreamingBuffer?.flush(opts);
}
_lccTtsClear(opts) {
this.ttsStreamingBuffer?.clear(opts);
}
/**
* perform call hangup by jambonz
@@ -2027,6 +2127,18 @@ Duration=${duration} `
this._lccLlmUpdate(data, call_sid);
break;
case 'tts:tokens':
this._lccTtsTokens(data);
break;
case 'tts:flush':
this._lccTtsFlush(data);
break;
case 'tts:clear':
this._lccTtsClear(data);
break;
default:
this.logger.info(`CallSession:_onCommand - invalid command ${command}`);
}
@@ -2221,6 +2333,8 @@ Duration=${duration} `
// close all background tasks
this.backgroundTaskManager.stopAll();
this.clearOrRestoreActionHookDelayProcessor().catch((err) => {});
this.ttsStreamingBuffer?.stop();
}
/**
@@ -2776,6 +2890,37 @@ Duration=${duration} `
this.verbHookSpan = null;
}
}
_onTtsStreamingEmpty() {
const task = this.currentTask;
if (task && TaskName.Say === task.name) {
task.notifyTtsStreamIsEmpty();
}
}
_onTtsStreamingPause() {
this.requestor?.request('tts:streaming-event', '/streaming-event', {event_type: 'stream_paused'})
.catch((err) => this.logger.info({err}, 'CallSession:_onTtsStreamingPause - Error sending'));
}
_onTtsStreamingResume() {
this.requestor?.request('tts:streaming-event', 'streaming-event', {event_type: 'stream_resumed'})
.catch((err) => this.logger.info({err}, 'CallSession:_onTtsStreamingResume - Error sending'));
}
async _onTtsStreamingConnectFailure(vendor) {
const {writeAlerts, AlertType} = this.srf.locals;
try {
await writeAlerts({
alert_type: AlertType.TTS_STREAMING_CONNECTION_FAILURE,
account_sid: this.accountSid,
vendor
});
} catch (error) {
this.logger.error({error}, 'Error writing WEBHOOK_CONNECTION_FAILURE alert');
}
this.logger.info({vendor}, 'CallSession:_onTtsStreamingConnectFailure - tts streaming connect failure');
}
}
module.exports = CallSession;