mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-20 08:40:38 +00:00
Feat/tts streaming (#994)
* wip * add TtsStreamingBuffer class to abstract handling of streaming tokens * wip * add throttling support * support background ttsStream (#995) * wip * add TtsStreamingBuffer class to abstract handling of streaming tokens * wip * support background ttsStream * wip --------- Co-authored-by: Dave Horton <daveh@beachdognet.com> * wip * dont send if we have nothing to send * initial testing with cartesia * wip --------- Co-authored-by: Hoan Luu Huu <110280845+xquanluu@users.noreply.github.com>
This commit is contained in:
@@ -9,7 +9,8 @@ const {
|
||||
KillReason,
|
||||
RecordState,
|
||||
AllowedSipRecVerbs,
|
||||
AllowedConfirmSessionVerbs
|
||||
AllowedConfirmSessionVerbs,
|
||||
TtsStreamingEvents
|
||||
} = require('../utils/constants');
|
||||
const moment = require('moment');
|
||||
const assert = require('assert');
|
||||
@@ -21,6 +22,7 @@ const listTaskNames = require('../utils/summarize-tasks');
|
||||
const HttpRequestor = require('../utils/http-requestor');
|
||||
const WsRequestor = require('../utils/ws-requestor');
|
||||
const ActionHookDelayProcessor = require('../utils/action-hook-delay');
|
||||
const TtsStreamingBuffer = require('../utils/tts-streaming-buffer');
|
||||
const {parseUri} = require('drachtio-srf');
|
||||
const {
|
||||
JAMBONES_INJECT_CONTENT,
|
||||
@@ -413,27 +415,24 @@ class CallSession extends Emitter {
|
||||
get isAdultingCallSession() {
|
||||
return this.constructor.name === 'AdultingCallSession';
|
||||
}
|
||||
|
||||
/**
|
||||
* returns true if this session is a ConfirmCallSession
|
||||
*/
|
||||
get isConfirmCallSession() {
|
||||
return this.constructor.name === 'ConfirmCallSession';
|
||||
}
|
||||
|
||||
/**
|
||||
* returns true if this session is a SipRecCallSession
|
||||
*/
|
||||
get isSipRecCallSession() {
|
||||
return this.constructor.name === 'SipRecCallSession';
|
||||
}
|
||||
|
||||
/**
|
||||
* returns true if this session is a SmsCallSession
|
||||
*/
|
||||
get isSmsCallSession() {
|
||||
return this.constructor.name === 'SmsCallSession';
|
||||
}
|
||||
get isRestCallSession() {
|
||||
return this.constructor.name === 'RestCallSession';
|
||||
}
|
||||
get InboundCallSession() {
|
||||
return this.constructor.name === 'InboundCallSession';
|
||||
}
|
||||
get isNormalCallSession() {
|
||||
return this.constructor.name === 'InboundCallSession' || this.constructor.name === 'RestCallSession';
|
||||
}
|
||||
|
||||
get is3pccInvite() {
|
||||
return this.isInboundCallSession && this.req?.body?.length === 0;
|
||||
@@ -451,6 +450,10 @@ class CallSession extends Emitter {
|
||||
return this.backgroundTaskManager.isTaskRunning('bargeIn');
|
||||
}
|
||||
|
||||
get isTtsStreamEnabled() {
|
||||
return this.backgroundTaskManager.isTaskRunning('ttsStream');
|
||||
}
|
||||
|
||||
get isListenEnabled() {
|
||||
return this.backgroundTaskManager.isTaskRunning('listen');
|
||||
}
|
||||
@@ -513,6 +516,10 @@ class CallSession extends Emitter {
|
||||
this._sipRequestWithinDialogHook = url;
|
||||
}
|
||||
|
||||
get isTtsStreamOpen() {
|
||||
return this.currentTask?.isStreamingTts ||
|
||||
this.backgroundTaskManager.getTask('ttsStream')?.isStreamingTts;
|
||||
}
|
||||
// Bot Delay (actionHook delayed)
|
||||
get actionHookDelayEnabled() {
|
||||
return this._actionHookDelayEnabled;
|
||||
@@ -587,6 +594,25 @@ class CallSession extends Emitter {
|
||||
}
|
||||
}
|
||||
|
||||
getTsStreamingVendor() {
|
||||
let v;
|
||||
if (this.currentTask?.isStreamingTts) {
|
||||
const {vendor} = this.currentTask.getTtsVendorData(this);
|
||||
v = vendor;
|
||||
}
|
||||
else if (this.backgroundTaskManager.getTask('ttsStream')?.isStreamingTts) {
|
||||
const {vendor} = this.backgroundTaskManager.getTask('ttsStream').getTtsVendorData(this);
|
||||
v = vendor;
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
get appIsUsingWebsockets() {
|
||||
return this.requestor instanceof WsRequestor;
|
||||
}
|
||||
|
||||
/* end of getters and setters */
|
||||
|
||||
async clearOrRestoreActionHookDelayProcessor() {
|
||||
if (this._actionHookDelayProcessor) {
|
||||
await this._actionHookDelayProcessor.stop();
|
||||
@@ -804,6 +830,36 @@ class CallSession extends Emitter {
|
||||
}
|
||||
}
|
||||
|
||||
async enableBackgroundTtsStream(say) {
|
||||
try {
|
||||
if (this.isTtsStreamEnabled) {
|
||||
this.logger.debug('CallSession:enableBackgroundTtsStream - ttsStream currently enabled, ignoring request');
|
||||
} else if (this.appIsUsingWebsockets && this.isNormalCallSession) {
|
||||
await this.backgroundTaskManager.newTask('ttsStream', say);
|
||||
this.logger.debug('CallSession:enableBackgroundTtsStream - ttsStream enabled');
|
||||
} else {
|
||||
this.logger.debug(
|
||||
'CallSession:enableBackgroundTtsStream - ignoring request as call does not have required conditions');
|
||||
}
|
||||
} catch (err) {
|
||||
this.logger.info({err, say}, 'CallSession:enableBackgroundTtsStream - Error creating background tts stream task');
|
||||
}
|
||||
}
|
||||
|
||||
disableTtsStream() {
|
||||
if (this.isTtsStreamEnabled) {
|
||||
this.backgroundTaskManager.stop('ttsStream');
|
||||
this.logger.debug('CallSession:disableTtsStream - ttsStream disabled');
|
||||
}
|
||||
}
|
||||
clearTtsStream() {
|
||||
this.ttsStreamingBuffer?.clear();
|
||||
}
|
||||
|
||||
startTtsStream() {
|
||||
this.ttsStreamingBuffer?.start();
|
||||
}
|
||||
|
||||
async enableBotMode(gather, autoEnable) {
|
||||
try {
|
||||
let task;
|
||||
@@ -1063,6 +1119,17 @@ class CallSession extends Emitter {
|
||||
this.inbandDtmfEnabled = voipCarrier?.dtmf_type === 'tones';
|
||||
}
|
||||
|
||||
if (this.isNormalCallSession) {
|
||||
this.ttsStreamingBuffer = new TtsStreamingBuffer(this);
|
||||
this.ttsStreamingBuffer.on(TtsStreamingEvents.Empty, this._onTtsStreamingEmpty.bind(this));
|
||||
this.ttsStreamingBuffer.on(TtsStreamingEvents.Pause, this._onTtsStreamingPause.bind(this));
|
||||
this.ttsStreamingBuffer.on(TtsStreamingEvents.Resume, this._onTtsStreamingResume.bind(this));
|
||||
this.ttsStreamingBuffer.on(TtsStreamingEvents.ConnectFailure, this._onTtsStreamingConnectFailure.bind(this));
|
||||
}
|
||||
else {
|
||||
this.logger.info(`CallSession:exec - not a normal call session: ${this.constructor.name}`);
|
||||
}
|
||||
|
||||
while (this.tasks.length && !this.callGone) {
|
||||
const taskNum = ++this.taskIdx;
|
||||
const stackNum = this.stackIdx;
|
||||
@@ -1646,6 +1713,39 @@ Duration=${duration} `
|
||||
.catch((err) => this.logger.error(err, 'CallSession:_lccLlmUpdate'));
|
||||
}
|
||||
|
||||
async _lccTtsTokens(opts) {
|
||||
const {id, tokens} = opts;
|
||||
|
||||
if (id === undefined) {
|
||||
this.logger.info({opts}, 'CallSession:_lccTtsTokens - invalid command since id is missing');
|
||||
return;
|
||||
}
|
||||
else if (tokens === undefined) {
|
||||
this.logger.info({opts}, 'CallSession:_lccTtsTokens - invalid command since id is missing');
|
||||
return this.requestor.request('tts:tokens-result', '/tokens-result', {
|
||||
id,
|
||||
status: 'failed',
|
||||
reason: 'missing tokens'
|
||||
}).catch((err) => this.logger.debug({err}, 'CallSession:_notifyTaskStatus - Error sending'));
|
||||
}
|
||||
let res;
|
||||
try {
|
||||
res = await this.ttsStreamingBuffer?.bufferTokens(tokens);
|
||||
this.logger.info({id, res}, 'CallSession:_lccTtsTokens - tts:tokens-result');
|
||||
} catch (err) {
|
||||
this.logger.info(err, 'CallSession:_lccTtsTokens');
|
||||
}
|
||||
this.requestor.request('tts:tokens-result', '/tokens-result', {id, ...res})
|
||||
.catch((err) => this.logger.debug({err}, 'CallSession:_notifyTaskStatus - Error sending'));
|
||||
}
|
||||
|
||||
_lccTtsFlush(opts) {
|
||||
this.ttsStreamingBuffer?.flush(opts);
|
||||
}
|
||||
|
||||
_lccTtsClear(opts) {
|
||||
this.ttsStreamingBuffer?.clear(opts);
|
||||
}
|
||||
|
||||
/**
|
||||
* perform call hangup by jambonz
|
||||
@@ -2027,6 +2127,18 @@ Duration=${duration} `
|
||||
this._lccLlmUpdate(data, call_sid);
|
||||
break;
|
||||
|
||||
case 'tts:tokens':
|
||||
this._lccTtsTokens(data);
|
||||
break;
|
||||
|
||||
case 'tts:flush':
|
||||
this._lccTtsFlush(data);
|
||||
break;
|
||||
|
||||
case 'tts:clear':
|
||||
this._lccTtsClear(data);
|
||||
break;
|
||||
|
||||
default:
|
||||
this.logger.info(`CallSession:_onCommand - invalid command ${command}`);
|
||||
}
|
||||
@@ -2221,6 +2333,8 @@ Duration=${duration} `
|
||||
// close all background tasks
|
||||
this.backgroundTaskManager.stopAll();
|
||||
this.clearOrRestoreActionHookDelayProcessor().catch((err) => {});
|
||||
|
||||
this.ttsStreamingBuffer?.stop();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -2776,6 +2890,37 @@ Duration=${duration} `
|
||||
this.verbHookSpan = null;
|
||||
}
|
||||
}
|
||||
|
||||
_onTtsStreamingEmpty() {
|
||||
const task = this.currentTask;
|
||||
if (task && TaskName.Say === task.name) {
|
||||
task.notifyTtsStreamIsEmpty();
|
||||
}
|
||||
}
|
||||
|
||||
_onTtsStreamingPause() {
|
||||
this.requestor?.request('tts:streaming-event', '/streaming-event', {event_type: 'stream_paused'})
|
||||
.catch((err) => this.logger.info({err}, 'CallSession:_onTtsStreamingPause - Error sending'));
|
||||
}
|
||||
|
||||
_onTtsStreamingResume() {
|
||||
this.requestor?.request('tts:streaming-event', 'streaming-event', {event_type: 'stream_resumed'})
|
||||
.catch((err) => this.logger.info({err}, 'CallSession:_onTtsStreamingResume - Error sending'));
|
||||
}
|
||||
|
||||
async _onTtsStreamingConnectFailure(vendor) {
|
||||
const {writeAlerts, AlertType} = this.srf.locals;
|
||||
try {
|
||||
await writeAlerts({
|
||||
alert_type: AlertType.TTS_STREAMING_CONNECTION_FAILURE,
|
||||
account_sid: this.accountSid,
|
||||
vendor
|
||||
});
|
||||
} catch (error) {
|
||||
this.logger.error({error}, 'Error writing WEBHOOK_CONNECTION_FAILURE alert');
|
||||
}
|
||||
this.logger.info({vendor}, 'CallSession:_onTtsStreamingConnectFailure - tts streaming connect failure');
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = CallSession;
|
||||
|
||||
Reference in New Issue
Block a user