Compare commits

..

7 Commits

Author SHA1 Message Date
Dave Horton
a59784b8ab update base image to node:20-alpine (#1022) 2025-01-04 16:38:25 -05:00
Dave Horton
a2581eaeb4 tts throttling and send user_interruption event (#1019)
* tts throttling and send user_interruption event

* tts streaming: if we get a flush with tokens pending, send the flush after the tokens

* wip
2025-01-04 16:34:01 -05:00
Dave Horton
3706aa4d98 #1020 - fix for sticky bargein (#1021) 2025-01-03 10:41:35 -05:00
Dave Horton
25f1e65f63 feed TTS in sentence chunks when streaming (#1013)
* feed TTS in sentence chunks when streaming

* tts streaming: treat a paragraph as a chunk of text, even it not ending with a line end character

* wip
2024-12-31 15:16:25 -05:00
rammohan-y
c9f0481ca6 feat/1009, sending reason in X-Reason header when AHD processor giveup (#1014)
* feat/1009, sending reason in X-Reason header when AHD processor giveup is executed

* fixed jslint error

* added an alert
2024-12-31 15:09:23 -05:00
Hoan Luu Huu
564f6c9e55 support kill dial if sd ep is media timeout (#1001)
* support kill dial if sd ep is media timeout

* support kill dial if sd ep is media timeout

* support kill dial if sd ep is media timeout

* add media timeout reason header to bye message

* wip

* wip

* make configuration for freeswitch media timeout

* make configuration for freeswitch media timeout

* wip
2024-12-23 07:19:41 -05:00
Dave Horton
02f25f8343 fix cartesia channel vars for streaming (#1012) 2024-12-20 16:48:20 -05:00
11 changed files with 177 additions and 122 deletions

View File

@@ -1,4 +1,4 @@
FROM --platform=linux/amd64 node:18.15-alpine3.16 as base
FROM --platform=linux/amd64 node:20-alpine as base
RUN apk --update --no-cache add --virtual .builds-deps build-base python3

View File

@@ -137,6 +137,8 @@ const JAMBONES_EAGERLY_PRE_CACHE_AUDIO = parseInt(process.env.JAMBONES_EAGERLY_P
const JAMBONES_USE_FREESWITCH_TIMER_FD = process.env.JAMBONES_USE_FREESWITCH_TIMER_FD;
const JAMBONES_DIAL_SBC_FOR_REGISTERED_USER = process.env.JAMBONES_DIAL_SBC_FOR_REGISTERED_USER || false;
const JAMBONES_MEDIA_TIMEOUT_MS = process.env.JAMBONES_MEDIA_TIMEOUT_MS || 0;
const JAMBONES_MEDIA_HOLD_TIMEOUT_MS = process.env.JAMBONES_MEDIA_HOLD_TIMEOUT_MS || 0;
module.exports = {
JAMBONES_MYSQL_HOST,
@@ -223,5 +225,7 @@ module.exports = {
JAMBONZ_DISABLE_DIAL_PAI_HEADER,
JAMBONES_DISABLE_DIRECT_P2P_CALL,
JAMBONES_USE_FREESWITCH_TIMER_FD,
JAMBONES_DIAL_SBC_FOR_REGISTERED_USER
JAMBONES_DIAL_SBC_FOR_REGISTERED_USER,
JAMBONES_MEDIA_TIMEOUT_MS,
JAMBONES_MEDIA_HOLD_TIMEOUT_MS
};

View File

@@ -28,7 +28,9 @@ const {
JAMBONES_INJECT_CONTENT,
JAMBONES_EAGERLY_PRE_CACHE_AUDIO,
AWS_REGION,
JAMBONES_USE_FREESWITCH_TIMER_FD
JAMBONES_USE_FREESWITCH_TIMER_FD,
JAMBONES_MEDIA_TIMEOUT_MS,
JAMBONES_MEDIA_HOLD_TIMEOUT_MS
} = require('../config');
const bent = require('bent');
const BackgroundTaskManager = require('../utils/background-task-manager');
@@ -570,7 +572,18 @@ class CallSession extends Emitter {
this._actionHookDelayProcessor = new ActionHookDelayProcessor(this.logger, opts, this, this.ep);
this._actionHookDelayProcessor.on('giveup', () => {
this.logger.info('CallSession: ActionHookDelayProcessor: giveup event - hanging up call');
this._jambonzHangup();
const {writeAlerts} = this.srf.locals;
try {
writeAlerts({
alert_type: 'bot-action-delay-giveup',
account_sid: this.accountSid,
message: 'Call terminated due to bot action delay timeout',
target_sid: this.callSid
});
} catch (err) {
this.logger.error({err}, 'Error writing bot-action-delay-giveup alert');
}
this._jambonzHangup('bot-action-delay-giveup');
if (this.wakeupResolver) {
this.logger.debug('CallSession: Giveup timer expired - waking up');
this.wakeupResolver({reason: 'noResponseGiveUp'});
@@ -853,6 +866,8 @@ class CallSession extends Emitter {
}
}
clearTtsStream() {
this.requestor?.request('tts:streaming-event', '/streaming-event', {event_type: 'user_interruption'})
.catch((err) => this.logger.info({err}, 'CallSession:clearTtsStream - Error sending user_interruption'));
this.ttsStreamingBuffer?.clear();
}
@@ -2796,15 +2811,25 @@ Duration=${duration} `
_configMsEndpoint() {
this._enableInbandDtmfIfRequired(this.ep);
this.ep.once('destroy', this._handleMediaTimeout.bind(this));
const opts = {
...(this.onHoldMusic && {holdMusic: `shout://${this.onHoldMusic.replace(/^https?:\/\//, '')}`}),
...(JAMBONES_USE_FREESWITCH_TIMER_FD && {timer_name: 'timerfd'})
...(JAMBONES_USE_FREESWITCH_TIMER_FD && {timer_name: 'timerfd'}),
...(JAMBONES_MEDIA_TIMEOUT_MS && {media_timeout: JAMBONES_MEDIA_TIMEOUT_MS}),
...(JAMBONES_MEDIA_HOLD_TIMEOUT_MS && {media_hold_timeout: JAMBONES_MEDIA_HOLD_TIMEOUT_MS})
};
if (Object.keys(opts).length > 0) {
this.ep.set(opts);
}
}
async _handleMediaTimeout(evt) {
if (evt.reason === 'MEDIA_TIMEOUT' && !this.callGone) {
this.logger.info('CallSession:_handleMediaTimeout: received MEDIA_TIMEOUT, hangup the call');
this._jambonzHangup('Media Timeout');
}
}
async _enableInbandDtmfIfRequired(ep) {
if (ep.inbandDtmfEnabled) return;
// only enable inband dtmf detection if voip carrier dtmf_type === tones

View File

@@ -70,8 +70,12 @@ class InboundCallSession extends CallSession {
this._hangup('caller');
}
_jambonzHangup() {
this.dlg?.destroy();
_jambonzHangup(reason) {
this.dlg?.destroy({
headers: {
...(reason && {'X-Reason': reason})
}
});
// kill current task or wakeup the call session.
this._callReleased();
}

View File

@@ -273,7 +273,9 @@ class TaskDial extends Task {
this._removeDtmfDetection(this.dlg);
await this._killOutdials();
if (this.sd) {
this.sd.kill();
const byeReasonHeader = this.killReason === KillReason.MediaTimeout ? 'Media Timeout' : undefined;
this.sd.kill(byeReasonHeader);
this.sd.ep?.removeListener('destroy', this._handleMediaTimeout.bind(this));
this.sd.removeAllListeners();
this.sd = null;
}
@@ -887,6 +889,14 @@ class TaskDial extends Task {
if (this.canReleaseMedia || this.shouldExitMediaPathEntirely) {
setTimeout(this._releaseMedia.bind(this, cs, sd, this.shouldExitMediaPathEntirely), 200);
}
this.sd.ep.once('destroy', this._handleMediaTimeout.bind(this));
}
_handleMediaTimeout(evt) {
if (evt.reason === 'MEDIA_TIMEOUT' && this.sd && this.bridged) {
this.kill(this.cs, KillReason.MediaTimeout);
}
}
_bridgeEarlyMedia(sd) {

View File

@@ -98,7 +98,7 @@ class TaskLlmVoiceAgent_S2S extends Task {
async _api(ep, args) {
const res = await ep.api('uuid_voice_agent_s2s', `^^|${args.join('|')}`);
if (!res.body?.startsWith('+OK')) {
throw new Error({args}, `Error calling uuid_voice_agent_s2s: ${JSON.stringify(res.body)}`);
throw new Error(`Error calling uuid_voice_agent_s2s: ${JSON.stringify(res.body)}`);
}
}

View File

@@ -59,10 +59,13 @@ class TtsTask extends Task {
}
async setTtsStreamingChannelVars(vendor, language, voice, credentials, ep) {
const {api_key, model_id, cartesia_model_id, cartesia_voice_id} = credentials;
const {api_key, model_id} = credentials;
const {stability, similarity_boost, use_speaker_boost, style} = this.options;
let obj;
this.logger.debug({credentials},
`setTtsStreamingChannelVars: vendor: ${vendor}, language: ${language}, voice: ${voice}`);
switch (vendor) {
case 'deepgram':
obj = {
@@ -73,8 +76,8 @@ class TtsTask extends Task {
case 'cartesia':
obj = {
CARTESIA_API_KEY: api_key,
CARTESIA_TTS_STREAMING_MODEL_ID: cartesia_model_id,
CARTESIA_TTS_STREAMING_VOICE_ID: cartesia_voice_id,
CARTESIA_TTS_STREAMING_MODEL_ID: model_id,
CARTESIA_TTS_STREAMING_VOICE_ID: voice,
CARTESIA_TTS_STREAMING_LANGUAGE: language || 'en',
};
break;

View File

@@ -103,6 +103,7 @@ class BackgroundTaskManager extends Emitter {
async _initBargeIn(opts) {
let task;
try {
const copy = JSON.parse(JSON.stringify(opts));
const t = normalizeJambones(this.logger, [opts]);
task = makeTask(this.logger, t[0]);
task
@@ -121,7 +122,7 @@ class BackgroundTaskManager extends Emitter {
if (task.sticky && !this.cs.callGone && !this.cs._stopping) {
this.logger.info('BackgroundTaskManager:_initBargeIn: restarting background bargeIn');
this._bargeInHandled = false;
this.newTask('bargeIn', opts, true);
this.newTask('bargeIn', copy, true);
}
return;
})

View File

@@ -196,7 +196,8 @@
},
"KillReason": {
"Hangup": "hangup",
"Replaced": "replaced"
"Replaced": "replaced",
"MediaTimeout": "media_timeout"
},
"HookMsgTypes": [
"session:new",

View File

@@ -17,7 +17,9 @@ const HttpRequestor = require('./http-requestor');
const WsRequestor = require('./ws-requestor');
const {makeOpusFirst} = require('./sdp-utils');
const {
JAMBONES_USE_FREESWITCH_TIMER_FD
JAMBONES_USE_FREESWITCH_TIMER_FD,
JAMBONES_MEDIA_TIMEOUT_MS,
JAMBONES_MEDIA_HOLD_TIMEOUT_MS
} = require('../config');
class SingleDialer extends Emitter {
@@ -317,14 +319,19 @@ class SingleDialer extends Emitter {
/**
* kill the call in progress or the stable dialog, whichever we have
*/
async kill() {
async kill(Reason) {
this.killed = true;
if (this.inviteInProgress) await this.inviteInProgress.cancel();
else if (this.dlg && this.dlg.connected) {
const duration = moment().diff(this.dlg.connectTime, 'seconds');
this.logger.debug('SingleDialer:kill hanging up called party');
this.emit('callStatusChange', {callStatus: CallStatus.Completed, duration});
this.dlg.destroy();
const headers = {
...(Reason && {'X-Reason': Reason})
};
this.dlg.destroy({
headers
});
}
if (this.ep) {
this.logger.debug(`SingleDialer:kill - deleting endpoint ${this.ep.uuid}`);
@@ -335,7 +342,9 @@ class SingleDialer extends Emitter {
_configMsEndpoint() {
const opts = {
...(this.onHoldMusic && {holdMusic: `shout://${this.onHoldMusic.replace(/^https?:\/\//, '')}`}),
...(JAMBONES_USE_FREESWITCH_TIMER_FD && {timer_name: 'timerfd'})
...(JAMBONES_USE_FREESWITCH_TIMER_FD && {timer_name: 'timerfd'}),
...(JAMBONES_MEDIA_TIMEOUT_MS && {media_timeout: JAMBONES_MEDIA_TIMEOUT_MS}),
...(JAMBONES_MEDIA_HOLD_TIMEOUT_MS && {media_hold_timeout: JAMBONES_MEDIA_HOLD_TIMEOUT_MS})
};
if (Object.keys(opts).length > 0) {
this.ep.set(opts);

View File

@@ -4,36 +4,10 @@ const {
TtsStreamingEvents,
TtsStreamingConnectionStatus
} = require('../utils/constants');
const FEED_INTERVAL = 2000;
const MAX_CHUNK_SIZE = 1800;
const HIGH_WATER_BUFFER_SIZE = 5000;
const LOW_WATER_BUFFER_SIZE = 1000;
const MIN_INITIAL_WORDS = 4;
const findSentenceBoundary = (text, limit) => {
const sentenceEndRegex = /[.!?](?=\s|$)/g;
let lastSentenceBoundary = -1;
let match;
while ((match = sentenceEndRegex.exec(text)) && match.index < limit) {
/* Ensure it's not a decimal point (e.g., "3.14") */
if (match.index === 0 || !/\d$/.test(text[match.index - 1])) {
lastSentenceBoundary = match.index + 1; // Include the punctuation
}
}
return lastSentenceBoundary;
};
const findWordBoundary = (text, limit) => {
const wordBoundaryRegex = /\s+/g;
let lastWordBoundary = -1;
let match;
while ((match = wordBoundaryRegex.exec(text)) && match.index < limit) {
lastWordBoundary = match.index;
}
return lastWordBoundary;
};
const HIGH_WATER_BUFFER_SIZE = 1000;
const LOW_WATER_BUFFER_SIZE = 200;
const TIMEOUT_RETRY_MSECS = 3000;
class TtsStreamingBuffer extends Emitter {
constructor(cs) {
@@ -46,7 +20,7 @@ class TtsStreamingBuffer extends Emitter {
this._isFull = false;
this._connectionStatus = TtsStreamingConnectionStatus.NotConnected;
this._flushPending = false;
this._countSendsInThisTurn = 0;
this.timer = null;
}
get isEmpty() {
@@ -110,14 +84,13 @@ class TtsStreamingBuffer extends Emitter {
return {status: 'failed', reason: `connection to ${this.vendor} failed`};
}
const starting = this.tokens === '';
const displayedTokens = tokens.length <= 40 ? tokens : tokens.substring(0, 40);
const totalLength = tokens.length;
/* if we crossed the high water mark, reject the request */
if (this.tokens.length + totalLength > HIGH_WATER_BUFFER_SIZE) {
this.logger.info(
`TtsStreamingBuffer:bufferTokensTTS buffer is full, rejecting request to buffer ${totalLength} tokens`);
`TtsStreamingBuffer throttling: buffer is full, rejecting request to buffer ${totalLength} tokens`);
if (!this._isFull) {
this._isFull = true;
@@ -127,23 +100,11 @@ class TtsStreamingBuffer extends Emitter {
}
this.logger.debug(
`TtsStreamingBuffer:bufferTokens "${displayedTokens}" (length: ${totalLength}), starting? ${starting}`
`TtsStreamingBuffer:bufferTokens "${displayedTokens}" (length: ${totalLength}), starting? ${this.isEmpty}`
);
this.tokens += (tokens || '');
const leftoverTokens = await this._feedTokens();
/* do we need to start a timer to periodically feed tokens to the endpoint? */
if (starting && leftoverTokens > 0) {
assert(!this.timer);
this.timer = setInterval(async() => {
const remaining = await this._feedTokens();
if (remaining === 0) {
clearInterval(this.timer);
this.timer = null;
}
}, FEED_INTERVAL);
}
await this._feedTokens();
return {status: 'ok'};
}
@@ -156,10 +117,14 @@ class TtsStreamingBuffer extends Emitter {
return;
}
else if (this._connectionStatus === TtsStreamingConnectionStatus.Connected) {
this._countSendsInThisTurn = 0;
this._api(this.ep, [this.ep.uuid, 'flush'])
.catch((err) => this.logger.info({err},
`TtsStreamingBuffer:flush Error flushing TTS streaming: ${JSON.stringify(err)}`));
if (this.size === 0) {
this._doFlush();
}
else {
/* we have tokens queued, so flush after they have been sent */
this._pendingFlush = true;
}
}
}
@@ -176,13 +141,14 @@ class TtsStreamingBuffer extends Emitter {
}
/**
* Send the next chunk of tokens to the endpoint (max 2000 chars)
* Return the number of tokens left in the buffer.
* Send tokens to the TTS engine in sentence chunks for best playout
*/
async _feedTokens() {
async _feedTokens(handlingTimeout = false) {
this.logger.debug({tokens: this.tokens}, '_feedTokens');
try {
/* are we in a state where we can feed tokens to the TTS? */
if (!this.cs.isTtsStreamOpen || !this.ep || !this.tokens) {
this.logger.debug('TTS stream is not open or no tokens to send');
return this.tokens?.length || 0;
@@ -191,86 +157,68 @@ class TtsStreamingBuffer extends Emitter {
if (this._connectionStatus === TtsStreamingConnectionStatus.NotConnected ||
this._connectionStatus === TtsStreamingConnectionStatus.Failed) {
this.logger.debug('TtsStreamingBuffer:_feedTokens TTS stream is not connected');
return this.tokens.length;
return;
}
if (this._connectionStatus === TtsStreamingConnectionStatus.Connecting) {
this.logger.debug('TtsStreamingBuffer:_feedTokens TTS stream is not ready, waiting for connect');
return this.tokens.length;
}
/**
* Rules:
* 1. If this is our first send, we must have at least N words
* 2. Otherwise, must EITHER have N words OR be the ending of a sentence
*
* When sending, send the max size possible, capped at a limit to avoid overwhelming the server.
*/
/* must have at least N words, or be the ending of a sentence */
const words = this.tokens.split(' ').length;
if (words < MIN_INITIAL_WORDS) {
const endsWithPunctuation = /[.!?]$/.test(this.tokens);
if (!endsWithPunctuation || this._countSendsInThisTurn === 0) {
this.logger.debug(`TtsStreamingBuffer:_feedTokens: only ${words} words to send, waiting for more`);
return this.tokens.length;
}
return;
}
/* must send at least one sentence */
const limit = Math.min(MAX_CHUNK_SIZE, this.tokens.length);
let chunkEnd = findSentenceBoundary(this.tokens, limit);
if (chunkEnd === -1) {
this.logger.debug('TtsStreamingBuffer:_feedTokens: no sentence boundary found, look for word boundary');
chunkEnd = findWordBoundary(this.tokens, limit);
}
if (chunkEnd === -1) {
chunkEnd = limit;
if (chunkEnd <= 0) {
if (handlingTimeout) {
/* on a timeout we've left some tokens sitting around, so be more aggressive now in sending them */
chunkEnd = findWordBoundary(this.tokens, limit);
if (chunkEnd <= 0) {
this.logger.debug('TtsStreamingBuffer:_feedTokens: no word boundary found');
this._setTimerIfNeeded();
return;
}
}
else {
/* if we just received tokens, we wont send unless we have at least a full sentence */
this.logger.debug('TtsStreamingBuffer:_feedTokens: no sentence boundary found');
this._setTimerIfNeeded();
return;
}
}
const chunk = this.tokens.slice(0, chunkEnd);
this.tokens = this.tokens.slice(chunkEnd); // Remove sent chunk
this.tokens = this.tokens.slice(chunkEnd);
/* freeswitch looks for sequence of 2 newlines to determine end of message, so insert a space */
const modifiedChunk = chunk.replace(/\n\n/g, '\n \n');
await this._api(this.ep, [this.ep.uuid, 'send', modifiedChunk]);
this.logger.debug(`TtsStreamingBuffer:_feedTokens: sent ${chunk.length}, remaining: ${this.tokens.length}`);
if (modifiedChunk.length > 0) {
try {
this._countSendsInThisTurn++;
this.logger.debug({tokens: modifiedChunk},
`TtsStreamingBuffer:_feedTokens: sending tokens, in send#${this._countSendsInThisTurn}`);
await this._api(this.ep, [this.ep.uuid, 'send', modifiedChunk]);
} catch (err) {
this.logger.info({err}, 'TtsStreamingBuffer:_feedTokens Error sending TTS chunk');
}
if (this._pendingFlush) {
this._doFlush();
this._pendingFlush = false;
}
this.logger.debug(`TtsStreamingBuffer:_feedTokens: sent ${chunk.length}, remaining: ${this.tokens.length}`);
if (this.isFull && this.tokens.length <= LOW_WATER_BUFFER_SIZE) {
this.logger.info('TtsStreamingBuffer:_feedTokens TTS streaming buffer is no longer full');
this._isFull = false;
this.emit(TtsStreamingEvents.Resume);
}
if (this.isFull && this.tokens.length <= LOW_WATER_BUFFER_SIZE) {
this.logger.info('TtsStreamingBuffer throttling: TTS streaming buffer is no longer full - resuming');
this._isFull = false;
this.emit(TtsStreamingEvents.Resume);
}
} catch (err) {
this.logger.info({err}, 'TtsStreamingBuffer:_feedTokens Error sending TTS chunk');
this.tokens = '';
}
if (0 === this.tokens.length && this.timer) {
clearTimeout(this.timer);
this.timer = null;
}
return this.tokens.length;
return;
}
async _api(ep, args) {
const apiCmd = `uuid_${this.vendor}_tts_streaming`;
const res = await ep.api(apiCmd, `^^|${args.join('|')}`);
if (!res.body?.startsWith('+OK')) {
throw new Error({args}, `Error calling ${apiCmd}: ${res.body}`);
this.logger.info({args}, `Error calling ${apiCmd}: ${res.body}`);
throw new Error(`Error calling ${apiCmd}: ${res.body}`);
}
}
@@ -281,6 +229,12 @@ class TtsStreamingBuffer extends Emitter {
this.emit(TtsStreamingEvents.ConnectFailure, {vendor});
}
_doFlush() {
this._api(this.ep, [this.ep.uuid, 'flush'])
.catch((err) => this.logger.info({err},
`TtsStreamingBuffer:_doFlush Error flushing TTS streaming: ${JSON.stringify(err)}`));
}
async _onConnect(vendor) {
this.logger.info(`streaming tts connection made to ${vendor}`);
this._connectionStatus = TtsStreamingConnectionStatus.Connected;
@@ -293,6 +247,18 @@ class TtsStreamingBuffer extends Emitter {
}
}
_setTimerIfNeeded() {
if (this.tokens.length > 0 && !this.timer) {
this.timer = setTimeout(this._onTimeout.bind(this), TIMEOUT_RETRY_MSECS);
}
}
_onTimeout() {
this.logger.info('TtsStreamingBuffer:_onTimeout');
this.timer = null;
this._feedTokens(true);
}
_onTtsEmpty(vendor) {
this.emit(TtsStreamingEvents.Empty, {vendor});
}
@@ -324,4 +290,36 @@ class TtsStreamingBuffer extends Emitter {
}
}
const findSentenceBoundary = (text, limit) => {
// Match traditional sentence boundaries or double newlines
const sentenceEndRegex = /[.!?](?=\s|$)|\n\n/g;
let lastSentenceBoundary = -1;
let match;
while ((match = sentenceEndRegex.exec(text)) && match.index < limit) {
const precedingText = text.slice(0, match.index).trim(); // Extract text before the match and trim whitespace
if (precedingText.length > 0) { // Check if there's actual content
if (
match[0] === '\n\n' || // It's a double newline
(match.index === 0 || !/\d$/.test(text[match.index - 1])) // Standard punctuation rules
) {
lastSentenceBoundary = match.index + (match[0] === '\n\n' ? 2 : 1); // Include the boundary
}
}
}
return lastSentenceBoundary;
};
const findWordBoundary = (text, limit) => {
const wordBoundaryRegex = /\s+/g;
let lastWordBoundary = -1;
let match;
while ((match = wordBoundaryRegex.exec(text)) && match.index < limit) {
lastWordBoundary = match.index;
}
return lastWordBoundary;
};
module.exports = TtsStreamingBuffer;