Compare commits

...

36 Commits

Author SHA1 Message Date
Dave Horton
8879d8e3e5 fix cartesia channel vars for streaming 2024-12-20 16:40:54 -05:00
Hoan Luu Huu
13ef89d605 support elevenlabs tts stream (#1011)
* support elevenlabs tts stream

* wip

* wip
2024-12-20 09:50:13 -05:00
Dave Horton
d05e470867 remove hardcoding of openai model 2024-12-19 18:42:57 -05:00
Hoan Luu Huu
17250f8386 support cartesia tts (#1008)
* support cartesia tts

* update speech util version

* update speech utils version
2024-12-19 07:35:47 -05:00
Dave Horton
ba3f46df64 Feat/tts streaming (#994)
* wip

* add TtsStreamingBuffer class to abstract handling of streaming tokens

* wip

* add throttling support

* support background ttsStream (#995)

* wip

* add TtsStreamingBuffer class to abstract handling of streaming tokens

* wip

* support background ttsStream

* wip

---------

Co-authored-by: Dave Horton <daveh@beachdognet.com>

* wip

* dont send if we have nothing to send

* initial testing with cartesia

* wip

---------

Co-authored-by: Hoan Luu Huu <110280845+xquanluu@users.noreply.github.com>
2024-12-18 14:44:37 -05:00
RJ Burnham
f37e1540ee Make voicemail hints case insensitive (#1007) 2024-12-13 13:42:29 -05:00
Dave Horton
5e04db82bf Feat/deepgram voice agent (#1006)
* wip

* wip

* wip
2024-12-13 10:05:23 -05:00
Dave Horton
0aa37a83ae Feat/handle 3pcc invite (#1005)
* wip

* wip

* linting
2024-12-12 18:39:15 -05:00
Hoan Luu Huu
c29ab0d858 support referBy display name (#1000)
* support referBy display name

* wip

* update verb specification
2024-12-11 12:46:29 -05:00
Sam Machin
71d4c90cbc catch error (#1002)
* catch error

* remove notifyTaskDone
2024-12-11 12:34:44 -05:00
Hoan Luu Huu
a929a649f9 fix ConfirmCallSession cannot be played (#993)
* fix ConfirmCallSession cannot be played

* fix review comments

* fix review comments
2024-12-10 19:36:42 -05:00
Dave Horton
3bb4f1a29f fix #998 incorrectly sending final transcript with is_final=false (#999) 2024-12-10 18:48:02 -05:00
Hoan Luu Huu
54cc76606b fix cannot replace endpoint for adulting session (#992)
* fix cannot replace endpoint for adulting session

* fix cannot replace endpoint for adulting session
2024-12-06 07:51:24 -05:00
rammohan-y
0458bb7d6c Feat/884: Capture system_alert when feature-server is online or offline (#950)
* writing alerts during startup and shutdown of feature-server

* feat/884: created constants for system component name and state

* feat/88: added 0.2.11 version of time-series

* feat/884: renamed constant, and added GracefulShutdownInProgress system alert
2024-12-05 09:23:03 -05:00
Sam Machin
dce4fe1f82 Fix/986 (#990)
* throw new NonFatalTask error on play file not found

* linting

* make SpeechCredentialError subclass of NonFatalTask error

* cleanup

* Update action-hook-delay.js

* bump fsmrf version

* linting and package-lock

* Update package-lock.json

* update error

* only throw on fs error "File not found"

* add alert

* update time-series dep

* Update package-lock.json

* linting

* Update play.js

* remove stack trace from error message

* fix error formatting
2024-12-04 05:47:49 -05:00
Hoan Luu Huu
e96c35d571 fixed iamrole from sessionToken to securityToken (#988)
* fixed iamrole from sessionToken to securityToken

* wip

* support get aws credential from instance profile
2024-11-29 21:58:42 -05:00
Hoan Luu Huu
070671a3fb support send refer custom header to referhook (#981) 2024-11-28 08:34:34 -05:00
rammohan-y
efdb56f0a0 feat/971 - fix to allow hints objects array (#987) 2024-11-28 07:25:10 -05:00
Hoan Luu Huu
e2edbb4a5b support enable dtmf tone (#976)
* support enable dtmf tone

* wip

* wip
2024-11-26 20:25:48 -05:00
Markus Frindt
3a6d63a1c6 Fix the issue for outbound calls that always the None credentials wer… (#984)
* Fix the issue for outbound calls that always the None credentials were used. session:new for rest dial did not contain recognizer.label and synthesizer.label

* update comment

---------

Co-authored-by: mfrindt <m.frindt@cognigy.com>
2024-11-26 10:26:20 -05:00
rammohan-y
c874ab8100 feat/975: fixed continuous asr not stopping when asrDtmfTerminationDi… (#977)
* feat/975: fixed continuous asr not stopping when asrDtmfTerminationDigit is configured

* feat/975: giving first preference to asrDtmfTerminationDigit if there is already ASR happened
2024-11-26 08:23:11 -05:00
Dave Horton
24a66fed64 wip (#979) 2024-11-19 09:37:00 -05:00
Hoan Luu Huu
c8c3738ae8 custom stt vendor ws connection should not be closed in asrTimeout (#973) 2024-11-18 10:17:31 -05:00
Dave Horton
c1330d4651 fix transcribe fixes for speechmatics (#978)
* fix transcribe fixes for speechmatics

* update to verb-specs with fixes for speechmatics

* add support for speechmatics translation

* add handlers for receiving translations

* call translation hookd

* gather: no need to restart speechmatics after a final transcript during continuous asr

* graceful shutdown

* wip

* wip

* wip

* wip

* wip
2024-11-16 10:21:04 -05:00
Hoan Luu Huu
27f3a4b520 support SIP Privacy (#970) 2024-11-15 07:11:47 -05:00
Hoan Luu Huu
594c867192 unbridge dial ep with caller ep to avoid media release when referHook (#972) 2024-11-14 19:30:49 -05:00
Hoan Luu Huu
71c475e758 allow dub as http updateCall request (#974) 2024-11-14 07:20:33 -05:00
RJ Burnham
22ef201360 Add support to export to more than one otel platform. (#969)
* Add support to export to more than one otel platform.

This is helpful for if you want to keep using the bundled jaeger
support in the web console AND send to external OTLP based platform
(such as Axiom.co!).

* Lint issues and cleanup.
2024-11-13 10:25:02 -05:00
Hoan Luu Huu
5be3a910ad fix google custom voice can not be used without voice cloning key (#968) 2024-11-11 07:24:40 -05:00
Dave Horton
7615509e0b update test to use drachtio/drachtio-freeswitch-mrf:0.9.2-4 with aws_transcribe_ws fix (#964) 2024-11-08 09:52:26 -05:00
Dave Horton
851c071345 fix for #962 (#963) 2024-11-08 07:12:08 -05:00
rammohan-y
7911459c8c feat/940 stopped calling updateSpeechCredentialLastUsed (#944) 2024-11-05 15:19:08 -05:00
Hoan Luu Huu
be258950b0 feature server should send USER call to the sbc sip that is connect with the user (#949)
* feature server should send USER call to the sbc sip that is connect with the user

* feature server should send USER call to the sbc sip that is connect with the user

* feature server should send USER call to the sbc sip that is connect with the user

* fix review comment

* add env variable to enable the feature

* add env variable to enable the feature

* add env variable to enable the feature

* minor test update

---------

Co-authored-by: Dave Horton <daveh@beachdognet.com>
2024-11-05 15:14:04 -05:00
Hoan Luu Huu
0520386a1e fixed dial verb should use calling id from From header (#958)
* fixed dial verb should use calling id from From header

* fix review comment

* wip
2024-11-05 13:48:35 -05:00
Hoan Luu Huu
a4b1b22324 update speech utils version (#957) 2024-11-04 08:04:19 -05:00
Hoan Luu Huu
e800cca961 support google voice cloning (#956)
* support google voice cloning

* wip
2024-11-04 07:10:52 -05:00
41 changed files with 2115 additions and 360 deletions

3
.gitignore vendored
View File

@@ -42,4 +42,5 @@ ecosystem.config.js
test/credentials/*.json
run-tests.sh
run-coverage.sh
.vscode
.vscode
.env

47
app.js
View File

@@ -25,10 +25,22 @@ const opts = {
};
const pino = require('pino');
const logger = pino(opts, pino.destination({sync: false}));
const {LifeCycleEvents, FS_UUID_SET_NAME} = require('./lib/utils/constants');
const {LifeCycleEvents, FS_UUID_SET_NAME, SystemState, FEATURE_SERVER} = require('./lib/utils/constants');
const installSrfLocals = require('./lib/utils/install-srf-locals');
installSrfLocals(srf, logger);
const writeSystemAlerts = srf.locals?.writeSystemAlerts;
if (writeSystemAlerts) {
writeSystemAlerts({
system_component: FEATURE_SERVER,
state : SystemState.Online,
fields : {
detail: `feature-server with process_id ${process.pid} started`,
host: srf.locals?.ipv4
}
});
}
const {
initLocals,
createRootSpan,
@@ -100,12 +112,19 @@ createHttpListener(logger, srf)
});
setInterval(async() => {
const monInterval = setInterval(async() => {
srf.locals.stats.gauge('fs.sip.calls.count', sessionTracker.count);
// Checking system log level
const systemInformation = await srf.locals.dbHelpers.lookupSystemInformation();
if (systemInformation && systemInformation.log_level) {
logger.level = systemInformation.log_level;
try {
const systemInformation = await srf.locals.dbHelpers.lookupSystemInformation();
if (systemInformation && systemInformation.log_level) {
logger.level = systemInformation.log_level;
}
} catch (err) {
if (process.env.NODE_ENV === 'test') {
clearInterval(monInterval);
logger.error('all tests complete');
}
else logger.error({err}, 'Error checking system log level in database');
}
}, 20000);
@@ -117,13 +136,25 @@ const disconnect = () => {
srf.locals.mediaservers?.forEach((ms) => ms.disconnect());
});
};
process.on('SIGTERM', handle);
process.on('SIGINT', handle);
function handle(signal) {
async function handle(signal) {
const {removeFromSet} = srf.locals.dbHelpers;
srf.locals.disabled = true;
logger.info(`got signal ${signal}`);
const writeSystemAlerts = srf.locals?.writeSystemAlerts;
if (writeSystemAlerts) {
// it has to be synchronous call, or else by the time system saves the app terminates
await writeSystemAlerts({
system_component: FEATURE_SERVER,
state : SystemState.Offline,
fields : {
detail: `feature-server with process_id ${process.pid} stopped, signal ${signal}`,
host: srf.locals?.ipv4
}
});
}
const setName = `${(JAMBONES_CLUSTER_ID || 'default')}:active-fs`;
const fsServiceUrlSetName = `${(JAMBONES_CLUSTER_ID || 'default')}:fs-service-url`;
if (setName && srf.locals.localSipAddress) {

View File

@@ -136,6 +136,7 @@ const JAMBONES_DISABLE_DIRECT_P2P_CALL = process.env.JAMBONES_DISABLE_DIRECT_P2P
const JAMBONES_EAGERLY_PRE_CACHE_AUDIO = parseInt(process.env.JAMBONES_EAGERLY_PRE_CACHE_AUDIO, 10) || 0;
const JAMBONES_USE_FREESWITCH_TIMER_FD = process.env.JAMBONES_USE_FREESWITCH_TIMER_FD;
const JAMBONES_DIAL_SBC_FOR_REGISTERED_USER = process.env.JAMBONES_DIAL_SBC_FOR_REGISTERED_USER || false;
module.exports = {
JAMBONES_MYSQL_HOST,
@@ -221,5 +222,6 @@ module.exports = {
JAMBONZ_RECORD_WS_PASSWORD,
JAMBONZ_DISABLE_DIAL_PAI_HEADER,
JAMBONES_DISABLE_DIRECT_P2P_CALL,
JAMBONES_USE_FREESWITCH_TIMER_FD
JAMBONES_USE_FREESWITCH_TIMER_FD,
JAMBONES_DIAL_SBC_FOR_REGISTERED_USER
};

View File

@@ -14,6 +14,8 @@ const RootSpan = require('../../utils/call-tracer');
const dbUtils = require('../../utils/db-utils');
const { mergeSdpMedia, extractSdpMedia } = require('../../utils/sdp-utils');
const { createCallSchema, customSanitizeFunction } = require('../schemas/create-call');
const { selectHostPort } = require('../../utils/network');
const { JAMBONES_DIAL_SBC_FOR_REGISTERED_USER } = require('../../config');
const removeNullProperties = (obj) => (Object.keys(obj).forEach((key) => obj[key] === null && delete obj[key]), obj);
const removeNulls = (req, res, next) => {
@@ -65,7 +67,7 @@ router.post('/',
lookupAppBySid
} = srf.locals.dbHelpers;
const {getSBC, getFreeswitch} = srf.locals;
const sbcAddress = getSBC();
let sbcAddress = getSBC();
if (!sbcAddress) throw new Error('no available SBCs for outbound call creation');
const target = restDial.to;
const opts = {
@@ -140,6 +142,16 @@ router.post('/',
}
}
// find handling sbc sip for called user
if (JAMBONES_DIAL_SBC_FOR_REGISTERED_USER && target.type === 'user') {
const { registrar } = srf.locals.dbHelpers;
const reg = await registrar.query(target.name);
if (reg) {
sbcAddress = selectHostPort(logger, reg.sbcAddress, 'tcp')[1];
}
//sbc outbound return 404 Notfound to handle case called user is not reigstered.
}
/**
* trunk isn't specified,
* check if from-number matches any existing numbers on Jambonz
@@ -196,10 +208,13 @@ router.post('/',
/**
* create our application object -
* not from the database as per an inbound call,
* but from the provided params in the request
* we merge the inbound call application,
* with the provided app params from the request body
*/
const app = req.body;
const app = {
...application,
...req.body
};
/**
* attach our requestor and notifier objects

View File

@@ -336,7 +336,9 @@ module.exports = function(srf, logger) {
if (arr) {
const google_custom_voice_sid = arr[1];
const [custom_voice] = await lookupGoogleCustomVoice(google_custom_voice_sid);
if (custom_voice) {
//google voice cloning key has size 200kb, jambonz should not resolve the voice here that the app's calling
//webhook will receive big payload, tts-task should resolve the voice later.
if (!custom_voice.use_voice_cloning_key) {
app2.speech_synthesis_voice = {
reportedUsage: custom_voice.reported_usage,
model: custom_voice.model

View File

@@ -45,8 +45,10 @@ class AdultingCallSession extends CallSession {
return this.sd.ep;
}
/* see note above */
set ep(newEp) {}
// When adulting session kicked from conference, replaceEndpoint is a must
set ep(newEp) {
this.sd.ep = newEp;
}
get callSid() {
return this.callInfo.callSid;

View File

@@ -2,13 +2,15 @@ const Emitter = require('events');
const fs = require('fs');
const {
CallDirection,
MediaPath,
TaskPreconditions,
CallStatus,
TaskName,
KillReason,
RecordState,
AllowedSipRecVerbs,
AllowedConfirmSessionVerbs
AllowedConfirmSessionVerbs,
TtsStreamingEvents
} = require('../utils/constants');
const moment = require('moment');
const assert = require('assert');
@@ -20,6 +22,7 @@ const listTaskNames = require('../utils/summarize-tasks');
const HttpRequestor = require('../utils/http-requestor');
const WsRequestor = require('../utils/ws-requestor');
const ActionHookDelayProcessor = require('../utils/action-hook-delay');
const TtsStreamingBuffer = require('../utils/tts-streaming-buffer');
const {parseUri} = require('drachtio-srf');
const {
JAMBONES_INJECT_CONTENT,
@@ -29,9 +32,10 @@ const {
} = require('../config');
const bent = require('bent');
const BackgroundTaskManager = require('../utils/background-task-manager');
const dbUtils = require('../utils/db-utils');
const BADPRECONDITIONS = 'preconditions not met';
const CALLER_CANCELLED_ERR_MSG = 'Response not sent due to unknown transaction';
const { NonFatalTaskError} = require('../utils/error');
const sqlRetrieveQueueEventHook = `SELECT * FROM webhooks
WHERE webhook_sid =
(
@@ -398,33 +402,41 @@ class CallSession extends Emitter {
return this.application.transferredCall === true;
}
/**
* returns true if this session is an inbound call session
*/
get isInboundCallSession() {
return this.constructor.name === 'InboundCallSession';
}
/**
* returns true if this session is a ConfirmCallSession
*/
get isAdultingCallSession() {
return this.constructor.name === 'AdultingCallSession';
}
/**
* returns true if this session is a ConfirmCallSession
*/
get isConfirmCallSession() {
return this.constructor.name === 'ConfirmCallSession';
}
/**
* returns true if this session is a SipRecCallSession
*/
get isSipRecCallSession() {
return this.constructor.name === 'SipRecCallSession';
}
/**
* returns true if this session is a SmsCallSession
*/
get isSmsCallSession() {
return this.constructor.name === 'SmsCallSession';
}
get isRestCallSession() {
return this.constructor.name === 'RestCallSession';
}
get InboundCallSession() {
return this.constructor.name === 'InboundCallSession';
}
get isNormalCallSession() {
return this.constructor.name === 'InboundCallSession' || this.constructor.name === 'RestCallSession';
}
get is3pccInvite() {
return this.isInboundCallSession && this.req?.body?.length === 0;
}
get webhook_secret() {
return this.accountInfo?.account?.webhook_secret;
@@ -438,6 +450,10 @@ class CallSession extends Emitter {
return this.backgroundTaskManager.isTaskRunning('bargeIn');
}
get isTtsStreamEnabled() {
return this.backgroundTaskManager.isTaskRunning('ttsStream');
}
get isListenEnabled() {
return this.backgroundTaskManager.isTaskRunning('listen');
}
@@ -500,6 +516,10 @@ class CallSession extends Emitter {
this._sipRequestWithinDialogHook = url;
}
get isTtsStreamOpen() {
return this.currentTask?.isStreamingTts ||
this.backgroundTaskManager.getTask('ttsStream')?.isStreamingTts;
}
// Bot Delay (actionHook delayed)
get actionHookDelayEnabled() {
return this._actionHookDelayEnabled;
@@ -574,6 +594,25 @@ class CallSession extends Emitter {
}
}
getTsStreamingVendor() {
let v;
if (this.currentTask?.isStreamingTts) {
const {vendor} = this.currentTask.getTtsVendorData(this);
v = vendor;
}
else if (this.backgroundTaskManager.getTask('ttsStream')?.isStreamingTts) {
const {vendor} = this.backgroundTaskManager.getTask('ttsStream').getTtsVendorData(this);
v = vendor;
}
return v;
}
get appIsUsingWebsockets() {
return this.requestor instanceof WsRequestor;
}
/* end of getters and setters */
async clearOrRestoreActionHookDelayProcessor() {
if (this._actionHookDelayProcessor) {
await this._actionHookDelayProcessor.stop();
@@ -791,6 +830,36 @@ class CallSession extends Emitter {
}
}
async enableBackgroundTtsStream(say) {
try {
if (this.isTtsStreamEnabled) {
this.logger.debug('CallSession:enableBackgroundTtsStream - ttsStream currently enabled, ignoring request');
} else if (this.appIsUsingWebsockets && this.isNormalCallSession) {
await this.backgroundTaskManager.newTask('ttsStream', say);
this.logger.debug('CallSession:enableBackgroundTtsStream - ttsStream enabled');
} else {
this.logger.debug(
'CallSession:enableBackgroundTtsStream - ignoring request as call does not have required conditions');
}
} catch (err) {
this.logger.info({err, say}, 'CallSession:enableBackgroundTtsStream - Error creating background tts stream task');
}
}
disableTtsStream() {
if (this.isTtsStreamEnabled) {
this.backgroundTaskManager.stop('ttsStream');
this.logger.debug('CallSession:disableTtsStream - ttsStream disabled');
}
}
clearTtsStream() {
this.ttsStreamingBuffer?.clear();
}
startTtsStream() {
this.ttsStreamingBuffer?.start();
}
async enableBotMode(gather, autoEnable) {
try {
let task;
@@ -979,6 +1048,14 @@ class CallSession extends Emitter {
options: credential.options
};
}
else if ('cartesia' === vendor) {
return {
api_key: credential.api_key,
model_id: credential.model_id,
embedding: credential.embedding,
options: credential.options
};
}
else if ('rimelabs' === vendor) {
return {
api_key: credential.api_key,
@@ -1040,6 +1117,27 @@ class CallSession extends Emitter {
async exec() {
this.logger.info({tasks: listTaskNames(this.tasks)}, `CallSession:exec starting ${this.tasks.length} tasks`);
// calculate if inbandDTMF tone is used
const voip_carrier_sid = this.req?.has('X-Voip-Carrier-Sid') ? this.req.get('X-Voip-Carrier-Sid') :
this.req?.has('X-Requested-Carrier-Sid') ? this.req.get('X-Requested-Carrier-Sid') : null;
if (voip_carrier_sid) {
const {lookupVoipCarrierBySid} = dbUtils(this.logger, this.srf);
const [voipCarrier] = await lookupVoipCarrierBySid(voip_carrier_sid);
this.inbandDtmfEnabled = voipCarrier?.dtmf_type === 'tones';
}
if (this.isNormalCallSession) {
this.ttsStreamingBuffer = new TtsStreamingBuffer(this);
this.ttsStreamingBuffer.on(TtsStreamingEvents.Empty, this._onTtsStreamingEmpty.bind(this));
this.ttsStreamingBuffer.on(TtsStreamingEvents.Pause, this._onTtsStreamingPause.bind(this));
this.ttsStreamingBuffer.on(TtsStreamingEvents.Resume, this._onTtsStreamingResume.bind(this));
this.ttsStreamingBuffer.on(TtsStreamingEvents.ConnectFailure, this._onTtsStreamingConnectFailure.bind(this));
}
else {
this.logger.info(`CallSession:exec - not a normal call session: ${this.constructor.name}`);
}
while (this.tasks.length && !this.callGone) {
const taskNum = ++this.taskIdx;
const stackNum = this.stackIdx;
@@ -1080,6 +1178,8 @@ class CallSession extends Emitter {
this.currentTask = null;
if (err.message?.includes(BADPRECONDITIONS)) {
this.logger.info(`CallSession:exec task #${stackNum}:${taskNum}: ${task.name}: ${err.message}`);
} else if (err instanceof NonFatalTaskError) {
this.logger.error(err, `Error executing task #${stackNum}:${taskNum}: ${task.name}`);
}
else {
this.logger.error(err, `Error executing task #${stackNum}:${taskNum}: ${task.name}`);
@@ -1589,6 +1689,15 @@ Duration=${duration} `
this.logger.info({response}, '_lccBoostAudioSignal: response from freeswitch');
}
async _lccMediaPath(desiredPath) {
const task = this.currentTask;
if (!task || task.name !== TaskName.Dial) {
return this.logger.info('CallSession:_lccMediaPath - invalid command since we are not in a dial verb');
}
task.updateMediaPath(desiredPath)
.catch((err) => this.logger.error(err, 'CallSession:_lccMediaPath'));
}
_lccToolOutput(tool_call_id, opts, callSid) {
// only valid if we are in an LLM verb
const task = this.currentTask;
@@ -1612,6 +1721,39 @@ Duration=${duration} `
.catch((err) => this.logger.error(err, 'CallSession:_lccLlmUpdate'));
}
async _lccTtsTokens(opts) {
const {id, tokens} = opts;
if (id === undefined) {
this.logger.info({opts}, 'CallSession:_lccTtsTokens - invalid command since id is missing');
return;
}
else if (tokens === undefined) {
this.logger.info({opts}, 'CallSession:_lccTtsTokens - invalid command since id is missing');
return this.requestor.request('tts:tokens-result', '/tokens-result', {
id,
status: 'failed',
reason: 'missing tokens'
}).catch((err) => this.logger.debug({err}, 'CallSession:_notifyTaskStatus - Error sending'));
}
let res;
try {
res = await this.ttsStreamingBuffer?.bufferTokens(tokens);
this.logger.info({id, res}, 'CallSession:_lccTtsTokens - tts:tokens-result');
} catch (err) {
this.logger.info(err, 'CallSession:_lccTtsTokens');
}
this.requestor.request('tts:tokens-result', '/tokens-result', {id, ...res})
.catch((err) => this.logger.debug({err}, 'CallSession:_notifyTaskStatus - Error sending'));
}
_lccTtsFlush(opts) {
this.ttsStreamingBuffer?.flush(opts);
}
_lccTtsClear(opts) {
this.ttsStreamingBuffer?.clear(opts);
}
/**
* perform call hangup by jambonz
@@ -1667,11 +1809,14 @@ Duration=${duration} `
return this._lccConferenceParticipantAction(opts.conferenceParticipantAction);
}
else if (opts.dub) {
return this._lccDub(opts);
return this._lccDub(opts.dub, callSid);
}
else if (opts.boostAudioSignal) {
return this._lccBoostAudioSignal(opts, callSid);
}
else if (opts.media_path) {
return this._lccMediaPath(opts.media_path, callSid);
}
else if (opts.llm_tool_output) {
return this._lccToolOutput(opts.tool_call_id, opts.llm_tool_output, callSid);
}
@@ -1975,6 +2120,13 @@ Duration=${duration} `
});
break;
case 'media:path':
this._lccMediaPath(data, call_sid)
.catch((err) => {
this.logger.info({err, data}, 'CallSession:_onCommand - error setting media path');
});
break;
case 'llm:tool-output':
this._lccToolOutput(tool_call_id, data, call_sid);
break;
@@ -1983,6 +2135,18 @@ Duration=${duration} `
this._lccLlmUpdate(data, call_sid);
break;
case 'tts:tokens':
this._lccTtsTokens(data);
break;
case 'tts:flush':
this._lccTtsFlush(data);
break;
case 'tts:clear':
this._lccTtsClear(data);
break;
default:
this.logger.info(`CallSession:_onCommand - invalid command ${command}`);
}
@@ -2040,6 +2204,8 @@ Duration=${duration} `
};
}
this._enableInbandDtmfIfRequired(this.ep);
// we are going from an early media connection to answer
if (this.direction === CallDirection.Inbound) {
// only do this for inbound call.
@@ -2063,7 +2229,6 @@ Duration=${duration} `
//ep.cs = this;
this.ep = ep;
this.logger.info(`allocated endpoint ${ep.uuid}`);
this._configMsEndpoint();
this.ep.on('destroy', () => {
@@ -2176,6 +2341,8 @@ Duration=${duration} `
// close all background tasks
this.backgroundTaskManager.stopAll();
this.clearOrRestoreActionHookDelayProcessor().catch((err) => {});
this.ttsStreamingBuffer?.stop();
}
/**
@@ -2225,22 +2392,43 @@ Duration=${duration} `
},
localSdp: this.ep.local.sdp
});
this.logger.debug('answered call');
this.dlg.on('destroy', this._callerHungup.bind(this));
this.wrapDialog(this.dlg);
this.dlg.callSid = this.callSid;
this.emit('callStatusChange', {sipStatus: 200, sipReason: 'OK', callStatus: CallStatus.InProgress});
if (this.recordOptions && this.recordState === RecordState.RecordingOff) {
this.startRecording();
const tidyUp = () => {
this.dlg.on('destroy', this._callerHungup.bind(this));
this.wrapDialog(this.dlg);
this.dlg.callSid = this.callSid;
this.emit('callStatusChange', {sipStatus: 200, sipReason: 'OK', callStatus: CallStatus.InProgress});
if (this.recordOptions && this.recordState === RecordState.RecordingOff) {
this.startRecording();
}
this.dlg.on('modify', this._onReinvite.bind(this));
this.dlg.on('refer', this._onRefer.bind(this));
if (this.sipRequestWithinDialogHook) {
this.dlg.on('info', this._onRequestWithinDialog.bind(this));
this.dlg.on('message', this._onRequestWithinDialog.bind(this));
}
this.logger.debug(`CallSession:propagateAnswer - answered callSid ${this.callSid}`);
};
if (this.is3pccInvite) {
return new Promise((resolve, reject) => {
this.dlg.once('ack', (ackRequest) => {
this.logger.debug({body: ackRequest.body}, 'received ACK for 3pcc invite');
this.ep.modify(ackRequest.body)
.then(() => {
tidyUp();
return resolve();
})
.catch((err) => {
this.logger.info({err}, 'Error modifying endpoint with ACK');
reject(err);
});
});
this.logger.debug('this is a 3pcc invite');
});
}
this.dlg.on('modify', this._onReinvite.bind(this));
this.dlg.on('refer', this._onRefer.bind(this));
if (this.sipRequestWithinDialogHook) {
this.dlg.on('info', this._onRequestWithinDialog.bind(this));
this.dlg.on('message', this._onRequestWithinDialog.bind(this));
}
this.logger.debug(`CallSession:propagateAnswer - answered callSid ${this.callSid}`);
tidyUp();
}
else {
this.logger.debug('CallSession:propagateAnswer - call already answered - re-anchor media with a reinvite');
@@ -2318,6 +2506,12 @@ Duration=${duration} `
try {
const to = parseUri(req.getParsedHeader('Refer-To').uri);
const by = parseUri(req.getParsedHeader('Referred-By').uri);
const customHeaders = Object.keys(req.headers)
.filter((h) => h.toLowerCase().startsWith('x-'))
.reduce((acc, h) => {
acc[h] = req.get(h);
return acc;
}, {});
const b3 = this.b3;
const httpHeaders = b3 && {b3};
const json = await this.requestor.request('verb:hook', this._referHook, {
@@ -2330,6 +2524,7 @@ Duration=${duration} `
referred_by_user: by.scheme === 'tel' ? by.number : by.user,
referring_call_sid: this.callSid,
referred_call_sid: null,
...customHeaders
}
}, httpHeaders);
@@ -2505,27 +2700,35 @@ Duration=${duration} `
};
}
async releaseMediaToSBC(remoteSdp) {
async releaseMediaToSBC(remoteSdp, releaseMediaEntirely) {
assert(this.dlg && this.dlg.connected && this.ep && typeof remoteSdp === 'string');
await this.dlg.modify(remoteSdp, {
headers: {
'X-Reason': 'release-media'
'X-Reason': releaseMediaEntirely ? 'release-media-entirely' : 'release-media'
}
});
this.ep.destroy()
.then(() => this.ep = null)
.catch((err) => this.logger.error({err}, 'CallSession:releaseMediaToSBC: Error destroying endpoint'));
try {
await this.ep.destroy();
} catch (err) {
this.logger.error({err}, 'CallSession:releaseMediaToSBC: Error destroying endpoint');
}
this.ep = null;
}
async reAnchorMedia() {
async reAnchorMedia(currentMediaRoute = MediaPath.PartialMedia) {
assert(this.dlg && this.dlg.connected && !this.ep);
this.ep = await this.ms.createEndpoint({remoteSdp: this.dlg.remote.sdp});
this._configMsEndpoint();
await this.dlg.modify(this.ep.local.sdp, {
headers: {
'X-Reason': 'anchor-media'
}
});
this._configMsEndpoint();
if (currentMediaRoute === MediaPath.NoMedia) {
await this.ep.modify(this.dlg.remote.sdp);
}
}
async handleReinviteAfterMediaReleased(req, res) {
@@ -2592,6 +2795,7 @@ Duration=${duration} `
}
_configMsEndpoint() {
this._enableInbandDtmfIfRequired(this.ep);
const opts = {
...(this.onHoldMusic && {holdMusic: `shout://${this.onHoldMusic.replace(/^https?:\/\//, '')}`}),
...(JAMBONES_USE_FREESWITCH_TIMER_FD && {timer_name: 'timerfd'})
@@ -2601,6 +2805,20 @@ Duration=${duration} `
}
}
async _enableInbandDtmfIfRequired(ep) {
if (ep.inbandDtmfEnabled) return;
// only enable inband dtmf detection if voip carrier dtmf_type === tones
if (this.inbandDtmfEnabled) {
// https://developer.signalwire.com/freeswitch/FreeSWITCH-Explained/Modules/mod-dptools/6587132/#0-about
try {
ep.execute('start_dtmf');
ep.inbandDtmfEnabled = true;
} catch (err) {
this.logger.info(err, 'CallSession:_enableInbandDtmf - error enable inband DTMF');
}
}
}
/**
* notifyTaskError - only used when websocket connection is used instead of webhooks
*/
@@ -2680,6 +2898,37 @@ Duration=${duration} `
this.verbHookSpan = null;
}
}
_onTtsStreamingEmpty() {
const task = this.currentTask;
if (task && TaskName.Say === task.name) {
task.notifyTtsStreamIsEmpty();
}
}
_onTtsStreamingPause() {
this.requestor?.request('tts:streaming-event', '/streaming-event', {event_type: 'stream_paused'})
.catch((err) => this.logger.info({err}, 'CallSession:_onTtsStreamingPause - Error sending'));
}
_onTtsStreamingResume() {
this.requestor?.request('tts:streaming-event', 'streaming-event', {event_type: 'stream_resumed'})
.catch((err) => this.logger.info({err}, 'CallSession:_onTtsStreamingResume - Error sending'));
}
async _onTtsStreamingConnectFailure(vendor) {
const {writeAlerts, AlertType} = this.srf.locals;
try {
await writeAlerts({
alert_type: AlertType.TTS_STREAMING_CONNECTION_FAILURE,
account_sid: this.accountSid,
vendor
});
} catch (error) {
this.logger.error({error}, 'Error writing WEBHOOK_CONNECTION_FAILURE alert');
}
this.logger.info({vendor}, 'CallSession:_onTtsStreamingConnectFailure - tts streaming connect failure');
}
}
module.exports = CallSession;

View File

@@ -8,7 +8,7 @@ const CallSession = require('./call-session');
*/
class ConfirmCallSession extends CallSession {
constructor({logger, application, dlg, ep, tasks, callInfo, accountInfo, memberId, confName, rootSpan}) {
constructor({logger, application, dlg, ep, tasks, callInfo, accountInfo, memberId, confName, rootSpan, req}) {
super({
logger,
application,
@@ -23,6 +23,7 @@ class ConfirmCallSession extends CallSession {
});
this.dlg = dlg;
this.ep = ep;
this.req = req;
}
/**

View File

@@ -653,7 +653,8 @@ class Conference extends Task {
memberId: this.memberId,
confName: this.confName,
tasks,
rootSpan: cs.rootSpan
rootSpan: cs.rootSpan,
req: cs.req
});
await this._playSession.exec();
this._playSession = null;

View File

@@ -16,7 +16,8 @@ class TaskConfig extends Task {
'fillerNoise',
'actionHookDelayAction',
'boostAudioSignal',
'vad'
'vad',
'ttsStream'
].forEach((k) => this[k] = this.data[k] || {});
if ('notifyEvents' in this.data) {
@@ -45,6 +46,12 @@ class TaskConfig extends Task {
};
delete this.transcribeOpts.enable;
}
if (this.ttsStream.enable) {
this.sayOpts = {
verb: 'say',
stream: true
};
}
if (this.data.reset) {
if (typeof this.data.reset === 'string') this.data.reset = [this.data.reset];
@@ -75,6 +82,7 @@ class TaskConfig extends Task {
get hasVad() { return Object.keys(this.vad).length; }
get hasFillerNoise() { return Object.keys(this.fillerNoise).length; }
get hasReferHook() { return Object.keys(this.data).includes('referHook'); }
get hasTtsStream() { return Object.keys(this.ttsStream).length; }
get summary() {
const phrase = [];
@@ -106,6 +114,9 @@ class TaskConfig extends Task {
if (this.onHoldMusic) phrase.push(`onHoldMusic: ${this.onHoldMusic}`);
if ('boostAudioSignal' in this.data) phrase.push(`setGain ${this.data.boostAudioSignal}`);
if (this.hasReferHook) phrase.push('set referHook');
if (this.hasTtsStream) {
phrase.push(`${this.ttsStream.enable ? 'enable' : 'disable'} ttsStream`);
}
return `${this.name}{${phrase.join(',')}}`;
}
@@ -305,6 +316,22 @@ class TaskConfig extends Task {
if (this.hasReferHook) {
cs.referHook = this.data.referHook;
}
if (this.ttsStream.enable && this.sayOpts) {
this.sayOpts.synthesizer = this.hasSynthesizer ? this.synthesizer : {
vendor: cs.speechSynthesisVendor,
language: cs.speechSynthesisLanguage,
voice: cs.speechSynthesisVoice,
...(cs.speechSynthesisLabel && {
label: cs.speechSynthesisLabel
})
};
this.logger.info({opts: this.gatherOpts}, 'Config: enabling ttsStream');
cs.enableBackgroundTtsStream(this.sayOpts);
} else if (!this.ttsStream.enable) {
this.logger.info('Config: disabling ttsStream');
cs.disableTtsStream();
}
}
async kill(cs) {

View File

@@ -6,6 +6,7 @@ const {
TaskName,
TaskPreconditions,
MAX_SIMRINGS,
MediaPath,
KillReason
} = require('../utils/constants');
const assert = require('assert');
@@ -17,9 +18,12 @@ const dbUtils = require('../utils/db-utils');
const parseDecibels = require('../utils/parse-decibels');
const debug = require('debug')('jambonz:feature-server');
const {parseUri} = require('drachtio-srf');
const {ANCHOR_MEDIA_ALWAYS, JAMBONZ_DISABLE_DIAL_PAI_HEADER} = require('../config');
const {ANCHOR_MEDIA_ALWAYS,
JAMBONZ_DISABLE_DIAL_PAI_HEADER,
JAMBONES_DIAL_SBC_FOR_REGISTERED_USER} = require('../config');
const { isOnhold, isOpusFirst } = require('../utils/sdp-utils');
const { normalizeJambones } = require('@jambonz/verb-specifications');
const { selectHostPort } = require('../utils/network');
function parseDtmfOptions(logger, dtmfCapture) {
let parentDtmfCollector, childDtmfCollector;
@@ -105,6 +109,7 @@ class TaskDial extends Task {
this.proxy = this.data.proxy;
this.tag = this.data.tag;
this.boostAudioSignal = this.data.boostAudioSignal;
this._mediaPath = MediaPath.FullMedia;
if (this.dtmfHook) {
const {parentDtmfCollector, childDtmfCollector} = parseDtmfOptions(logger, this.data.dtmfCapture || {});
@@ -152,17 +157,21 @@ class TaskDial extends Task {
get canReleaseMedia() {
const keepAnchor = this.data.anchorMedia ||
this.cs.isBackGroundListen ||
this.cs.onHoldMusic ||
ANCHOR_MEDIA_ALWAYS ||
this.listenTask ||
this.dubTasks ||
this.transcribeTask ||
this.startAmd;
this.cs.isBackGroundListen ||
this.cs.onHoldMusic ||
ANCHOR_MEDIA_ALWAYS ||
this.listenTask ||
this.dubTasks ||
this.transcribeTask ||
this.startAmd;
return !keepAnchor;
}
get shouldExitMediaPathEntirely() {
return this.data.exitMediaPath;
}
get summary() {
if (this.target.length === 1) {
const target = this.target[0];
@@ -183,6 +192,16 @@ class TaskDial extends Task {
async exec(cs) {
await super.exec(cs);
if (this.data.anchorMedia && this.data.exitMediaPath) {
this.logger.info('Dial:exec - incompatible anchorMedia and exitMediaPath are both set, will obey anchorMedia');
delete this.data.exitMediaPath;
}
if (!this.canReleaseMedia && this.data.exitMediaPath) {
this.logger.info(
'Dial:exec - exitMediaPath is set so features such as transcribe and record will not work on this call');
}
try {
if (this.listenTask) {
const {span, ctx} = this.startChildSpan(`nested:${this.listenTask.summary}`);
@@ -300,7 +319,7 @@ class TaskDial extends Task {
if (!cs.callGone && this.epOther) {
/* if we can release the media back to the SBC, do so now */
if (this.canReleaseMedia) this._releaseMedia(cs, this.sd);
if (this.canReleaseMedia) this._releaseMedia(cs, this.sd, this.shouldExitMediaPathEntirely);
else this.epOther.bridge(this.ep);
}
} catch (err) {
@@ -342,6 +361,12 @@ class TaskDial extends Task {
const by = parseUri(req.getParsedHeader('Referred-By').uri);
const referredBy = req.get('Referred-By');
const userAgent = req.get('User-Agent');
const customHeaders = Object.keys(req.headers)
.filter((h) => h.toLowerCase().startsWith('x-'))
.reduce((acc, h) => {
acc[h] = req.get(h);
return acc;
}, {});
this.logger.info({to}, 'refer to parsed');
const json = await cs.requestor.request('verb:hook', this.referHook, {
...(callInfo.toJSON()),
@@ -352,7 +377,8 @@ class TaskDial extends Task {
...(userAgent && {sip_user_agent: userAgent}),
...(by && {referred_by_user: by.scheme === 'tel' ? by.number : by.user}),
referring_call_sid,
referred_call_sid
referred_call_sid,
...customHeaders
}
}, httpHeaders);
if (json && Array.isArray(json)) {
@@ -378,6 +404,9 @@ class TaskDial extends Task {
this.logger.info(err, 'Dial:handleRefer - error setting new application after receiving REFER');
}
}
//caller and callee legs are briged together, accept refer with 202 will release callee leg endpoint
//that makes freeswitch release endpoint for caller leg.
if (this.ep) this.ep.unbridge();
res.send(202);
this.logger.info('DialTask:handleRefer - sent 202 Accepted');
} catch (err) {
@@ -491,7 +520,7 @@ class TaskDial extends Task {
const {getSBC} = srf.locals;
const {lookupTeamsByAccount, lookupAccountBySid} = srf.locals.dbHelpers;
const {lookupCarrier, lookupCarrierByPhoneNumber} = dbUtils(this.logger, cs.srf);
const sbcAddress = this.proxy || getSBC();
let sbcAddress = this.proxy || getSBC();
const teamsInfo = {};
let fqdn;
@@ -500,17 +529,22 @@ class TaskDial extends Task {
'X-Account-Sid': cs.accountSid,
...(req && req.has('X-CID') && {'X-CID': req.get('X-CID')}),
...(direction === 'outbound' && callInfo.sbcCallid && {'X-CID': callInfo.sbcCallid}),
...(req && req.has('P-Asserted-Identity') && !JAMBONZ_DISABLE_DIAL_PAI_HEADER &&
{'P-Asserted-Identity': req.get('P-Asserted-Identity')}),
...(!JAMBONZ_DISABLE_DIAL_PAI_HEADER && req && {
...(req.has('P-Asserted-Identity') && {'P-Asserted-Identity': req.get('P-Asserted-Identity')}),
...(req.has('Privacy') && {'Privacy': req.get('Privacy')}),
}),
...(req && req.has('X-Voip-Carrier-Sid') && {'X-Voip-Carrier-Sid': req.get('X-Voip-Carrier-Sid')}),
// Put headers at the end to make sure opt.headers override all default behavior.
...this.headers
};
// get calling user from From header
const parsedFrom = req.getParsedHeader('from');
const fromUri = parseUri(parsedFrom.uri);
const opts = {
headers: this.headers,
proxy: `sip:${sbcAddress}`,
callingNumber: this.callerId || req.callingNumber,
callingNumber: this.callerId || fromUri.user,
...(this.callerName && {callingName: this.callerName}),
opusFirst: isOpusFirst(this.cs.ep.remote.sdp)
};
@@ -556,6 +590,15 @@ class TaskDial extends Task {
this.logger.error({err}, 'Error looking up account by sid');
}
}
// find handling sbc sip for called user
if (JAMBONES_DIAL_SBC_FOR_REGISTERED_USER && t.type === 'user') {
const { registrar } = srf.locals.dbHelpers;
const reg = await registrar.query(t.name);
if (reg) {
sbcAddress = selectHostPort(this.logger, reg.sbcAddress, 'tcp')[1];
}
//sbc outbound return 404 Notfound to handle case called user is not reigstered.
}
if (t.type === 'phone' && t.trunk) {
const voip_carrier_sid = await lookupCarrier(cs.accountSid, t.trunk);
this.logger.info(`Dial:_attemptCalls: selected ${voip_carrier_sid} for requested carrier: ${t.trunk}`);
@@ -732,7 +775,7 @@ class TaskDial extends Task {
// Offhold, time to release media
const newSdp = await this.ep.modify(req.body);
await res.send(200, {body: newSdp});
await this._releaseMedia(this.cs, this.sd);
await this._releaseMedia(this.cs, this.sd, this.shouldExitMediaPathEntirely);
this.isOutgoingLegHold = false;
} else {
this.logger.debug('Dial: _onReinvite receive unhold Request, update media server');
@@ -841,7 +884,9 @@ class TaskDial extends Task {
}
/* if we can release the media back to the SBC, do so now */
if (this.canReleaseMedia) setTimeout(this._releaseMedia.bind(this, cs, sd), 200);
if (this.canReleaseMedia || this.shouldExitMediaPathEntirely) {
setTimeout(this._releaseMedia.bind(this, cs, sd, this.shouldExitMediaPathEntirely), 200);
}
}
_bridgeEarlyMedia(sd) {
@@ -853,22 +898,57 @@ class TaskDial extends Task {
}
}
/* public api */
async updateMediaPath(desiredPath) {
this.logger.info(`Dial:updateMediaPath - ${this._mediaPath} => ${desiredPath}`);
switch (desiredPath) {
case MediaPath.NoMedia:
assert(this._mediaPath !== MediaPath.NoMedia, 'updateMediaPath: already no-media');
await this._releaseMedia(this.cs, this.sd, true);
break;
case MediaPath.PartialMedia:
assert(this._mediaPath !== MediaPath.PartialMedia, 'updateMediaPath: already partial-media');
if (this._mediaPath === MediaPath.FullMedia) {
await this._releaseMedia(this.cs, this.sd, false);
}
else {
// to go from no-media to partial-media we need to go through full-media first
await this.reAnchorMedia(this.cs, this.sd);
await this._releaseMedia(this.cs, this.sd, false);
}
assert(!this.epOther, 'updateMediaPath: epOther should be null');
assert(!this.ep, 'updateMediaPath: ep should be null');
break;
case MediaPath.FullMedia:
assert(this._mediaPath !== MediaPath.FullMedia, 'updateMediaPath: already full-media');
await this.reAnchorMedia(this.cs, this.sd);
break;
default:
assert(false, `updateMediaPath: invalid path request ${desiredPath}`);
}
}
/**
* Release the media from freeswitch
* @param {*} cs
* @param {*} sd
*/
async _releaseMedia(cs, sd) {
async _releaseMedia(cs, sd, releaseEntirely = false) {
assert(cs.ep && sd.ep);
try {
// Wait until we got new SDP from B leg to ofter to A Leg
const aLegSdp = cs.ep.remote.sdp;
await sd.releaseMediaToSBC(aLegSdp, cs.ep.local.sdp);
await sd.releaseMediaToSBC(aLegSdp, cs.ep.local.sdp, releaseEntirely);
const bLegSdp = sd.dlg.remote.sdp;
await cs.releaseMediaToSBC(bLegSdp);
await cs.releaseMediaToSBC(bLegSdp, releaseEntirely);
this.epOther = null;
this.logger.info('Dial:_releaseMedia - successfully released media from freewitch');
this._mediaPath = releaseEntirely ? MediaPath.NoMedia : MediaPath.PartialMedia;
this.logger.info(
`Dial:_releaseMedia - successfully released media from freewitch, media path is now ${this._mediaPath}`);
} catch (err) {
this.logger.info({err}, 'Dial:_releaseMedia error');
}
@@ -878,8 +958,14 @@ class TaskDial extends Task {
if (cs.ep && sd.ep) return;
this.logger.info('Dial:reAnchorMedia - re-anchoring media to freewitch');
await Promise.all([sd.reAnchorMedia(), cs.reAnchorMedia()]);
await Promise.all([sd.reAnchorMedia(this._mediaPath), cs.reAnchorMedia(this._mediaPath)]);
this.epOther = cs.ep;
this.epOther.bridge(this.ep);
this._mediaPath = MediaPath.FullMedia;
this.logger.info(
`Dial:_releaseMedia - successfully re-anchored media to freewitch, media path is now ${this._mediaPath}`);
}
// Handle RE-INVITE hold from caller leg.
@@ -898,11 +984,12 @@ class TaskDial extends Task {
}
this._onHoldHook(req);
} else if (!isOnhold(req.body)) {
if (this.epOther && this.ep && this.isIncomingLegHold && this.canReleaseMedia) {
if (this.epOther && this.ep && this.isIncomingLegHold &&
(this.canReleaseMedia || this.shouldExitMediaPathEntirely)) {
// Offhold, time to release media
const newSdp = await this.epOther.modify(req.body);
await res.send(200, {body: newSdp});
await this._releaseMedia(this.cs, this.sd);
await this._releaseMedia(this.cs, this.sd, this.shouldExitMediaPathEntirely);
isHandled = true;
}
this.isIncomingLegHold = false;
@@ -961,7 +1048,8 @@ class TaskDial extends Task {
callInfo: this.cs.callInfo,
accountInfo: this.cs.accountInfo,
tasks,
rootSpan: this.cs.rootSpan
rootSpan: this.cs.rootSpan,
req: this.cs.req
});
await this._onHoldSession.exec();
this._onHoldSession = null;

View File

@@ -369,7 +369,8 @@ class TaskEnqueue extends Task {
callInfo: cs.callInfo,
accountInfo: cs.accountInfo,
tasks: tasksToRun,
rootSpan: cs.rootSpan
rootSpan: cs.rootSpan,
req: cs.req
});
await this._playSession.exec();
this._playSession = null;

View File

@@ -140,7 +140,6 @@ class TaskGather extends SttTask {
async handling(cs, {ep}) {
this.logger.debug({options: this.data}, 'Gather:exec');
await super.exec(cs, {ep});
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
this.fillerNoise = {
...(cs.fillerNoise || {}),
@@ -156,7 +155,8 @@ class TaskGather extends SttTask {
const {hints, hintsBoost} = cs.globalSttHints;
const setOfHints = new Set((this.data.recognizer.hints || [])
.concat(hints)
.filter((h) => typeof h === 'string' && h.length > 0));
// allow for hints to be an array of object
.filter((h) => (typeof h === 'string' && h.length > 0) || (typeof h === 'object')));
this.data.recognizer.hints = [...setOfHints];
if (!this.data.recognizer.hintsBoost && hintsBoost) this.data.recognizer.hintsBoost = hintsBoost;
this.logger.debug({hints: this.data.recognizer.hints, hintsBoost: this.data.recognizer.hintsBoost},
@@ -210,7 +210,6 @@ class TaskGather extends SttTask {
return;
}
this._startTranscribing(ep);
return updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
} catch (e) {
await this._startFallback(cs, ep, {error: e});
}
@@ -290,8 +289,6 @@ class TaskGather extends SttTask {
await this._setSpeechHandlers(cs, ep);
if (!this.resolved && !this.killed) {
this._startTranscribing(ep);
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid)
.catch(() => {/*already logged error */});
}
else {
this.logger.info('Gather:exec - task was killed or resolved quickly, not starting transcription');
@@ -346,6 +343,13 @@ class TaskGather extends SttTask {
this._killAudio(cs);
this.emit('dtmf', evt);
}
if (this.isContinuousAsr && evt.dtmf === this.asrDtmfTerminationDigit && this._bufferedTranscripts.length > 0) {
this.logger.info(`continuousAsr triggered with dtmf ${this.asrDtmfTerminationDigit}`);
this._clearAsrTimer();
this._clearTimer();
this._startFinalAsrTimer();
return;
}
if (evt.dtmf === this.finishOnKey && this.input.includes('digits')) {
resolved = true;
this._resolve('dtmf-terminator-key');
@@ -368,13 +372,6 @@ class TaskGather extends SttTask {
this._resolve('dtmf-num-digits');
}
}
else if (this.isContinuousAsr && evt.dtmf === this.asrDtmfTerminationDigit) {
this.logger.info(`continuousAsr triggered with dtmf ${this.asrDtmfTerminationDigit}`);
this._clearAsrTimer();
this._clearTimer();
this._startFinalAsrTimer();
return;
}
if (!resolved && this.interDigitTimeout > 0 && this.digitBuffer.length >= this.minDigits) {
/* start interDigitTimer */
const ms = this.interDigitTimeout * 1000;
@@ -726,6 +723,7 @@ class TaskGather extends SttTask {
this._fillerNoiseOn = false; // in a race, if we just started audio it may sneak through here
this.ep.api('uuid_break', this.ep.uuid)
.catch((err) => this.logger.info(err, 'Error killing audio'));
cs.clearTtsStream();
}
return;
}
@@ -850,7 +848,10 @@ class TaskGather extends SttTask {
this._startAsrTimer();
/* some STT engines will keep listening after a final response, so no need to restart */
if (!['soniox', 'aws', 'microsoft', 'deepgram'].includes(this.vendor)) this._startTranscribing(ep);
if (!['soniox', 'aws', 'microsoft', 'deepgram', 'speechmatics'].includes(this.vendor) &&
!this.vendor.startsWith('custom')) {
this._startTranscribing(ep);
}
}
else {
/* this was removed to fix https://github.com/jambonz/jambonz-feature-server/issues/783 */
@@ -924,7 +925,7 @@ class TaskGather extends SttTask {
}
}
// If transcription received, reset timeout timer.
if (this._timeoutTimer) {
if (this._timeoutTimer && !emptyTranscript) {
this._startTimer();
}
/* restart asr timer if we get a partial transcript (only if the asr timer is already running) */
@@ -969,7 +970,6 @@ class TaskGather extends SttTask {
bugname: this.bugname
})
.catch((err) => this.logger.error({err}, `Error stopping transcription for primary vendor ${this.vendor}`));
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
try {
this.logger.debug('gather:_startFallback');
this.notifyError({ msg: 'ASR error',
@@ -978,7 +978,6 @@ class TaskGather extends SttTask {
this._speechHandlersSet = false;
await this._setSpeechHandlers(cs, ep);
this._startTranscribing(ep);
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
return true;
} catch (error) {
this.logger.info({error}, `There is error while falling back to ${this.fallbackVendor}`);
@@ -1172,7 +1171,6 @@ class TaskGather extends SttTask {
} catch (err) { /*already logged error*/ }
// Gather got response from hook, cancel actionHookDelay processing
this.logger.debug('TaskGather:_resolve - checking ahd');
if (this.cs.actionHookDelayProcessor) {
if (returnedVerbs) {
this.logger.debug('TaskGather:_resolve - got response from action hook, cancelling actionHookDelay');

View File

@@ -1,6 +1,7 @@
const Task = require('../task');
const {TaskPreconditions} = require('../../utils/constants');
const TaskLlmOpenAI_S2S = require('./llms/openai_s2s');
const TaskLlmVoiceAgent_S2S = require('./llms/voice_agent_s2s');
class TaskLlm extends Task {
constructor(logger, opts) {
@@ -44,6 +45,12 @@ class TaskLlm extends Task {
llm = new TaskLlmOpenAI_S2S(this.logger, this.data, this);
}
break;
case 'voiceagent':
case 'deepgram':
llm = new TaskLlmVoiceAgent_S2S(this.logger, this.data, this);
break;
default:
throw new Error(`Unsupported vendor ${this.vendor} for LLM`);
}

View File

@@ -59,7 +59,7 @@ class TaskLlmOpenAI_S2S extends Task {
this.parent = parentTask;
this.vendor = this.parent.vendor;
this.model = this.parent.model;
this.model = this.parent.model || 'gpt-4o-realtime-preview-2024-12-17';
this.auth = this.parent.auth;
this.connectionOptions = this.parent.connectOptions;
@@ -120,9 +120,9 @@ class TaskLlmOpenAI_S2S extends Task {
switch (this.vendor) {
case 'openai':
return 'v1/realtime?model=gpt-4o-realtime-preview-2024-10-01';
return 'v1/realtime?model=${this.model}';
case 'microsoft':
return 'openai/realtime?api-version=2024-10-01-preview&deployment=gpt-4o-realtime-preview-1001&';
return `openai/realtime?api-version=2024-10-01-preview&deployment=${this.model}`;
}
}

View File

@@ -0,0 +1,313 @@
const Task = require('../../task');
const TaskName = 'Llm_VoiceAgent_s2s';
const {LlmEvents_VoiceAgent} = require('../../../utils/constants');
const ClientEvent = 'client.event';
const SessionDelete = 'session.delete';
const va_server_events = [
'Error',
'Welcome',
'SettingsApplied',
'ConversationText',
'UserStartedSpeaking',
'EndOfThought',
'AgentThinking',
'FunctionCallRequest',
'FunctionCalling',
'AgentStartedSpeaking',
'AgentAudioDone',
];
const expandWildcards = (events) => {
// no-op for deepgram
return events;
};
class TaskLlmVoiceAgent_S2S extends Task {
constructor(logger, opts, parentTask) {
super(logger, opts, parentTask);
this.parent = parentTask;
this.vendor = this.parent.vendor;
this.model = this.parent.model;
this.auth = this.parent.auth;
this.connectionOptions = this.parent.connectOptions;
const {apiKey} = this.auth || {};
if (!apiKey) throw new Error('auth.apiKey is required for VoiceAgent S2S');
this.apiKey = apiKey;
this.authType = 'bearer';
this.actionHook = this.data.actionHook;
this.eventHook = this.data.eventHook;
this.toolHook = this.data.toolHook;
const {settingsConfiguration} = this.data.llmOptions;
if (typeof settingsConfiguration !== 'object') {
throw new Error('llmOptions with an initial settingsConfiguration is required for VoiceAgent S2S');
}
// eslint-disable-next-line no-unused-vars
const {audio, ...rest} = settingsConfiguration;
const cfg = this.settingsConfiguration = rest;
if (!cfg.agent) throw new Error('llmOptions.settingsConfiguration.agent is required for VoiceAgent S2S');
if (!cfg.agent.think) {
throw new Error('llmOptions.settingsConfiguration.agent.think is required for VoiceAgent S2S');
}
if (!cfg.agent.think.model) {
throw new Error('llmOptions.settingsConfiguration.agent.think.model is required for VoiceAgent S2S');
}
if (!cfg.agent.think.provider?.type) {
throw new Error('llmOptions.settingsConfiguration.agent.think.provider.type is required for VoiceAgent S2S');
}
this.results = {
completionReason: 'normal conversation end'
};
/**
* only one of these will have items,
* if includeEvents, then these are the events to include
* if excludeEvents, then these are the events to exclude
*/
this.includeEvents = [];
this.excludeEvents = [];
/* default to all events if user did not specify */
this._populateEvents(this.data.events || va_server_events);
this.addCustomEventListener = parentTask.addCustomEventListener.bind(parentTask);
this.removeCustomEventListeners = parentTask.removeCustomEventListeners.bind(parentTask);
}
get name() { return TaskName; }
get host() {
const {host} = this.connectionOptions || {};
return host || 'agent.deepgram.com';
}
get path() {
const {path} = this.connectionOptions || {};
if (path) return path;
return '/agent';
}
async _api(ep, args) {
const res = await ep.api('uuid_voice_agent_s2s', `^^|${args.join('|')}`);
if (!res.body?.startsWith('+OK')) {
throw new Error({args}, `Error calling uuid_voice_agent_s2s: ${JSON.stringify(res.body)}`);
}
}
async exec(cs, {ep}) {
await super.exec(cs);
await this._startListening(cs, ep);
await this.awaitTaskDone();
/* note: the parent llm verb started the span, which is why this is necessary */
await this.parent.performAction(this.results);
this._unregisterHandlers();
}
async kill(cs) {
super.kill(cs);
this._api(cs.ep, [cs.ep.uuid, SessionDelete])
.catch((err) => this.logger.info({err}, 'TaskLlmVoiceAgent_S2S:kill - error deleting session'));
this.notifyTaskDone();
}
/**
* Send function call response to the VoiceAgent server
*/
async processToolOutput(ep, tool_call_id, data) {
try {
const {data:response} = data;
this.logger.debug({tool_call_id, response}, 'TaskLlmVoiceAgent_S2S:processToolOutput');
if (!response.type || response.type !== 'FunctionCallResponse') {
this.logger.info({response},
'TaskLlmVoiceAgent_S2S:processToolOutput - invalid tool output, must be FunctionCallResponse');
}
else {
await this._api(ep, [ep.uuid, ClientEvent, JSON.stringify(response)]);
}
} catch (err) {
this.logger.info({err}, 'TaskLlmVoiceAgent_S2S:processToolOutput');
}
}
/**
* Send a session.update to the VoiceAgent server
* Note: creating and deleting conversation items also supported as well as interrupting the assistant
*/
async processLlmUpdate(ep, data, _callSid) {
try {
this.logger.debug({data, _callSid}, 'TaskLlmVoiceAgent_S2S:processLlmUpdate');
if (!data.type || ![
'UpdateInstructions',
'UpdateSpeak',
'InjectAgentMessage',
].includes(data.type)) {
this.logger.info({data}, 'TaskLlmVoiceAgent_S2S:processLlmUpdate - invalid mid-call request');
}
else {
await this._api(ep, [ep.uuid, ClientEvent, JSON.stringify(data)]);
}
} catch (err) {
this.logger.info({err}, 'TaskLlmVoiceAgent_S2S:processLlmUpdate');
}
}
async _startListening(cs, ep) {
this._registerHandlers(ep);
try {
const args = [ep.uuid, 'session.create', this.host, this.path, this.authType, this.apiKey];
await this._api(ep, args);
} catch (err) {
this.logger.error({err}, `TaskLlmVoiceAgent_S2S:_startListening: ${JSON.stringify(err)}`);
this.notifyTaskDone();
}
}
async _sendClientEvent(ep, obj) {
let ok = true;
this.logger.debug({obj}, 'TaskLlmVoiceAgent_S2S:_sendClientEvent');
try {
const args = [ep.uuid, ClientEvent, JSON.stringify(obj)];
await this._api(ep, args);
} catch (err) {
ok = false;
this.logger.error({err}, 'TaskLlmVoiceAgent_S2S:_sendClientEvent - Error');
}
return ok;
}
async _sendInitialMessage(ep) {
if (!await this._sendClientEvent(ep, this.settingsConfiguration)) {
this.notifyTaskDone();
}
}
_registerHandlers(ep) {
this.addCustomEventListener(ep, LlmEvents_VoiceAgent.Connect, this._onConnect.bind(this, ep));
this.addCustomEventListener(ep, LlmEvents_VoiceAgent.ConnectFailure, this._onConnectFailure.bind(this, ep));
this.addCustomEventListener(ep, LlmEvents_VoiceAgent.Disconnect, this._onDisconnect.bind(this, ep));
this.addCustomEventListener(ep, LlmEvents_VoiceAgent.ServerEvent, this._onServerEvent.bind(this, ep));
}
_unregisterHandlers() {
this.removeCustomEventListeners();
}
_onError(_ep, evt) {
this.logger.info({evt}, 'TaskLlmVoiceAgent_S2S:_onError');
this.notifyTaskDone();
}
_onConnect(ep) {
this.logger.debug('TaskLlmVoiceAgent_S2S:_onConnect');
this._sendInitialMessage(ep);
}
_onConnectFailure(_ep, evt) {
this.logger.info(evt, 'TaskLlmVoiceAgent_S2S:_onConnectFailure');
this.results = {completionReason: 'connection failure'};
this.notifyTaskDone();
}
_onDisconnect(_ep, evt) {
this.logger.info(evt, 'TaskLlmVoiceAgent_S2S:_onConnectFailure');
this.results = {completionReason: 'disconnect from remote end'};
this.notifyTaskDone();
}
async _onServerEvent(_ep, evt) {
let endConversation = false;
const type = evt.type;
this.logger.info({evt}, 'TaskLlmVoiceAgent_S2S:_onServerEvent');
/* check for failures, such as rate limit exceeded, that should terminate the conversation */
if (type === 'response.done' && evt.response.status === 'failed') {
endConversation = true;
this.results = {
completionReason: 'server failure',
error: evt.response.status_details?.error
};
}
/* server errors of some sort */
else if (type === 'error') {
endConversation = true;
this.results = {
completionReason: 'server error',
error: evt.error
};
}
/* tool calls */
else if (type === 'FunctionCallRequest') {
this.logger.debug({evt}, 'TaskLlmVoiceAgent_S2S:_onServerEvent - function_call');
if (!this.toolHook) {
this.logger.warn({evt}, 'TaskLlmVoiceAgent_S2S:_onServerEvent - no toolHook defined!');
}
else {
const {function_name:name, function_call_id:call_id} = evt;
const args = evt.input;
try {
await this.parent.sendToolHook(call_id, {name, args});
} catch (err) {
this.logger.info({err, evt}, 'TaskLlmVoiceAgent - error calling function');
this.results = {
completionReason: 'client error calling function',
error: err
};
endConversation = true;
}
}
}
/* check whether we should notify on this event */
if (this.includeEvents.length > 0 ? this.includeEvents.includes(type) : !this.excludeEvents.includes(type)) {
this.parent.sendEventHook(evt)
.catch((err) => this.logger.info({err}, 'TaskLlmVoiceAgent_S2S:_onServerEvent - error sending event hook'));
}
if (endConversation) {
this.logger.info({results: this.results},
'TaskLlmVoiceAgent_S2S:_onServerEvent - ending conversation due to error');
this.notifyTaskDone();
}
}
_populateEvents(events) {
if (events.includes('all')) {
/* work by excluding specific events */
const exclude = events
.filter((evt) => evt.startsWith('-'))
.map((evt) => evt.slice(1));
if (exclude.length === 0) this.includeEvents = va_server_events;
else this.excludeEvents = expandWildcards(exclude);
}
else {
/* work by including specific events */
const include = events
.filter((evt) => !evt.startsWith('-'));
this.includeEvents = expandWildcards(include);
}
this.logger.debug({
includeEvents: this.includeEvents,
excludeEvents: this.excludeEvents
}, 'TaskLlmVoiceAgent_S2S:_populateEvents');
}
}
module.exports = TaskLlmVoiceAgent_S2S;

View File

@@ -1,5 +1,6 @@
const Task = require('./task');
const {TaskName, TaskPreconditions} = require('../utils/constants');
const { PlayFileNotFoundError } = require('../utils/error');
class TaskPlay extends Task {
constructor(logger, opts) {
@@ -66,8 +67,20 @@ class TaskPlay extends Task {
}
}
} catch (err) {
if (timeout) clearTimeout(timeout);
this.logger.info(err, `TaskPlay:exec - error playing ${this.url}`);
this.logger.info(`TaskPlay:exec - error playing ${this.url}: ${err.message}`);
this.playComplete = true;
if (err.message === 'File Not Found') {
const {writeAlerts, AlertType} = cs.srf.locals;
await this.performAction({status: 'fail', reason: 'playFailed'}, !(this.parentTask || cs.isConfirmCallSession));
this.emit('playDone');
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.PLAY_FILENOTFOUND,
url: this.url,
target_sid: cs.callSid
});
throw new PlayFileNotFoundError(this.url);
}
}
this.emit('playDone');
}

View File

@@ -77,11 +77,13 @@ class TaskRestDial extends Task {
synthesizer: {
vendor: cs.speechSynthesisVendor,
language: cs.speechSynthesisLanguage,
voice: cs.speechSynthesisVoice
voice: cs.speechSynthesisVoice,
label: cs.speechSynthesisLabel,
},
recognizer: {
vendor: cs.speechRecognizerVendor,
language: cs.speechRecognizerLanguage
language: cs.speechRecognizerLanguage,
label: cs.speechRecognizerLabel,
}
}
};

View File

@@ -1,3 +1,4 @@
const assert = require('assert');
const TtsTask = require('./tts-task');
const {TaskName, TaskPreconditions} = require('../utils/constants');
const pollySSMLSplit = require('polly-ssml-split');
@@ -35,24 +36,40 @@ class TaskSay extends TtsTask {
super(logger, opts, parentTask);
this.preconditions = TaskPreconditions.Endpoint;
this.text = (Array.isArray(this.data.text) ? this.data.text : [this.data.text])
.map((t) => breakLengthyTextIfNeeded(this.logger, t))
.flat();
assert.ok((typeof this.data.text === 'string' || Array.isArray(this.data.text)) || this.data.stream === true,
'Say: either text or stream:true is required');
this.loop = this.data.loop || 1;
this.isHandledByPrimaryProvider = true;
if (this.data.stream === true) {
this._isStreamingTts = true;
this.closeOnStreamEmpty = this.data.closeOnStreamEmpty !== false;
}
else {
this._isStreamingTts = false;
this.text = (Array.isArray(this.data.text) ? this.data.text : [this.data.text])
.map((t) => breakLengthyTextIfNeeded(this.logger, t))
.flat();
this.loop = this.data.loop || 1;
this.isHandledByPrimaryProvider = true;
}
}
get name() { return TaskName.Say; }
get summary() {
for (let i = 0; i < this.text.length; i++) {
if (this.text[i].startsWith('silence_stream')) continue;
return `${this.name}{text=${this.text[i].slice(0, 15)}${this.text[i].length > 15 ? '...' : ''}}`;
if (this.isStreamingTts) return `${this.name} streaming`;
else {
for (let i = 0; i < this.text.length; i++) {
if (this.text[i].startsWith('silence_stream')) continue;
return `${this.name}{text=${this.text[i].slice(0, 15)}${this.text[i].length > 15 ? '...' : ''}}`;
}
return `${this.name}{${this.text[0]}}`;
}
return `${this.name}{${this.text[0]}}`;
}
get isStreamingTts() { return this._isStreamingTts; }
_validateURL(urlString) {
try {
new URL(urlString);
@@ -63,14 +80,19 @@ class TaskSay extends TtsTask {
}
async exec(cs, obj) {
if (this.isStreamingTts && !cs.appIsUsingWebsockets) {
throw new Error('Say: streaming say verb requires applications to use the websocket API');
}
try {
await this.handling(cs, obj);
if (this.isStreamingTts) await this.handlingStreaming(cs, obj);
else await this.handling(cs, obj);
this.emit('playDone');
} catch (error) {
if (error instanceof SpeechCredentialError) {
// if say failed due to speech credentials, alarm is writtern and error notification is sent
// finished this say to move to next task.
this.logger.info('Say failed due to SpeechCredentialError, finished!');
this.logger.info({error}, 'Say failed due to SpeechCredentialError, finished!');
this.emit('playDone');
return;
}
@@ -78,6 +100,35 @@ class TaskSay extends TtsTask {
}
}
async handlingStreaming(cs, {ep}) {
const {vendor, language, voice, label} = this.getTtsVendorData(cs);
const credentials = cs.getSpeechCredentials(vendor, 'tts', label);
if (!credentials) {
throw new SpeechCredentialError(
`No text-to-speech service credentials for ${vendor} with labels: ${label} have been configured`);
}
try {
await this.setTtsStreamingChannelVars(vendor, language, voice, credentials, ep);
await cs.startTtsStream();
cs.requestor?.request('tts:streaming-event', '/streaming-event', {event_type: 'stream_open'})
.catch((err) => this.logger.info({err}, 'TaskSay:handlingStreaming - Error sending'));
} catch (err) {
this.logger.info({err}, 'TaskSay:handlingStreaming - Error setting channel vars');
cs.requestor?.request('tts:streaming-event', '/streaming-event', {event_type: 'stream_closed'})
.catch((err) => this.logger.info({err}, 'TaskSay:handlingStreaming - Error sending'));
//TODO: send tts:streaming-event with error?
this.notifyTaskDone();
}
await this.awaitTaskDone();
this.logger.info('TaskSay:handlingStreaming - done');
}
async handling(cs, {ep}) {
const {srf, accountSid:account_sid, callSid:target_sid} = cs;
const {writeAlerts, AlertType} = srf.locals;
@@ -96,7 +147,7 @@ class TaskSay extends TtsTask {
let voice = this.synthesizer.voice && this.synthesizer.voice !== 'default' ?
this.synthesizer.voice :
cs.speechSynthesisVoice;
let label = this.taskInlcudeSynthesizer ? this.synthesizer.label : cs.speechSynthesisLabel;
let label = this.taskIncludeSynthesizer ? this.synthesizer.label : cs.speechSynthesisLabel;
const fallbackVendor = this.synthesizer.fallbackVendor && this.synthesizer.fallbackVendor !== 'default' ?
this.synthesizer.fallbackVendor :
@@ -107,7 +158,7 @@ class TaskSay extends TtsTask {
const fallbackVoice = this.synthesizer.fallbackVoice && this.synthesizer.fallbackVoice !== 'default' ?
this.synthesizer.fallbackVoice :
cs.fallbackSpeechSynthesisVoice;
const fallbackLabel = this.taskInlcudeSynthesizer ?
const fallbackLabel = this.taskIncludeSynthesizer ?
this.synthesizer.fallbackLabel : cs.fallbackSpeechSynthesisLabel;
if (cs.hasFallbackTts) {
@@ -253,6 +304,7 @@ class TaskSay extends TtsTask {
this._playResolve = null;
}
}
this.notifyTaskDone();
}
_addStreamingTtsAttributes(span, evt) {
@@ -263,6 +315,7 @@ class TaskSay extends TtsTask {
.replace('whisper_', 'whisper.')
.replace('deepgram_', 'deepgram.')
.replace('playht_', 'playht.')
.replace('cartesia_', 'cartesia.')
.replace('rimelabs_', 'rimelabs.')
.replace('verbio_', 'verbio.')
.replace('elevenlabs_', 'elevenlabs.');
@@ -273,6 +326,13 @@ class TaskSay extends TtsTask {
delete attrs['cache_filename']; //no value in adding this to the span
span.setAttributes(attrs);
}
notifyTtsStreamIsEmpty() {
if (this.isStreamingTts && this.closeOnStreamEmpty) {
this.logger.info('TaskSay:notifyTtsStreamIsEmpty - stream is empty, killing task');
this.notifyTaskDone();
}
}
}
const spanMapping = {
@@ -309,6 +369,11 @@ const spanMapping = {
'playht.name_lookup_time_ms': 'name_lookup_ms',
'playht.connect_time_ms': 'connect_ms',
'playht.final_response_time_ms': 'final_response_ms',
// Cartesia
'cartesia.request_id': 'cartesia.req_id',
'cartesia.name_lookup_time_ms': 'name_lookup_ms',
'cartesia.connect_time_ms': 'connect_ms',
'cartesia.final_response_time_ms': 'final_response_ms',
// Rimelabs
'rimelabs.name_lookup_time_ms': 'name_lookup_ms',
'rimelabs.connect_time_ms': 'connect_ms',

View File

@@ -12,6 +12,7 @@ class TaskSipRefer extends Task {
this.referTo = this.data.referTo;
this.referredBy = this.data.referredBy;
this.referredByDisplayName = this.data.referredByDisplayName;
this.headers = this.data.headers || {};
this.eventHook = this.data.eventHook;
}
@@ -94,7 +95,10 @@ class TaskSipRefer extends Task {
}
if (status >= 200) {
this.referSpan.setAttributes({'refer.finalNotify': status});
await this.performAction({refer_status: 202, final_referred_call_status: status});
await this.performAction({refer_status: 202, final_referred_call_status: status})
.catch((err) => {
this.logger.error(err, 'TaskSipRefer:exec - error performing action finalNotify');
});
this.notifyTaskDone();
}
}
@@ -102,7 +106,7 @@ class TaskSipRefer extends Task {
}
_normalizeReferHeaders(cs, dlg) {
let {referTo, referredBy} = this;
let {referTo, referredBy, referredByDisplayName} = this;
/* get IP address of the SBC to use as hostname if needed */
const {host} = parseUri(dlg.remote.uri);
@@ -117,9 +121,12 @@ class TaskSipRefer extends Task {
referredBy = cs.req?.callingNumber || dlg.local.uri;
this.logger.info({referredBy}, 'setting referredby');
}
if (!referredByDisplayName) {
referredByDisplayName = cs.req?.callingName;
}
if (!referredBy.startsWith('<') && !referredBy.startsWith('sip') && !referredBy.startsWith('"')) {
/* they may have only provided a phone number/user */
referredBy = `sip:${referredBy}@${host}`;
referredBy = `${referredByDisplayName ? `"${referredByDisplayName}"` : ''}<sip:${referredBy}@${host}>`;
}
return {referTo, referredBy};
}

View File

@@ -219,7 +219,8 @@ class SttTask extends Task {
roleArn
});
this.logger.debug({roleArn}, `(roleArn) got aws access token ${servedFromCache ? 'from cache' : ''}`);
credentials = {...credentials, accessKeyId, secretAccessKey, sessionToken};
// from role ARN, we will get SessionToken, but feature server use it as securityToken.
credentials = {...credentials, accessKeyId, secretAccessKey, securityToken: sessionToken};
}
else if (vendor === 'verbio' && credentials.client_id && credentials.client_secret) {
const {access_token, servedFromCache} = await getVerbioAccessToken(credentials);
@@ -229,9 +230,13 @@ class SttTask extends Task {
}
else if (vendor == 'aws' && !JAMBONES_AWS_TRANSCRIBE_USE_GRPC) {
/* get AWS access token */
const {accessKeyId, secretAccessKey, securityToken, region } = credentials;
const {speech_credential_sid, accessKeyId, secretAccessKey, securityToken, region } = credentials;
if (!securityToken) {
const { servedFromCache, ...newCredentials} = await getAwsAuthToken({accessKeyId, secretAccessKey, region});
const { servedFromCache, ...newCredentials} = await getAwsAuthToken({
speech_credential_sid,
accessKeyId,
secretAccessKey,
region});
this.logger.debug({newCredentials}, `got aws security token ${servedFromCache ? 'from cache' : ''}`);
credentials = {...newCredentials, region};
}

View File

@@ -27,6 +27,7 @@ class TaskTranscribe extends SttTask {
super(logger, opts, parentTask);
this.transcriptionHook = this.data.transcriptionHook;
this.translationHook = this.data.translationHook;
this.earlyMedia = this.data.earlyMedia === true || (parentTask && parentTask.earlyMedia);
if (this.data.recognizer) {
@@ -100,7 +101,6 @@ class TaskTranscribe extends SttTask {
...this.data.recognizer.nuanceOptions
};
}
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
if (cs.hasGlobalSttHints) {
const {hints, hintsBoost} = cs.globalSttHints;
@@ -117,9 +117,6 @@ class TaskTranscribe extends SttTask {
if (this.transcribing2) {
await this._startTranscribing(cs, ep2, 2);
}
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid)
.catch(() => {/*already logged error */});
} catch (err) {
if (!(await this._startFallback(cs, ep, {error: err}))) {
this.logger.info(err, 'TaskTranscribe:exec - error');
@@ -306,7 +303,9 @@ class TaskTranscribe extends SttTask {
case 'speechmatics':
this.bugname = `${this.bugname_prefix}speechmatics_transcribe`;
this.addCustomEventListener(
ep, SpeechmaticsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep, SpeechmaticsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep, channel));
this.addCustomEventListener(
ep, SpeechmaticsTranscriptionEvents.Translation, this._onTranslation.bind(this, cs, ep, channel));
this.addCustomEventListener(ep, SpeechmaticsTranscriptionEvents.Info,
this._onSpeechmaticsInfo.bind(this, cs, ep));
this.addCustomEventListener(ep, SpeechmaticsTranscriptionEvents.RecognitionStarted,
@@ -445,7 +444,7 @@ class TaskTranscribe extends SttTask {
this._startAsrTimer(channel);
/* some STT engines will keep listening after a final response, so no need to restart */
if (!['soniox', 'aws', 'microsoft', 'deepgram', 'google']
if (!['soniox', 'aws', 'microsoft', 'deepgram', 'google', 'speechmatics']
.includes(this.vendor)) this._startTranscribing(cs, ep, channel);
}
else {
@@ -470,7 +469,7 @@ class TaskTranscribe extends SttTask {
this._resolve(channel, evt);
/* some STT engines will keep listening after a final response, so no need to restart */
if (!['soniox', 'aws', 'microsoft', 'deepgram', 'google'].includes(this.vendor) &&
if (!['soniox', 'aws', 'microsoft', 'deepgram', 'google', 'speechmatics'].includes(this.vendor) &&
!this.vendor.startsWith('custom:')) {
this.logger.debug('TaskTranscribe:_onTranscription - restarting transcribe');
this._startTranscribing(cs, ep, channel);
@@ -496,6 +495,47 @@ class TaskTranscribe extends SttTask {
}
}
async _onTranslation(_cs, _ep, channel, evt, _fsEvent) {
this.logger.debug({evt}, 'TaskTranscribe:_onTranslation');
if (this.translationHook && evt.results?.length > 0) {
try {
const b3 = this.getTracingPropagation();
const httpHeaders = b3 && {b3};
const payload = {
...this.cs.callInfo,
...httpHeaders,
translation: {
channel,
language: evt.language,
translation: evt.results[0].content
}
};
this.logger.debug({payload}, 'sending translationHook');
const json = await this.cs.requestor.request('verb:hook', this.translationHook, payload);
this.logger.info({json}, 'completed translationHook');
if (json && Array.isArray(json) && !this.parentTask) {
const makeTask = require('./make_task');
const tasks = normalizeJambones(this.logger, json).map((tdata) => makeTask(this.logger, tdata));
if (tasks && tasks.length > 0) {
this.logger.info({tasks: tasks}, `${this.name} replacing application with ${tasks.length} tasks`);
this.cs.replaceApplication(tasks);
}
}
} catch (err) {
this.logger.info(err, 'TranscribeTask:_onTranslation error');
}
if (this.parentTask) {
this.parentTask.emit('translation', evt);
}
}
if (this.killed) {
this.logger.debug('TaskTranscribe:_onTranslation exiting after receiving final transcription');
this._clearTimer();
this.notifyTaskDone();
}
}
async _resolve(channel, evt) {
if (evt.is_final) {
/* we've got a final transcript, so end the otel child span for this channel */
@@ -601,7 +641,6 @@ class TaskTranscribe extends SttTask {
bugname: this.bugname
})
.catch((err) => this.logger.error({err}, `Error stopping transcription for primary vendor ${this.vendor}`));
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
try {
this.notifyError({ msg: 'ASR error',
details:`STT Vendor ${this.vendor} error: ${evt.error || evt.reason}`, failover: 'in progress'});
@@ -612,7 +651,6 @@ class TaskTranscribe extends SttTask {
}
this[`_speechHandlersSet_${channel}`] = false;
this._startTranscribing(cs, _ep, channel);
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
return true;
} catch (error) {
this.notifyError({ msg: 'ASR error',
@@ -677,7 +715,7 @@ class TaskTranscribe extends SttTask {
this.logger.debug({evt}, 'TaskGather:_onSpeechmaticsInfo');
}
async _onSpeechmaticsErrror(cs, _ep, evt) {
async _onSpeechmaticsError(cs, _ep, evt) {
// eslint-disable-next-line no-unused-vars
const {message, ...e} = evt;
this._onVendorError(cs, _ep, {error: JSON.stringify(e)});

View File

@@ -1,6 +1,7 @@
const Task = require('./task');
const { TaskPreconditions } = require('../utils/constants');
const { SpeechCredentialError } = require('../utils/error');
const dbUtils = require('../utils/db-utils');
class TtsTask extends Task {
@@ -12,11 +13,11 @@ class TtsTask extends Task {
this.earlyMedia = this.data.earlyMedia === true || (parentTask && parentTask.earlyMedia);
/**
* Task use taskInlcudeSynthesizer to identify
* if taskInlcudeSynthesizer === true, use label from verb.synthesizer, even it's empty
* if taskInlcudeSynthesizer === false, use label from application.synthesizer
* Task use taskIncludeSynthesizer to identify
* if taskIncludeSynthesizer === true, use label from verb.synthesizer, even it's empty
* if taskIncludeSynthesizer === false, use label from application.synthesizer
*/
this.taskInlcudeSynthesizer = !!this.data.synthesizer;
this.taskIncludeSynthesizer = !!this.data.synthesizer;
this.synthesizer = this.data.synthesizer || {};
this.disableTtsCache = this.data.disableTtsCache;
this.options = this.synthesizer.options || {};
@@ -43,9 +44,66 @@ class TtsTask extends Task {
}
}
getTtsVendorData(cs) {
const vendor = this.synthesizer.vendor && this.synthesizer.vendor !== 'default' ?
this.synthesizer.vendor :
cs.speechSynthesisVendor;
const language = this.synthesizer.language && this.synthesizer.language !== 'default' ?
this.synthesizer.language :
cs.speechSynthesisLanguage ;
const voice = this.synthesizer.voice && this.synthesizer.voice !== 'default' ?
this.synthesizer.voice :
cs.speechSynthesisVoice;
const label = this.taskIncludeSynthesizer ? this.synthesizer.label : cs.speechSynthesisLabel;
return {vendor, language, voice, label};
}
async setTtsStreamingChannelVars(vendor, language, voice, credentials, ep) {
const {api_key, model_id} = credentials;
const {stability, similarity_boost, use_speaker_boost, style} = this.options;
let obj;
this.logger.debug({credentials},
`setTtsStreamingChannelVars: vendor: ${vendor}, language: ${language}, voice: ${voice}`);
switch (vendor) {
case 'deepgram':
obj = {
DEEPGRAM_API_KEY: api_key,
DEEPGRAM_TTS_STREAMING_MODEL: voice
};
break;
case 'cartesia':
obj = {
CARTESIA_API_KEY: api_key,
CARTESIA_TTS_STREAMING_MODEL_ID: model_id,
CARTESIA_TTS_STREAMING_VOICE_ID: voice,
CARTESIA_TTS_STREAMING_LANGUAGE: language || 'en',
};
break;
case 'elevenlabs':
obj = {
ELEVENLABS_API_KEY: api_key,
ELEVENLABS_TTS_STREAMING_MODEL_ID: model_id,
ELEVENLABS_TTS_STREAMING_VOICE_ID: voice,
// 20/12/2024 - only eleven_turbo_v2_5 support multiple language
...(['eleven_turbo_v2_5'].includes(model_id) && {ELEVENLABS_TTS_STREAMING_LANGUAGE: language}),
...(stability && {ELEVENLABS_TTS_STREAMING_VOICE_SETTINGS_STABILITY: stability}),
...(similarity_boost && {ELEVENLABS_TTS_STREAMING_VOICE_SETTINGS_SIMILARITY_BOOST: similarity_boost}),
...(use_speaker_boost && {ELEVENLABS_TTS_STREAMING_VOICE_SETTINGS_USE_SPEAKER_BOOST: use_speaker_boost}),
...(style && {ELEVENLABS_TTS_STREAMING_VOICE_SETTINGS_STYLE: style})
};
break;
default:
throw new Error(`vendor ${vendor} is not supported for tts streaming yet`);
}
this.logger.info({vendor, credentials, obj}, 'setTtsStreamingChannelVars');
await ep.set(obj);
}
async _synthesizeWithSpecificVendor(cs, ep, {vendor, language, voice, label, preCache = false}) {
const {srf, accountSid:account_sid} = cs;
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, srf);
const {writeAlerts, AlertType, stats} = srf.locals;
const {synthAudio} = srf.locals.dbHelpers;
const engine = this.synthesizer.engine || cs.synthesizer?.engine || 'neural';
@@ -95,6 +153,18 @@ class TtsTask extends Task {
} else if (vendor === 'playht') {
credentials = credentials || {};
credentials.voice_engine = this.options.voice_engine || credentials.voice_engine;
} else if (vendor === 'google' && typeof voice === 'string' && voice.startsWith('custom_')) {
const {lookupGoogleCustomVoice} = dbUtils(this.logger, cs.srf);
const arr = /custom_(.*)/.exec(voice);
if (arr) {
const google_custom_voice_sid = arr[1];
const [custom_voice] = await lookupGoogleCustomVoice(google_custom_voice_sid);
if (custom_voice.use_voice_cloning_key) {
voice = {
voice_cloning_key: custom_voice.voice_cloning_key,
};
}
}
}
/**
@@ -128,8 +198,6 @@ class TtsTask extends Task {
}).catch((err) => this.logger.info({err}, 'Error generating alert for no tts'));
throw new SpeechCredentialError('no provisioned speech credentials for TTS');
}
// synthesize all of the text elements
let lastUpdated = false;
/* produce an audio segment from the provided text */
const generateAudio = async(text) => {
@@ -169,10 +237,6 @@ class TtsTask extends Task {
this.otelSpan.end();
this.otelSpan = null;
}
if (!servedFromCache && !lastUpdated) {
lastUpdated = true;
updateSpeechCredentialLastUsed(credentials.speech_credential_sid).catch(() => {/* logged error */});
}
if (!servedFromCache && rtt && !preCache && !this._disableTracing) {
this.notifyStatus({
event: 'synthesized-audio',

View File

@@ -126,7 +126,12 @@ class ActionHookDelayProcessor extends Emitter {
try {
this._taskInProgress = makeTask(this.logger, t[0]);
this._taskInProgress.disableTracing = true;
this._taskInProgress.exec(this.cs, {ep: this.ep});
this._taskInProgress.exec(this.cs, {ep: this.ep}).catch((err) => {
this.logger.info(`ActionHookDelayProcessor#_onNoResponseTimer: error playing file: ${err.message}`);
this._taskInProgress = null;
this.ep.removeAllListeners('playback-start');
this.ep.removeAllListeners('playback-stop');
});
} catch (err) {
this.logger.info(err, 'ActionHookDelayProcessor#_onNoResponseTimer: error starting action');
this._taskInProgress = null;

View File

@@ -153,7 +153,7 @@ class Amd extends Emitter {
const wordCount = t.alternatives[0].transcript.split(' ').length;
const final = t.is_final;
const foundHint = hints.find((h) => t.alternatives[0].transcript.includes(h));
const foundHint = hints.find((h) => t.alternatives[0].transcript.toLowerCase().includes(h.toLowerCase()));
if (foundHint) {
/* we detected a common voice mail greeting */
this.logger.debug(`Amd:evaluateTranscription: found hint ${foundHint}`);

View File

@@ -46,6 +46,9 @@ class BackgroundTaskManager extends Emitter {
case 'transcribe':
task = await this._initTranscribe(opts);
break;
case 'ttsStream':
task = await this._initTtsStream(opts);
break;
default:
break;
}
@@ -173,6 +176,25 @@ class BackgroundTaskManager extends Emitter {
return task;
}
// Initiate Tts Stream
async _initTtsStream(opts) {
let task;
try {
const t = normalizeJambones(this.logger, [opts]);
task = makeTask(this.logger, t[0]);
const resources = await this.cs._evaluatePreconditions(task);
const {span, ctx} = this.rootSpan.startChildSpan(`background-ttsStream:${task.summary}`);
task.span = span;
task.ctx = ctx;
task.exec(this.cs, resources)
.then(this._taskCompleted.bind(this, 'ttsStream', task))
.catch(this._taskError.bind(this, 'ttsStream', task));
} catch (err) {
this.logger.info(err, 'BackgroundTaskManager:_initTtsStream - Error creating ttsStream task');
}
return task;
}
_taskCompleted(type, task) {
this.logger.debug({type, task}, `BackgroundTaskManager:_taskCompleted: task completed, sticky: ${task.sticky}`);
task.removeAllListeners();

View File

@@ -129,6 +129,7 @@
},
"SpeechmaticsTranscriptionEvents": {
"Transcription": "speechmatics_transcribe::transcription",
"Translation": "speechmatics_transcribe::translation",
"Info": "speechmatics_transcribe::info",
"RecognitionStarted": "speechmatics_transcribe::recognition_started",
"ConnectFailure": "speechmatics_transcribe::connect_failed",
@@ -174,6 +175,13 @@
"Disconnect": "openai_s2s::disconnect",
"ServerEvent": "openai_s2s::server_event"
},
"LlmEvents_VoiceAgent": {
"Error": "error",
"Connect": "voice_agent_s2s::connect",
"ConnectFailure": "voice_agent_s2s::connect_failed",
"Disconnect": "voice_agent_s2s::disconnect",
"ServerEvent": "voice_agent_s2s::server_event"
},
"QueueResults": {
"Bridged": "bridged",
"Error": "error",
@@ -202,6 +210,8 @@
"verb:status",
"llm:event",
"llm:tool-call",
"tts:tokens-result",
"tts:streaming-event",
"jambonz:error"
],
"RecordState": {
@@ -220,7 +230,45 @@
"ToneTimeout": "amd_tone_timeout",
"Stopped": "amd_stopped"
},
"MediaPath": {
"NoMedia": "no-media",
"PartialMedia": "partial-media",
"FullMedia": "full-media"
},
"DeepgramTtsStreamingEvents": {
"Empty": "deepgram_tts_streaming::empty",
"ConnectFailure": "deepgram_tts_streaming::connect_failed",
"Connect": "deepgram_tts_streaming::connect"
},
"CartesiaTtsStreamingEvents": {
"Empty": "cartesia_tts_streaming::empty",
"ConnectFailure": "cartesia_tts_streaming::connect_failed",
"Connect": "cartesia_tts_streaming::connect"
},
"ElevenlabsTtsStreamingEvents": {
"Empty": "elevenlabs_tts_streaming::empty",
"ConnectFailure": "elevenlabs_tts_streaming::connect_failed",
"Connect": "elevenlabs_tts_streaming::connect"
},
"TtsStreamingEvents": {
"Empty": "tts_streaming::empty",
"Pause": "tts_streaming::pause",
"Resume": "tts_streaming::resume",
"ConnectFailure": "tts_streaming::connect_failed"
},
"TtsStreamingConnectionStatus": {
"NotConnected": "not_connected",
"Connected": "connected",
"Connecting": "connecting",
"Failed": "failed"
},
"MAX_SIMRINGS": 10,
"BONG_TONE": "tone_stream://v=-7;%(100,0,941.0,1477.0);v=-7;>=2;+=.1;%(1400,0,350,440)",
"FS_UUID_SET_NAME": "fsUUIDs"
"FS_UUID_SET_NAME": "fsUUIDs",
"SystemState" : {
"Online": "ONLINE",
"Offline": "OFFLINE",
"GracefulShutdownInProgress":"SHUTDOWN_IN_PROGRESS"
},
"FEATURE_SERVER" : "feature-server"
}

View File

@@ -105,6 +105,13 @@ const speechMapper = (cred) => {
obj.voice_engine = o.voice_engine;
obj.options = o.options;
}
else if ('cartesia' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
obj.model_id = o.model_id;
obj.embedding = o.embedding;
obj.options = o.options;
}
else if ('rimelabs' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
@@ -215,11 +222,23 @@ module.exports = (logger, srf) => {
}
};
const lookupVoipCarrierBySid = async(sid) => {
const pp = pool.promise();
try {
const [r] = await pp.query('SELECT * FROM voip_carriers WHERE voip_carrier_sid = ?', [sid]);
return r;
} catch (err) {
logger.error({err}, `lookupVoipCarrierBySid: Error ${sid}`);
}
};
return {
lookupAccountDetails,
updateSpeechCredentialLastUsed,
lookupCarrier,
lookupCarrierByPhoneNumber,
lookupGoogleCustomVoice
lookupGoogleCustomVoice,
lookupVoipCarrierBySid
};
};

View File

@@ -1,9 +1,24 @@
class SpeechCredentialError extends Error {
class NonFatalTaskError extends Error {
constructor(msg) {
super(msg);
}
}
class SpeechCredentialError extends NonFatalTaskError {
constructor(msg) {
super(msg);
}
}
class PlayFileNotFoundError extends NonFatalTaskError {
constructor(url) {
super('File not found');
this.url = url;
}
}
module.exports = {
SpeechCredentialError
SpeechCredentialError,
NonFatalTaskError,
PlayFileNotFoundError
};

View File

@@ -199,7 +199,8 @@ function installSrfLocals(srf, logger) {
} = require('@jambonz/speech-utils')({}, logger);
const {
writeAlerts,
AlertType
AlertType,
writeSystemAlerts
} = require('@jambonz/time-series')(logger, {
host: JAMBONES_TIME_SERIES_HOST,
commitSize: 50,
@@ -269,7 +270,8 @@ function installSrfLocals(srf, logger) {
getFreeswitch,
stats: stats,
writeAlerts,
AlertType
AlertType,
writeSystemAlerts
};
if (localIp) {

32
lib/utils/network.js Normal file
View File

@@ -0,0 +1,32 @@
/**
* Parses a list of hostport entries and selects the first one that matches the specified protocol,
* excluding any entries with the localhost IP address ('127.0.0.1').
*
* Each hostport entry should be in the format: 'protocol/ip:port'
*
* @param {Object} logger - A logging object with a 'debug' method for logging debug messages.
* @param {string} hostport - A comma-separated string containing hostport entries.
* @param {string} protocol - The protocol to match (e.g., 'udp', 'tcp').
* @returns {Array} An array containing:
* 0: protocol
* 1: ip address
* 2: port
*/
const selectHostPort = (logger, hostport, protocol) => {
logger.debug(`selectHostPort: ${hostport}, ${protocol}`);
const sel = hostport
.split(',')
.map((hp) => {
const arr = /(.*)\/(.*):(.*)/.exec(hp);
return [arr[1], arr[2], arr[3]];
})
.filter((hp) => {
return hp[0] === protocol && hp[1] !== '127.0.0.1';
});
return sel[0];
};
module.exports = {
selectHostPort
};

View File

@@ -1,5 +1,5 @@
const Emitter = require('events');
const {CallStatus} = require('./constants');
const {CallStatus, MediaPath} = require('./constants');
const SipError = require('drachtio-srf').SipError;
const {TaskPreconditions, CallDirection} = require('../utils/constants');
const CallInfo = require('../session/call-info');
@@ -381,7 +381,8 @@ class SingleDialer extends Emitter {
callInfo: this.callInfo,
accountInfo: this.accountInfo,
tasks,
rootSpan: this.rootSpan
rootSpan: this.rootSpan,
req: this.req
});
await cs.exec();
@@ -455,21 +456,26 @@ class SingleDialer extends Emitter {
return cs;
}
async releaseMediaToSBC(remoteSdp, localSdp) {
async releaseMediaToSBC(remoteSdp, localSdp, releaseMediaEntirely) {
assert(this.dlg && this.dlg.connected && this.ep && typeof remoteSdp === 'string');
const sdp = stripCodecs(this.logger, remoteSdp, localSdp) || remoteSdp;
await this.dlg.modify(sdp, {
headers: {
'X-Reason': 'release-media'
'X-Reason': releaseMediaEntirely ? 'release-media-entirely' : 'release-media'
}
});
this.ep.destroy()
.then(() => this.ep = null)
.catch((err) => this.logger.error({err}, 'SingleDialer:releaseMediaToSBC: Error destroying endpoint'));
try {
await this.ep.destroy();
} catch (err) {
this.logger.error({err}, 'SingleDialer:releaseMediaToSBC: Error destroying endpoint');
}
this.ep = null;
}
async reAnchorMedia() {
async reAnchorMedia(currentMediaRoute = MediaPath.PartialMedia) {
assert(this.dlg && this.dlg.connected && !this.ep);
this.logger.debug('SingleDialer:reAnchorMedia: re-anchoring media after partial media');
this.ep = await this.ms.createEndpoint({remoteSdp: this.dlg.remote.sdp});
this._configMsEndpoint();
await this.dlg.modify(this.ep.local.sdp, {
@@ -477,6 +483,11 @@ class SingleDialer extends Emitter {
'X-Reason': 'anchor-media'
}
});
if (currentMediaRoute === MediaPath.NoMedia) {
this.logger.debug('SingleDialer:reAnchorMedia: repoint endpoint after no media');
await this.ep.modify(this.dlg.remote.sdp);
}
}
_notifyCallStatusChange({callStatus, sipStatus, sipReason, duration}) {

View File

@@ -46,12 +46,24 @@ module.exports = (logger) => {
const {srf} = require('../..');
srf.locals.publicIp = publicIp;
})
.on(LifeCycleEvents.ScaleIn, () => {
.on(LifeCycleEvents.ScaleIn, async() => {
logger.info('AWS scale-in notification: begin drying up calls');
dryUpCalls = true;
lifecycleEmitter.operationalState = LifeCycleEvents.ScaleIn;
const {srf} = require('../..');
const {writeSystemAlerts} = srf.locals;
if (writeSystemAlerts) {
const {SystemState, FEATURE_SERVER} = require('./constants');
await writeSystemAlerts({
system_component: FEATURE_SERVER,
state : SystemState.GracefulShutdownInProgress,
fields : {
detail: `feature-server with process_id ${process.pid} shutdown in progress`,
host: srf.locals?.ipv4
}
});
}
pingProxies(srf);
// if we have zero calls, we can complete the scale-in right

View File

@@ -109,6 +109,8 @@ const stickyVars = {
'SPEECHMATICS_HOST',
'SPEECHMATICS_PATH',
'SPEECHMATICS_SPEECH_HINTS',
'SPEECHMATICS_TRANSLATION_LANGUAGES',
'SPEECHMATICS_TRANSLATION_PARTIALS'
]
};
@@ -183,7 +185,10 @@ const selectDefaultGoogleModel = (task, language, version) => {
(useV2 ? 'long' : 'latest_long');
};
const consolidateTranscripts = (bufferedTranscripts, channel, language, vendor) => {
if (bufferedTranscripts.length === 1) return bufferedTranscripts[0];
if (bufferedTranscripts.length === 1) {
bufferedTranscripts[0].is_final = true;
return bufferedTranscripts[0];
}
let totalConfidence = 0;
const finalTranscript = bufferedTranscripts.reduce((acc, evt) => {
totalConfidence += evt.alternatives[0].confidence;
@@ -937,11 +942,18 @@ module.exports = (logger) => {
};
}
else if ('speechmatics' === vendor) {
const {speechmaticsOptions = {}} = rOpts;
opts = {
...opts,
...(sttCredentials.api_key) && {SPEECHMATICS_API_KEY: sttCredentials.api_key},
...(sttCredentials.speechmatics_stt_uri) && {SPEECHMATICS_HOST: sttCredentials.speechmatics_stt_uri},
...(rOpts.hints?.length > 0 && {SPEECHMATICS_SPEECH_HINTS: rOpts.hints.join(',')}),
...(speechmaticsOptions.translation_config &&
{
SPEECHMATICS_TRANSLATION_LANGUAGES: speechmaticsOptions.translation_config.target_languages.join(','),
SPEECHMATICS_TRANSLATION_PARTIALS: speechmaticsOptions.translation_config.enable_partials ? 1 : 0
}
),
};
}
else if (vendor.startsWith('custom:')) {

View File

@@ -0,0 +1,326 @@
const Emitter = require('events');
const assert = require('assert');
const {
TtsStreamingEvents,
TtsStreamingConnectionStatus
} = require('../utils/constants');
const FEED_INTERVAL = 2000;
const MAX_CHUNK_SIZE = 1800;
const HIGH_WATER_BUFFER_SIZE = 5000;
const LOW_WATER_BUFFER_SIZE = 1000;
const MIN_INITIAL_WORDS = 4;
const findSentenceBoundary = (text, limit) => {
const sentenceEndRegex = /[.!?](?=\s|$)/g;
let lastSentenceBoundary = -1;
let match;
while ((match = sentenceEndRegex.exec(text)) && match.index < limit) {
/* Ensure it's not a decimal point (e.g., "3.14") */
if (match.index === 0 || !/\d$/.test(text[match.index - 1])) {
lastSentenceBoundary = match.index + 1; // Include the punctuation
}
}
return lastSentenceBoundary;
};
const findWordBoundary = (text, limit) => {
const wordBoundaryRegex = /\s+/g;
let lastWordBoundary = -1;
let match;
while ((match = wordBoundaryRegex.exec(text)) && match.index < limit) {
lastWordBoundary = match.index;
}
return lastWordBoundary;
};
class TtsStreamingBuffer extends Emitter {
constructor(cs) {
super();
this.cs = cs;
this.logger = cs.logger;
this.tokens = '';
this.eventHandlers = [];
this._isFull = false;
this._connectionStatus = TtsStreamingConnectionStatus.NotConnected;
this._flushPending = false;
this._countSendsInThisTurn = 0;
}
get isEmpty() {
return this.tokens.length === 0;
}
get isFull() {
return this._isFull;
}
get size() {
return this.tokens.length;
}
get ep() {
return this.cs?.ep;
}
async start() {
assert.ok(
this._connectionStatus === TtsStreamingConnectionStatus.NotConnected,
'TtsStreamingBuffer:start already started, or has failed');
this.vendor = this.cs.getTsStreamingVendor();
if (!this.vendor) {
this.logger.info('TtsStreamingBuffer:start No TTS streaming vendor configured');
throw new Error('No TTS streaming vendor configured');
}
this.logger.info(`TtsStreamingBuffer:start Connecting to TTS streaming with vendor ${this.vendor}`);
this._connectionStatus = TtsStreamingConnectionStatus.Connecting;
try {
if (this.eventHandlers.length === 0) this._initHandlers(this.ep);
await this._api(this.ep, [this.ep.uuid, 'connect']);
} catch (err) {
this.logger.info({err}, 'TtsStreamingBuffer:start Error connecting to TTS streaming');
this._connectionStatus = TtsStreamingConnectionStatus.Failed;
}
}
stop() {
clearTimeout(this.timer);
this.removeCustomEventListeners();
if (this.ep) {
this._api(this.ep, [this.ep.uuid, 'close'])
.catch((err) => this.logger.info({err}, 'TtsStreamingBuffer:kill Error closing TTS streaming'));
}
this.timer = null;
this.tokens = '';
this._connectionStatus = TtsStreamingConnectionStatus.NotConnected;
}
/**
* Add tokens to the buffer and start feeding them to the endpoint if necessary.
*/
async bufferTokens(tokens) {
if (this._connectionStatus === TtsStreamingConnectionStatus.Failed) {
this.logger.info('TtsStreamingBuffer:bufferTokens TTS streaming connection failed, rejecting request');
return {status: 'failed', reason: `connection to ${this.vendor} failed`};
}
const displayedTokens = tokens.length <= 40 ? tokens : tokens.substring(0, 40);
const totalLength = tokens.length;
/* if we crossed the high water mark, reject the request */
if (this.tokens.length + totalLength > HIGH_WATER_BUFFER_SIZE) {
this.logger.info(
`TtsStreamingBuffer:bufferTokensTTS buffer is full, rejecting request to buffer ${totalLength} tokens`);
if (!this._isFull) {
this._isFull = true;
this.emit(TtsStreamingEvents.Pause);
}
return {status: 'failed', reason: 'full'};
}
this.logger.debug(
`TtsStreamingBuffer:bufferTokens "${displayedTokens}" (length: ${totalLength}), starting? ${this.isEmpty}`
);
this.tokens += (tokens || '');
const leftoverTokens = await this._feedTokens();
/* do we need to start a timer to periodically feed tokens to the endpoint? */
if (this.isEmpty && leftoverTokens > 0) {
assert(!this.timer);
this.timer = setInterval(async() => {
const remaining = await this._feedTokens();
if (remaining === 0) {
clearInterval(this.timer);
this.timer = null;
}
}, FEED_INTERVAL);
}
return {status: 'ok'};
}
flush() {
this.logger.debug('TtsStreamingBuffer:flush');
if (this._connectionStatus === TtsStreamingConnectionStatus.Connecting) {
this.logger.debug('TtsStreamingBuffer:flush TTS stream is not quite ready - wait for connect');
this._flushPending = true;
return;
}
else if (this._connectionStatus === TtsStreamingConnectionStatus.Connected) {
this._countSendsInThisTurn = 0;
this._api(this.ep, [this.ep.uuid, 'flush'])
.catch((err) => this.logger.info({err},
`TtsStreamingBuffer:flush Error flushing TTS streaming: ${JSON.stringify(err)}`));
}
}
clear() {
this.logger.debug('TtsStreamingBuffer:clear');
if (this._connectionStatus !== TtsStreamingConnectionStatus.Connected) return;
clearTimeout(this.timer);
this._api(this.ep, [this.ep.uuid, 'clear'])
.catch((err) => this.logger.info({err}, 'TtsStreamingBuffer:clear Error clearing TTS streaming'));
this.tokens = '';
this.timer = null;
this._isFull = false;
}
/**
* Send the next chunk of tokens to the endpoint (max 2000 chars)
* Return the number of tokens left in the buffer.
*/
async _feedTokens() {
this.logger.debug({tokens: this.tokens}, '_feedTokens');
try {
if (!this.cs.isTtsStreamOpen || !this.ep || !this.tokens) {
this.logger.debug('TTS stream is not open or no tokens to send');
return this.tokens?.length || 0;
}
if (this._connectionStatus === TtsStreamingConnectionStatus.NotConnected ||
this._connectionStatus === TtsStreamingConnectionStatus.Failed) {
this.logger.debug('TtsStreamingBuffer:_feedTokens TTS stream is not connected');
return this.tokens.length;
}
if (this._connectionStatus === TtsStreamingConnectionStatus.Connecting) {
this.logger.debug('TtsStreamingBuffer:_feedTokens TTS stream is not ready, waiting for connect');
return this.tokens.length;
}
/**
* Rules:
* 1. If this is our first send, we must have at least N words
* 2. Otherwise, must EITHER have N words OR be the ending of a sentence
*
* When sending, send the max size possible, capped at a limit to avoid overwhelming the server.
*/
/* must have at least N words, or be the ending of a sentence */
const words = this.tokens.split(' ').length;
if (words < MIN_INITIAL_WORDS) {
const endsWithPunctuation = /[.!?]$/.test(this.tokens);
if (!endsWithPunctuation || this._countSendsInThisTurn === 0) {
this.logger.debug(`TtsStreamingBuffer:_feedTokens: only ${words} words to send, waiting for more`);
return this.tokens.length;
}
}
const limit = Math.min(MAX_CHUNK_SIZE, this.tokens.length);
let chunkEnd = findSentenceBoundary(this.tokens, limit);
if (chunkEnd === -1) {
this.logger.debug('TtsStreamingBuffer:_feedTokens: no sentence boundary found, look for word boundary');
chunkEnd = findWordBoundary(this.tokens, limit);
}
if (chunkEnd === -1) {
chunkEnd = limit;
}
const chunk = this.tokens.slice(0, chunkEnd);
this.tokens = this.tokens.slice(chunkEnd); // Remove sent chunk
/* freeswitch looks for sequence of 2 newlines to determine end of message, so insert a space */
const modifiedChunk = chunk.replace(/\n\n/g, '\n \n');
if (modifiedChunk.length > 0) {
try {
this._countSendsInThisTurn++;
this.logger.debug({tokens: modifiedChunk},
`TtsStreamingBuffer:_feedTokens: sending tokens, in send#${this._countSendsInThisTurn}`);
await this._api(this.ep, [this.ep.uuid, 'send', modifiedChunk]);
} catch (err) {
this.logger.info({err}, 'TtsStreamingBuffer:_feedTokens Error sending TTS chunk');
}
this.logger.debug(`TtsStreamingBuffer:_feedTokens: sent ${chunk.length}, remaining: ${this.tokens.length}`);
if (this.isFull && this.tokens.length <= LOW_WATER_BUFFER_SIZE) {
this.logger.info('TtsStreamingBuffer:_feedTokens TTS streaming buffer is no longer full');
this._isFull = false;
this.emit(TtsStreamingEvents.Resume);
}
}
} catch (err) {
this.logger.info({err}, 'TtsStreamingBuffer:_feedTokens Error sending TTS chunk');
this.tokens = '';
}
if (0 === this.tokens.length && this.timer) {
clearTimeout(this.timer);
this.timer = null;
}
return this.tokens.length;
}
async _api(ep, args) {
const apiCmd = `uuid_${this.vendor}_tts_streaming`;
const res = await ep.api(apiCmd, `^^|${args.join('|')}`);
if (!res.body?.startsWith('+OK')) {
throw new Error({args}, `Error calling ${apiCmd}: ${res.body}`);
}
}
_onConnectFailure(vendor) {
this.logger.info(`streaming tts connection failed to ${vendor}`);
this._connectionStatus = TtsStreamingConnectionStatus.Failed;
this.tokens = '';
this.emit(TtsStreamingEvents.ConnectFailure, {vendor});
}
async _onConnect(vendor) {
this.logger.info(`streaming tts connection made to ${vendor}`);
this._connectionStatus = TtsStreamingConnectionStatus.Connected;
if (this.tokens.length > 0) {
await this._feedTokens();
}
if (this._flushPending) {
this.flush();
this._flushPending = false;
}
}
_onTtsEmpty(vendor) {
this.emit(TtsStreamingEvents.Empty, {vendor});
}
addCustomEventListener(ep, event, handler) {
this.eventHandlers.push({ep, event, handler});
ep.addCustomEventListener(event, handler);
}
removeCustomEventListeners() {
this.eventHandlers.forEach((h) => h.ep.removeCustomEventListener(h.event, h.handler));
}
_initHandlers(ep) {
[
// DH: add other vendors here as modules are added
'deepgram',
'cartesia',
'elevenlabs'
].forEach((vendor) => {
const eventClassName = `${vendor.charAt(0).toUpperCase() + vendor.slice(1)}TtsStreamingEvents`;
const eventClass = require('../utils/constants')[eventClassName];
if (!eventClass) throw new Error(`Event class for vendor ${vendor} not found`);
this.addCustomEventListener(ep, eventClass.Connect, this._onConnect.bind(this, vendor));
this.addCustomEventListener(ep, eventClass.ConnectFailure, this._onConnectFailure.bind(this, vendor));
this.addCustomEventListener(ep, eventClass.Empty, this._onTtsEmpty.bind(this, vendor));
});
}
}
module.exports = TtsStreamingBuffer;

View File

@@ -12,6 +12,20 @@ const {
JAMBONES_WS_MAX_PAYLOAD,
HTTP_USER_AGENT_HEADER
} = require('../config');
const MTYPE_WANTS_ACK = [
'call:status',
'verb:status',
'jambonz:error',
'llm:event',
'llm:tool-call',
'tts:streaming-event',
'tts:tokens-result',
];
const MTYPE_NO_DATA = [
'llm:tool-output',
'tts:flush',
'tts:clear'
];
class WsRequestor extends BaseRequestor {
constructor(logger, account_sid, hook, secret) {
@@ -44,7 +58,7 @@ class WsRequestor extends BaseRequestor {
async request(type, hook, params, httpHeaders = {}) {
assert(HookMsgTypes.includes(type));
const url = hook.url || hook;
const wantsAck = !['call:status', 'verb:status', 'jambonz:error', 'llm:event', 'llm:tool-call'].includes(type);
const wantsAck = !MTYPE_WANTS_ACK.includes(type);
if (this.maliciousClient) {
this.logger.info({url: this.url}, 'WsRequestor:request - discarding msg to malicious client');
@@ -408,7 +422,7 @@ class WsRequestor extends BaseRequestor {
case 'command':
assert.ok(command, 'command property not supplied');
assert.ok(data || command === 'llm:tool-output', 'data property not supplied');
assert.ok(data || MTYPE_NO_DATA.includes(command), 'data property not supplied');
this._recvCommand(msgid, command, call_sid, queueCommand, tool_call_id, data);
break;

630
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -31,10 +31,10 @@
"@jambonz/http-health-check": "^0.0.1",
"@jambonz/mw-registrar": "^0.2.7",
"@jambonz/realtimedb-helpers": "^0.8.8",
"@jambonz/speech-utils": "^0.1.20",
"@jambonz/speech-utils": "^0.2.1",
"@jambonz/stats-collector": "^0.1.10",
"@jambonz/time-series": "^0.2.9",
"@jambonz/verb-specifications": "^0.0.83",
"@jambonz/verb-specifications": "^0.0.90",
"@jambonz/time-series": "^0.2.13",
"@opentelemetry/api": "^1.8.0",
"@opentelemetry/exporter-jaeger": "^1.23.0",
"@opentelemetry/exporter-trace-otlp-http": "^0.50.0",
@@ -47,7 +47,7 @@
"bent": "^7.3.12",
"debug": "^4.3.4",
"deepcopy": "^2.1.0",
"drachtio-fsmrf": "^3.0.45",
"drachtio-fsmrf": "^3.0.46",
"drachtio-srf": "^4.5.35",
"express": "^4.19.2",
"express-validator": "^7.0.1",

View File

@@ -351,6 +351,8 @@ speech_credential_sid CHAR(36) NOT NULL,
model VARCHAR(512) NOT NULL,
reported_usage ENUM('REPORTED_USAGE_UNSPECIFIED','REALTIME','OFFLINE') DEFAULT 'REALTIME',
name VARCHAR(64) NOT NULL,
voice_cloning_key MEDIUMTEXT,
use_voice_cloning_key BOOLEAN DEFAULT false,
PRIMARY KEY (google_custom_voice_sid)
);

View File

@@ -42,7 +42,7 @@ services:
ipv4_address: 172.38.0.7
drachtio:
image: drachtio/drachtio-server:0.8.25-rc8
image: drachtio/drachtio-server:0.8.26
restart: always
command: drachtio --contact "sip:*;transport=udp" --mtu 4096 --address 0.0.0.0 --port 9022
ports:
@@ -57,7 +57,7 @@ services:
condition: service_healthy
freeswitch:
image: drachtio/drachtio-freeswitch-mrf:latest
image: drachtio/drachtio-freeswitch-mrf:0.9.2-4
restart: always
command: freeswitch --rtp-range-start 20000 --rtp-range-end 20100
environment:

View File

@@ -25,29 +25,38 @@ module.exports = (serviceName) => {
}),
});
let exporter;
const exporters = [];
if (OTEL_EXPORTER_JAEGER_AGENT_HOST || OTEL_EXPORTER_JAEGER_ENDPOINT) {
exporter = new JaegerExporter();
}
else if (OTEL_EXPORTER_ZIPKIN_URL) {
exporter = new ZipkinExporter({url:OTEL_EXPORTER_ZIPKIN_URL});
}
else {
exporter = new OTLPTraceExporter({
url: OTEL_EXPORTER_COLLECTOR_URL
});
exporters.push(new JaegerExporter());
}
provider.addSpanProcessor(new BatchSpanProcessor(exporter, {
// The maximum queue size. After the size is reached spans are dropped.
maxQueueSize: 100,
// The maximum batch size of every export. It must be smaller or equal to maxQueueSize.
maxExportBatchSize: 10,
// The interval between two consecutive exports
scheduledDelayMillis: 500,
// How long the export can run before it is cancelled
exportTimeoutMillis: 30000,
}));
if (OTEL_EXPORTER_ZIPKIN_URL) {
exporters.push(new ZipkinExporter({url:OTEL_EXPORTER_ZIPKIN_URL}));
}
if (OTEL_EXPORTER_ZIPKIN_URL) {
exporters.push(new ZipkinExporter({url:OTEL_EXPORTER_ZIPKIN_URL}));
}
if (OTEL_EXPORTER_COLLECTOR_URL) {
exporters.push(new OTLPTraceExporter({
url: OTEL_EXPORTER_COLLECTOR_URL
}));
}
exporters.forEach((element) => {
provider.addSpanProcessor(new BatchSpanProcessor(element, {
// The maximum queue size. After the size is reached spans are dropped.
maxQueueSize: 100,
// The maximum batch size of every export. It must be smaller or equal to maxQueueSize.
maxExportBatchSize: 10,
// The interval between two consecutive exports
scheduledDelayMillis: 500,
// How long the export can run before it is cancelled
exportTimeoutMillis: 30000,
}));
});
// Initialize the OpenTelemetry APIs to use the NodeTracerProvider bindings
provider.register();