Compare commits

..

19 Commits

Author SHA1 Message Date
Sam Machin
a0a579ccee escape json special chars in metadata (#1399) 2025-10-20 10:30:03 -04:00
Sam Machin
4218653852 add customerData on transferred calls (#1391)
* add customerData on transferred calls

* change to if statement
2025-10-20 09:20:12 -04:00
Hoan Luu Huu
89cc39f726 support gladia stt (#1397)
* support gladia stt

* wip

* wip

* update verb specification
2025-10-20 04:56:39 -04:00
Sam Machin
b231593bff bump dbhelpers for cache change (#1396) 2025-10-15 11:38:43 -04:00
Sam Machin
4309d25376 don't encode querystring if its the filename (#1395)
* don't encode querystring if its the filename

* lint

* update link to issue

u
2025-10-14 10:48:50 -04:00
Hoan Luu Huu
a00703a067 support houndify stt (#1364)
* support houndify stt

* wip

* wip

* wip

* update houndify stt parameters

* wip

* wip
2025-10-14 00:55:21 -04:00
Hoan Luu Huu
89c985b564 fixed does not send final status call back if call canceled quickly (#1393)
* fixed callsession should cleanup resource if call was canceled while fetching app

* wip

* wip

* wip

* wip

* wip
2025-10-11 03:44:42 -04:00
Dave Horton
b4ed4c8c46 #1385: Gather - dont start the continuous asr timer when we first start listening if this is a background gather (#1386) 2025-10-09 08:47:51 -04:00
Hoan Luu Huu
581d309f36 support elevenlabs different endpoint (#1387)
* support elevenlabs different endpoint

* wip

* wip
2025-10-09 08:19:40 -04:00
Sam Machin
d1baf2fe37 if call is transferred from another FS then always answer (#1383)
Currently if the call being transferred was originally an outbound call then the direction thats retrieved from redis is outbound and the invite of the refer from the other FS is never answered,
However a transferredCall will always need to be answered regardless of CallDirection
2025-10-07 07:19:11 -04:00
Dave Horton
28bf0d3477 send eager_eot events (#1382) 2025-10-06 16:50:20 -04:00
Dave Horton
d2d3b4583e Fix/flux cleanup (#1379)
* for deepgram flux include the turn taking events in the transcription payload

* for deepgram flux, including turn_taking_event in the speech payload

* fix prev commit which used wrong field
2025-10-04 20:06:38 -04:00
Hoan Luu Huu
854c26db11 support deepgramflux (#1373)
* support deepgramflux

* wip

* wip

* wip

* wip

* update verb scpecification
2025-10-03 10:38:39 -04:00
Dave Horton
e77666a1a7 update speech-utils and drachtio-srf (#1377) 2025-10-03 10:06:37 -04:00
Ed Robbins
5acb19225b If an error occurs during initial TTS request, propagate the error (#1369)
* If an error occurs during initial TTS request, propagate the error

* fix missing semicolon

* fix jslint error

* add null check to r.playbackMilliseconds
2025-10-01 00:02:51 -04:00
Dave Horton
1d6f84c2d7 add event handler for when deepgram closes with an error (#1372) 2025-09-28 14:18:56 -04:00
Vinod Dharashive
de9b970a93 Update example-voicemail-greetings.json (#1367)
Add JP greetin
2025-09-23 09:30:40 -04:00
rammohan-y
ec786ef1dd Fix for sending synthesized-audio verb:status event when using TTS streaming (#1366)
https://github.com/jambonz/jambonz-feature-server/issues/1365
2025-09-23 09:30:05 -04:00
Sam Machin
a95a6d1683 clear main timout when interdigit timeout is started, (#1351)
* clear main timout when interdigit timeout is started,

also clear the asrTimeout when dtmf has taken priority

* lint

* lint
2025-09-12 09:01:51 -04:00
16 changed files with 499 additions and 62 deletions

View File

@@ -174,5 +174,61 @@
"non è raggiungibile",
"lascia pure un messaggio",
"puoi lasciare un messaggio"
],
"ja-JP": [
"この通話は留守番電話に転送されました",
"発信先は現在電話に出ることができません",
"発信音の後でメッセージを録音してください",
"録音を完了したら電話を切ることができます",
"只今電話に出ることができません",
"ただ今電話に出ることができません",
"ただいま電話に出ることができません",
"ピーという発信音の後にお名前とご用件をお話しください",
"ファックスを送られる方はスタートボタンを押してください",
"FAXを送られる方はスタートボタンを押してください",
"おかけになった電話をお呼びしましたが",
"お出になりません",
"おでになりません",
"お掛けになった電話番号は",
"おかけになった電話番号は",
"お掛けになった電話は",
"おかけになった電話は",
"現在使われておりません",
"番号をお確かめになって",
"お掛け直し下さい",
"おかけ直し下さい",
"おかけ直しください",
"こちらはNTTドコモです",
"こちらはエーユーです",
"こちらはソフトバンクです",
"電波の届かない",
"電源が入っていない",
"掛かりません",
"かかりません",
"お繋ぎすることが出来ません",
"お繋ぎ出来ません",
"お繋ぎすることができません",
"お繋ぎできません",
"おつなぎすることができません",
"おつなぎできません",
"メッセージを録音",
"留守番電話",
"お留守番サービス",
"留守番",
"留守電",
"留守",
"接続します",
"合図の音",
"ピーと",
"発信音",
"ご用件",
"伝言",
"お話しください",
"ファックス",
"FAX",
"終了",
"終了しました",
"終了いたしました",
"営業時間"
]
}

View File

@@ -112,6 +112,14 @@ module.exports = function(srf, logger) {
req.locals.callingNumber = sipURIs[1];
}
}
// Feature server INVITE request pipelines taking time to finish,
// while connecting and fetch application from db and invoking webhook.
// call can be canceled without any handling, so we add a listener here
req.once('cancel', (sipMsg) => {
logger.info(`${callId} got CANCEL request`);
req.locals.canceled = true;
});
next();
}
@@ -362,13 +370,14 @@ module.exports = function(srf, logger) {
});
// if transferred call contains callInfo, let update original data to newly created callInfo in this instance.
if (app.transferredCall && app.callInfo) {
const {direction, callerName, from, to, originatingSipIp, originatingSipTrunkName} = app.callInfo;
const {direction, callerName, from, to, originatingSipIp, originatingSipTrunkName, customerData} = app.callInfo;
req.locals.callInfo.direction = direction;
req.locals.callInfo.callerName = callerName;
req.locals.callInfo.from = from;
req.locals.callInfo.to = to;
req.locals.callInfo.originatingSipIp = originatingSipIp;
req.locals.callInfo.originatingSipTrunkName = originatingSipTrunkName;
if (customerData) req.locals.callInfo.customerData = customerData;
delete app.callInfo;
}
next();

View File

@@ -1086,6 +1086,13 @@ class CallSession extends Emitter {
deepgram_stt_use_tls: credential.deepgram_stt_use_tls
};
}
else if ('gladia' === vendor) {
return {
speech_credential_sid: credential.speech_credential_sid,
api_key: credential.api_key,
region: credential.region,
};
}
else if ('soniox' === vendor) {
return {
speech_credential_sid: credential.speech_credential_sid,
@@ -1117,6 +1124,7 @@ class CallSession extends Emitter {
return {
api_key: credential.api_key,
model_id: credential.model_id,
api_uri: credential.api_uri,
options: credential.options
};
}
@@ -1165,7 +1173,15 @@ class CallSession extends Emitter {
service_version: credential.service_version
};
}
else if ('deepgramriver' === vendor) {
else if ('houndify' === vendor) {
return {
speech_credential_sid: credential.speech_credential_sid,
client_id: credential.client_id,
client_key: credential.client_key,
user_id: credential.user_id
};
}
else if ('deepgramflux' === vendor) {
return {
speech_credential_sid: credential.speech_credential_sid,
api_key: credential.api_key,
@@ -2399,7 +2415,7 @@ Duration=${duration} `
this.logger.debug(`endpoint was destroyed!! ${this.ep.uuid}`);
});
if (this.direction === CallDirection.Inbound) {
if (this.direction === CallDirection.Inbound || this.application?.transferredCall) {
if (task.earlyMedia && !this.req.finalResponseSent) {
this.res.send(183, {body: ep.local.sdp});
return {ep};

View File

@@ -22,6 +22,12 @@ class InboundCallSession extends CallSession {
this.req = req;
this.res = res;
// if the call was canceled before we got here, handle it
if (this.req.locals.canceled) {
req.locals.logger.info('InboundCallSession: constructor - call was already canceled');
this._onCancel();
}
req.once('cancel', this._onCancel.bind(this));
this.on('callStatusChange', this._notifyCallStatusChange.bind(this));

View File

@@ -5,13 +5,15 @@ const {
AwsTranscriptionEvents,
AzureTranscriptionEvents,
DeepgramTranscriptionEvents,
GladiaTranscriptionEvents,
SonioxTranscriptionEvents,
CobaltTranscriptionEvents,
IbmTranscriptionEvents,
NvidiaTranscriptionEvents,
JambonzTranscriptionEvents,
AssemblyAiTranscriptionEvents,
DeepgramRiverTranscriptionEvents,
HoundifyTranscriptionEvents,
DeepgramfluxTranscriptionEvents,
VoxistTranscriptionEvents,
CartesiaTranscriptionEvents,
OpenAITranscriptionEvents,
@@ -93,6 +95,8 @@ class TaskGather extends SttTask {
get needsStt() { return this.input.includes('speech'); }
get isBackgroundGather() { return this.bugname_prefix === 'background_bargeIn_'; }
get wantsSingleUtterance() {
return this.data.recognizer?.singleUtterance === true;
}
@@ -227,7 +231,9 @@ class TaskGather extends SttTask {
const startListening = async(cs, ep) => {
this._startTimer();
if (this.isContinuousAsr && 0 === this.timeout) this._startAsrTimer();
if (this.isContinuousAsr && 0 === this.timeout && !this.isBackgroundGather) {
this._startAsrTimer();
}
if (this.input.includes('speech') && !this.listenDuringPrompt) {
try {
await this._setSpeechHandlers(cs, ep);
@@ -395,6 +401,7 @@ class TaskGather extends SttTask {
if (this.digitBuffer.length === 0 && this.needsStt) {
// DTMF is higher priority than STT.
this.removeCustomEventListeners();
this._clearAsrTimer(); //clear ASR timer as we're now using dtmf
this._stopTranscribing(ep);
}
this.digitBuffer += evt.dtmf;
@@ -409,6 +416,7 @@ class TaskGather extends SttTask {
const ms = this.interDigitTimeout * 1000;
this.logger.debug(`starting interdigit timer of ${ms}`);
this.interDigitTimer = setTimeout(() => this._resolve('dtmf-interdigit-timeout'), ms);
this._clearTimer(); //clear main timer as we're now using interdigit dtmf timer
}
}
@@ -466,16 +474,28 @@ class TaskGather extends SttTask {
this.addCustomEventListener(ep, DeepgramTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
this.addCustomEventListener(ep, DeepgramTranscriptionEvents.ConnectFailure,
this._onVendorConnectFailure.bind(this, cs, ep));
this.addCustomEventListener(ep, DeepgramTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep));
break;
case 'deepgramriver':
this.bugname = `${this.bugname_prefix}deepgramriver_transcribe`;
case 'deepgramflux':
this.bugname = `${this.bugname_prefix}deepgramflux_transcribe`;
this.addCustomEventListener(
ep, DeepgramRiverTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep, DeepgramfluxTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
this.addCustomEventListener(
ep, DeepgramRiverTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
this.addCustomEventListener(ep, DeepgramRiverTranscriptionEvents.ConnectFailure,
ep, DeepgramfluxTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
this.addCustomEventListener(ep, DeepgramfluxTranscriptionEvents.ConnectFailure,
this._onVendorConnectFailure.bind(this, cs, ep));
this.addCustomEventListener(ep, DeepgramfluxTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep));
break;
case 'gladia':
this.bugname = `${this.bugname_prefix}gladia_transcribe`;
this.addCustomEventListener(
ep, GladiaTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
this.addCustomEventListener(ep, GladiaTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
this.addCustomEventListener(ep, GladiaTranscriptionEvents.ConnectFailure,
this._onVendorConnectFailure.bind(this, cs, ep));
this.addCustomEventListener(ep, GladiaTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep));
break;
case 'soniox':
@@ -555,6 +575,18 @@ class TaskGather extends SttTask {
this._onVendorConnectFailure.bind(this, cs, ep));
break;
case 'houndify':
this.bugname = `${this.bugname_prefix}houndify_transcribe`;
this.addCustomEventListener(ep, HoundifyTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep));
this.addCustomEventListener(ep, HoundifyTranscriptionEvents.Error,
this._onVendorError.bind(this, cs, ep));
this.addCustomEventListener(ep, HoundifyTranscriptionEvents.ConnectFailure,
this._onVendorConnectFailure.bind(this, cs, ep));
this.addCustomEventListener(ep, HoundifyTranscriptionEvents.Connect,
this._onVendorConnect.bind(this, cs, ep));
break;
case 'voxist':
this.bugname = `${this.bugname_prefix}voxist_transcribe`;
this.addCustomEventListener(ep, VoxistTranscriptionEvents.Transcription,
@@ -702,8 +734,8 @@ class TaskGather extends SttTask {
this.logger.debug(`Starting timoutTimer of ${this.timeout}ms`);
this._clearTimer();
this._timeoutTimer = setTimeout(() => {
// If continuousASR in use then extend by the asr window for more transcripts.
if (this.isContinuousAsr) this._startAsrTimer();
if (this.interDigitTimer) return; // let the inter-digit timer complete
else {
this._resolve(this.digitBuffer.length >= this.minDigits ? 'dtmf-num-digits' : 'timeout');
}
@@ -910,7 +942,7 @@ class TaskGather extends SttTask {
evt = this.normalizeTranscription(evt, this.vendor, 1, this.language,
this.shortUtterance, this.data.recognizer.punctuation);
//this.logger.debug({evt, bugname, finished, vendor: this.vendor}, 'Gather:_onTranscription normalized transcript');
this.logger.debug({evt, bugname, finished, vendor: this.vendor}, 'Gather:_onTranscription normalized transcript');
if (evt.alternatives.length === 0) {
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, continue listening');
@@ -1076,6 +1108,11 @@ class TaskGather extends SttTask {
this.cs.requestor.request('verb:hook', this.partialResultHook, Object.assign({speech: evt},
this.cs.callInfo, httpHeaders));
}
else if (this.vendor === 'deepgramflux' &&
['EagerEndOfTurn', 'TurnResumed'].includes(evt.vendor.evt?.event)) {
this.logger.debug(`Gather:_onTranscription - deepgramflux event detected: ${evt.event}`);
this.performAction({speech: evt, reason: 'speechDetected'}, false);
}
if (this.vendor === 'soniox') {
if (evt.vendor.finalWords.length) {
this.logger.debug({evt}, 'TaskGather:_onTranscription - buffering soniox transcript');

View File

@@ -5,6 +5,17 @@ const moment = require('moment');
const MAX_PLAY_AUDIO_QUEUE_SIZE = 10;
const DTMF_SPAN_NAME = 'dtmf';
function escapeString(str) {
return str
.replace(/\\/g, '\\\\') // Escape backslashes
.replace(/"/g, '\\"') // Escape double quotes
.replace(/\b/g, '\\b') // Escape backspace
.replace(/\f/g, '\\f') // Escape formfeed
.replace(/\n/g, '\\n') // Escape newlines
.replace(/\r/g, '\\r') // Escape carriage returns
.replace(/\t/g, '\\t'); // Escape tabs
}
class TaskListen extends Task {
constructor(logger, opts, parentTask) {
super(logger, opts);
@@ -16,10 +27,20 @@ class TaskListen extends Task {
this.preconditions = TaskPreconditions.Endpoint;
[
'action', 'auth', 'method', 'url', 'finishOnKey', 'maxLength', 'metadata', 'mixType', 'passDtmf', 'playBeep',
'action', 'auth', 'method', 'url', 'finishOnKey', 'maxLength', 'mixType', 'passDtmf', 'playBeep',
'sampleRate', 'timeout', 'transcribe', 'wsAuth', 'disableBidirectionalAudio', 'channel'
].forEach((k) => this[k] = this.data[k]);
//Escape JSON special characters in metadata
if (this.data.metadata) {
this.metadata = {};
for (const key in this.data.metadata) {
if (this.data.metadata.hasOwnProperty(key)) {
this.metadata[key] = escapeString(this.data.metadata[key]);
}
}
}
this.mixType = this.mixType || 'mono';
this.sampleRate = this.sampleRate || 8000;
this.earlyMedia = this.data.earlyMedia === true;

View File

@@ -6,9 +6,21 @@ class TaskPlay extends Task {
super(logger, opts);
this.preconditions = TaskPreconditions.Endpoint;
this.url = this.data.url.includes('?')
? this.data.url.split('?')[0] + '?' + this.data.url.split('?')[1].replaceAll('.', '%2E')
: this.data.url;
//Cleanup URLs that contain a querystring with a . unless that querystring is the filename
// see https://github.com/jambonz/jambonz-feature-server/pull/1293
// and https://github.com/jambonz/jambonz-feature-server/issues/1394 for background
if (this.data.url.includes('?')) {
if (['.mp3', '.wav'].includes(this.data.url.slice(-4))) {
this.url = this.data.url;
}
else {
this.url = this.data.url.split('?')[0] + '?' + this.data.url.split('?')[1].replaceAll('.', '%2E');
}
}
else {
this.url = this.data.url;
}
this.seekOffset = this.data.seekOffset || -1;
this.timeoutSecs = this.data.timeoutSecs || -1;
this.loop = this.data.loop || 1;

View File

@@ -409,6 +409,9 @@ class TaskSay extends TtsTask {
});
const r = await ep.play(filename);
this.logger.debug({r}, 'Say:exec play result');
if (r.playbackSeconds == null && r.playbackMilliseconds == null && r.playbackLastOffsetPos == null) {
this._playReject(new Error('Playback failed to start'));
}
try {
// wait for playback-stop event received to confirm if the playback is successful
await this._playPromise;

View File

@@ -203,6 +203,56 @@ class SttTask extends Task {
if (cs.hasGlobalSttPunctuation && !this.data.recognizer.punctuation) {
this.data.recognizer.punctuation = cs.globalSttPunctuation;
}
if (this.vendor === 'gladia') {
const { api_key, region } = this.sttCredentials;
const {url} = await this.createGladiaLiveSession({
api_key, region,
model: this.data.recognizer.model || 'solaria-1',
options: this.data.recognizer.gladiaOptions || {}
});
const {host, pathname, search} = new URL(url);
this.sttCredentials.host = host;
this.sttCredentials.path = `${pathname}${search}`;
}
}
async createGladiaLiveSession({
api_key,
region = 'us-west',
model = 'solaria-1',
options = {},
}) {
const url = `https://api.gladia.io/v2/live?region=${region}`;
const response = await fetch(url, {
method: 'POST',
headers: {
'x-gladia-key': api_key,
'Content-Type': 'application/json'
},
body: JSON.stringify({
encoding: 'wav/pcm',
bit_depth: 16,
sample_rate: 8000,
channels: 1,
model,
...options,
messages_config: {
receive_final_transcripts: true,
receive_speech_events: true,
receive_errors: true,
}
})
});
if (!response.ok) {
const error = await response.text();
this.logger.error({url, status: response.status, error}, 'Error creating Gladia live session');
throw new Error(`Error creating Gladia live session: ${response.status} ${error}`);
}
const data = await response.json();
this.logger.debug({url: data.url}, 'Gladia Call registered');
return data;
}
addCustomEventListener(ep, event, handler) {

View File

@@ -6,7 +6,8 @@ const {
AwsTranscriptionEvents,
AzureTranscriptionEvents,
DeepgramTranscriptionEvents,
DeepgramRiverTranscriptionEvents,
GladiaTranscriptionEvents,
DeepgramfluxTranscriptionEvents,
SonioxTranscriptionEvents,
CobaltTranscriptionEvents,
IbmTranscriptionEvents,
@@ -14,6 +15,7 @@ const {
JambonzTranscriptionEvents,
TranscribeStatus,
AssemblyAiTranscriptionEvents,
HoundifyTranscriptionEvents,
VoxistTranscriptionEvents,
CartesiaTranscriptionEvents,
OpenAITranscriptionEvents,
@@ -237,19 +239,35 @@ class TaskTranscribe extends SttTask {
this._onVendorConnect.bind(this, cs, ep));
this.addCustomEventListener(ep, DeepgramTranscriptionEvents.ConnectFailure,
this._onVendorConnectFailure.bind(this, cs, ep, channel));
this.addCustomEventListener(ep, DeepgramTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep));
/* if app sets deepgramOptions.utteranceEndMs they essentially want continuous asr */
//if (opts.DEEPGRAM_SPEECH_UTTERANCE_END_MS) this.isContinuousAsr = true;
break;
case 'deepgramriver':
this.bugname = `${this.bugname_prefix}deepgramriver_transcribe`;
this.addCustomEventListener(ep, DeepgramRiverTranscriptionEvents.Transcription,
case 'deepgramflux':
this.bugname = `${this.bugname_prefix}deepgramflux_transcribe`;
this.addCustomEventListener(ep, DeepgramfluxTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
this.addCustomEventListener(ep, DeepgramRiverTranscriptionEvents.Connect,
this.addCustomEventListener(ep, DeepgramfluxTranscriptionEvents.Connect,
this._onVendorConnect.bind(this, cs, ep));
this.addCustomEventListener(ep, DeepgramRiverTranscriptionEvents.ConnectFailure,
this.addCustomEventListener(ep, DeepgramfluxTranscriptionEvents.ConnectFailure,
this._onVendorConnectFailure.bind(this, cs, ep, channel));
this.addCustomEventListener(ep, DeepgramfluxTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep));
break;
case 'gladia':
this.bugname = `${this.bugname_prefix}gladia_transcribe`;
this.addCustomEventListener(ep, GladiaTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
this.addCustomEventListener(ep, GladiaTranscriptionEvents.Connect,
this._onVendorConnect.bind(this, cs, ep));
this.addCustomEventListener(ep, GladiaTranscriptionEvents.ConnectFailure,
this._onVendorConnectFailure.bind(this, cs, ep, channel));
this.addCustomEventListener(ep, GladiaTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep));
break;
case 'soniox':
this.bugname = `${this.bugname_prefix}soniox_transcribe`;
@@ -320,6 +338,18 @@ class TaskTranscribe extends SttTask {
this._onVendorConnectFailure.bind(this, cs, ep, channel));
break;
case 'houndify':
this.bugname = `${this.bugname_prefix}houndify_transcribe`;
this.addCustomEventListener(ep, HoundifyTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
this.addCustomEventListener(ep, HoundifyTranscriptionEvents.Error,
this._onVendorError.bind(this, cs, ep));
this.addCustomEventListener(ep, HoundifyTranscriptionEvents.ConnectFailure,
this._onVendorConnectFailure.bind(this, cs, ep, channel));
this.addCustomEventListener(ep, HoundifyTranscriptionEvents.Connect,
this._onVendorConnect.bind(this, cs, ep));
break;
case 'voxist':
this.bugname = `${this.bugname_prefix}voxist_transcribe`;
this.addCustomEventListener(ep, VoxistTranscriptionEvents.Transcription,

View File

@@ -81,7 +81,7 @@ class TtsTask extends Task {
}
async setTtsStreamingChannelVars(vendor, language, voice, credentials, ep) {
const {api_key, model_id, custom_tts_streaming_url, auth_token} = credentials;
const {api_key, model_id, api_uri, custom_tts_streaming_url, auth_token} = credentials;
let obj;
this.logger.debug(`setTtsStreamingChannelVars: vendor: ${vendor}, language: ${language}, voice: ${voice}`);
@@ -105,6 +105,7 @@ class TtsTask extends Task {
const {stability, similarity_boost, use_speaker_boost, style, speed} = this.options.voice_settings || {};
obj = {
ELEVENLABS_API_KEY: api_key,
...(api_uri && {ELEVENLABS_API_URI: api_uri}),
ELEVENLABS_TTS_STREAMING_MODEL_ID: model_id,
ELEVENLABS_TTS_STREAMING_VOICE_ID: voice,
// 20/12/2024 - only eleven_turbo_v2_5 support multiple language
@@ -327,6 +328,13 @@ class TtsTask extends Task {
this.playbackIds.push(extractPlaybackId(filePath));
this.logger.debug({playbackIds: this.playbackIds}, 'Say: a streaming tts api will be used');
const modifiedPath = filePath.replace('say:{', `say:{session-uuid=${ep.uuid},`);
this.notifyStatus({
event: 'synthesized-audio',
vendor,
language,
servedFromCache,
'id': this.id
});
return modifiedPath;
}
return filePath;

View File

@@ -94,12 +94,20 @@
"DeepgramTranscriptionEvents": {
"Transcription": "deepgram_transcribe::transcription",
"ConnectFailure": "deepgram_transcribe::connect_failed",
"Connect": "deepgram_transcribe::connect"
"Connect": "deepgram_transcribe::connect",
"Error": "deepgram_transcribe::error"
},
"DeepgramRiverTranscriptionEvents": {
"Transcription": "deepgramriver_transcribe::transcription",
"ConnectFailure": "deepgramriver_transcribe::connect_failed",
"Connect": "deepgramriver_transcribe::connect"
"DeepgramfluxTranscriptionEvents": {
"Transcription": "deepgramflux_transcribe::transcription",
"ConnectFailure": "deepgramflux_transcribe::connect_failed",
"Connect": "deepgramflux_transcribe::connect",
"Error": "deepgramflux_transcribe::error"
},
"GladiaTranscriptionEvents": {
"Transcription": "gladia_transcribe::transcription",
"ConnectFailure": "gladia_transcribe::connect_failed",
"Connect": "gladia_transcribe::connect",
"Error": "gladia_transcribe::error"
},
"SonioxTranscriptionEvents": {
"Transcription": "soniox_transcribe::transcription",
@@ -167,6 +175,12 @@
"ConnectFailure": "assemblyai_transcribe::connect_failed",
"Connect": "assemblyai_transcribe::connect"
},
"HoundifyTranscriptionEvents": {
"Transcription": "houndify_transcribe::transcription",
"Error": "houndify_transcribe::error",
"ConnectFailure": "houndify_transcribe::connect_failed",
"Connect": "houndify_transcribe::connect"
},
"VoxistTranscriptionEvents": {
"Transcription": "voxist_transcribe::transcription",
"Error": "voxist_transcribe::error",

View File

@@ -81,7 +81,12 @@ const speechMapper = (cred) => {
obj.deepgram_tts_uri = o.deepgram_tts_uri;
obj.deepgram_stt_use_tls = o.deepgram_stt_use_tls;
}
else if ('deepgramriver' === obj.vendor) {
else if ('gladia' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
obj.region = o.region;
}
else if ('deepgramflux' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
}
@@ -101,6 +106,7 @@ const speechMapper = (cred) => {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
obj.model_id = o.model_id;
obj.api_uri = o.api_uri;
obj.options = o.options;
}
else if ('playht' === obj.vendor) {
@@ -141,6 +147,12 @@ const speechMapper = (cred) => {
obj.api_key = o.api_key;
obj.service_version = o.service_version;
}
else if ('houndify' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.client_id = o.client_id;
obj.client_key = o.client_key;
obj.user_id = o.user_id;
}
else if ('voxist' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;

View File

@@ -131,6 +131,43 @@ const stickyVars = {
'OPENAI_TURN_DETECTION_PREFIX_PADDING_MS',
'OPENAI_TURN_DETECTION_SILENCE_DURATION_MS',
],
houndify: [
'HOUNDIFY_CLIENT_ID',
'HOUNDIFY_CLIENT_KEY',
'HOUNDIFY_USER_ID',
'HOUNDIFY_MAX_SILENCE_SECONDS',
'HOUNDIFY_MAX_SILENCE_AFTER_FULL_QUERY_SECONDS',
'HOUNDIFY_MAX_SILENCE_AFTER_PARTIAL_QUERY_SECONDS',
'HOUNDIFY_VAD_SENSITIVITY',
'HOUNDIFY_VAD_TIMEOUT',
'HOUNDIFY_VAD_MODE',
'HOUNDIFY_VAD_VOICE_MS',
'HOUNDIFY_VAD_SILENCE_MS',
'HOUNDIFY_VAD_DEBUG',
'HOUNDIFY_AUDIO_FORMAT',
'HOUNDIFY_ENABLE_NOISE_REDUCTION',
'HOUNDIFY_AUDIO_ENDPOINT',
'HOUNDIFY_ENABLE_PROFANITY_FILTER',
'HOUNDIFY_ENABLE_PUNCTUATION',
'HOUNDIFY_ENABLE_CAPITALIZATION',
'HOUNDIFY_CONFIDENCE_THRESHOLD',
'HOUNDIFY_ENABLE_DISFLUENCY_FILTER',
'HOUNDIFY_MAX_RESULTS',
'HOUNDIFY_ENABLE_WORD_TIMESTAMPS',
'HOUNDIFY_MAX_ALTERNATIVES',
'HOUNDIFY_PARTIAL_TRANSCRIPT_INTERVAL',
'HOUNDIFY_SESSION_TIMEOUT',
'HOUNDIFY_CONNECTION_TIMEOUT',
'HOUNDIFY_LATITUDE',
'HOUNDIFY_LONGITUDE',
'HOUNDIFY_CITY',
'HOUNDIFY_STATE',
'HOUNDIFY_COUNTRY',
'HOUNDIFY_TIMEZONE',
'HOUNDIFY_DOMAIN',
'HOUNDIFY_CUSTOM_VOCABULARY',
'HOUNDIFY_LANGUAGE_MODEL'
],
};
/**
@@ -339,20 +376,56 @@ const normalizeDeepgram = (evt, channel, language, shortUtterance) => {
};
};
const normalizeDeepgramRiver = (evt, channel, language, shortUtterance) => {
const normalizeGladia = (evt, channel, language, shortUtterance) => {
const copy = JSON.parse(JSON.stringify(evt));
// Handle Gladia transcript format
if (evt.type === 'transcript' && evt.data && evt.data.utterance) {
const utterance = evt.data.utterance;
const alternatives = [{
confidence: utterance.confidence || 0,
transcript: utterance.text || '',
}];
return {
language_code: utterance.language || language,
channel_tag: channel,
is_final: evt.data.is_final || false,
alternatives,
vendor: {
name: 'gladia',
evt: copy
}
};
}
};
const normalizeDeepgramFlux = (evt, channel, language) => {
const copy = JSON.parse(JSON.stringify(evt));
let turnTakingEvent;
if (['StartOfTurn', 'EagerEndOfTurn', 'TurnResumed', 'EndOfTurn'].includes(evt.event)) {
turnTakingEvent = evt.event;
}
/* calculate total confidence based on word-level confidence */
const realWords = (evt.words || [])
.filter((w) => ![',.!?;'].includes(w.word));
const confidence = realWords.length > 0 ? realWords.reduce((acc, w) => acc + w.confidence, 0) / realWords.length : 0;
return {
language_code: language,
channel_tag: channel,
is_final: evt.event === 'EndOfTurn',
alternatives: [
{
confidence: evt.end_of_turn_confidence,
confidence,
end_of_turn_confidence: evt.end_of_turn_confidence,
transcript: evt.transcript,
...(turnTakingEvent && {turn_taking_event: turnTakingEvent})
}
],
vendor: {
name: 'deepgramriver',
name: 'deepgramflux',
evt: copy
}
};
@@ -570,6 +643,30 @@ const normalizeAssemblyAi = (evt, channel, language) => {
};
};
const normalizeHoundify = (evt, channel, language) => {
const copy = JSON.parse(JSON.stringify(evt));
const alternatives = [];
const is_final = evt.ResultsAreFinal && evt.ResultsAreFinal[0] === true;
if (evt.Disambiguation && evt.Disambiguation.ChoiceData && evt.Disambiguation.ChoiceData.length > 0) {
// Handle Houndify Voice Search Result format
const choiceData = evt.Disambiguation.ChoiceData[0];
alternatives.push({
confidence: choiceData.ConfidenceScore || choiceData.ASRConfidence || 0.0,
transcript: choiceData.FormattedTranscription || choiceData.Transcription || '',
});
}
return {
language_code: language,
channel_tag: channel,
is_final,
alternatives,
vendor: {
name: 'houndify',
evt: copy
}
};
};
const normalizeVoxist = (evt, channel, language) => {
const copy = JSON.parse(JSON.stringify(evt));
return {
@@ -669,8 +766,10 @@ module.exports = (logger) => {
switch (vendor) {
case 'deepgram':
return normalizeDeepgram(evt, channel, language, shortUtterance);
case 'deepgramriver':
return normalizeDeepgramRiver(evt, channel, language, shortUtterance);
case 'gladia':
return normalizeGladia(evt, channel, language, shortUtterance);
case 'deepgramflux':
return normalizeDeepgramFlux(evt, channel, language, shortUtterance);
case 'microsoft':
return normalizeMicrosoft(evt, channel, language, punctuation);
case 'google':
@@ -689,6 +788,8 @@ module.exports = (logger) => {
return normalizeCobalt(evt, channel, language);
case 'assemblyai':
return normalizeAssemblyAi(evt, channel, language, shortUtterance);
case 'houndify':
return normalizeHoundify(evt, channel, language, shortUtterance);
case 'voxist':
return normalizeVoxist(evt, channel, language);
case 'cartesia':
@@ -965,19 +1066,30 @@ module.exports = (logger) => {
...(entityPrompt && {DEEPGRAM_SPEECH_ENTITY_PROMPT: entityPrompt}),
};
}
else if ('deepgramriver' === vendor) {
else if ('deepgramflux' === vendor) {
const {
preflightThreshold,
eotThreshold,
eotTimeoutMs,
mipOptOut
mipOptOut,
model,
eagerEotThreshold,
keyterms
} = rOpts.deepgramOptions || {};
opts = {
DEEPGRAM_API_KEY: sttCredentials.api_key,
...(preflightThreshold && {DEEPGRAM_SPEECH_PRELIGHT_THRESHOLD: preflightThreshold}),
...(eotThreshold && {DEEPGRAM_SPEECH_EOT_THRESHOLD: eotThreshold}),
...(eotTimeoutMs && {DEEPGRAM_SPEECH_EOT_TIMEOUT_MS: eotTimeoutMs}),
...(mipOptOut && {DEEPGRAM_SPEECH_MIP_OPT_OUT: mipOptOut}),
DEEPGRAMFLUX_API_KEY: sttCredentials.api_key,
DEEPGRAMFLUX_SPEECH_MODEL: model || 'flux-general-en',
...(eotThreshold && {DEEPGRAMFLUX_SPEECH_EOT_THRESHOLD: eotThreshold}),
...(eotTimeoutMs && {DEEPGRAMFLUX_SPEECH_EOT_TIMEOUT_MS: eotTimeoutMs}),
...(mipOptOut && {DEEPGRAMFLUX_SPEECH_MIP_OPT_OUT: mipOptOut}),
...(eagerEotThreshold && {DEEPGRAMFLUX_SPEECH_EAGER_EOT_THRESHOLD: eagerEotThreshold}),
...(keyterms && keyterms.length > 0 && {DEEPGRAMFLUX_SPEECH_KEYTERMS: keyterms.join(',')}),
};
}
else if ('gladia' === vendor) {
const {host, path} = sttCredentials;
opts = {
GLADIA_SPEECH_HOST: host,
GLADIA_SPEECH_PATH: path,
};
}
else if ('soniox' === vendor) {
@@ -1106,6 +1218,57 @@ module.exports = (logger) => {
{ASSEMBLYAI_WORD_BOOST: JSON.stringify(rOpts.hints)})
};
}
else if ('houndify' === vendor) {
const {
latitude, longitude, city, state, country, timeZone, domain, audioEndpoint,
maxSilenceSeconds, maxSilenceAfterFullQuerySeconds, maxSilenceAfterPartialQuerySeconds,
vadSensitivity, vadTimeout, vadMode, vadVoiceMs, vadSilenceMs, vadDebug,
audioFormat, enableNoiseReduction, enableProfanityFilter, enablePunctuation,
enableCapitalization, confidenceThreshold, enableDisfluencyFilter,
maxResults, enableWordTimestamps, maxAlternatives, partialTranscriptInterval,
sessionTimeout, connectionTimeout, customVocabulary, languageModel
} = rOpts.houndifyOptions || {};
opts = {
...opts,
HOUNDIFY_CLIENT_ID: sttCredentials.client_id,
HOUNDIFY_CLIENT_KEY: sttCredentials.client_key,
HOUNDIFY_USER_ID: sttCredentials.user_id,
HOUNDIFY_MAX_SILENCE_SECONDS: maxSilenceSeconds || 5,
HOUNDIFY_MAX_SILENCE_AFTER_FULL_QUERY_SECONDS: maxSilenceAfterFullQuerySeconds || 1,
HOUNDIFY_MAX_SILENCE_AFTER_PARTIAL_QUERY_SECONDS: maxSilenceAfterPartialQuerySeconds || 1.5,
...(vadSensitivity && {HOUNDIFY_VAD_SENSITIVITY: vadSensitivity}),
...(vadTimeout && {HOUNDIFY_VAD_TIMEOUT: vadTimeout}),
...(vadMode && {HOUNDIFY_VAD_MODE: vadMode}),
...(vadVoiceMs && {HOUNDIFY_VAD_VOICE_MS: vadVoiceMs}),
...(vadSilenceMs && {HOUNDIFY_VAD_SILENCE_MS: vadSilenceMs}),
...(vadDebug && {HOUNDIFY_VAD_DEBUG: vadDebug}),
...(audioFormat && {HOUNDIFY_AUDIO_FORMAT: audioFormat}),
...(enableNoiseReduction && {HOUNDIFY_ENABLE_NOISE_REDUCTION: enableNoiseReduction}),
...(enableProfanityFilter && {HOUNDIFY_ENABLE_PROFANITY_FILTER: enableProfanityFilter}),
...(enablePunctuation && {HOUNDIFY_ENABLE_PUNCTUATION: enablePunctuation}),
...(enableCapitalization && {HOUNDIFY_ENABLE_CAPITALIZATION: enableCapitalization}),
...(confidenceThreshold && {HOUNDIFY_CONFIDENCE_THRESHOLD: confidenceThreshold}),
...(enableDisfluencyFilter && {HOUNDIFY_ENABLE_DISFLUENCY_FILTER: enableDisfluencyFilter}),
...(maxResults && {HOUNDIFY_MAX_RESULTS: maxResults}),
...(enableWordTimestamps && {HOUNDIFY_ENABLE_WORD_TIMESTAMPS: enableWordTimestamps}),
...(maxAlternatives && {HOUNDIFY_MAX_ALTERNATIVES: maxAlternatives}),
...(partialTranscriptInterval && {HOUNDIFY_PARTIAL_TRANSCRIPT_INTERVAL: partialTranscriptInterval}),
...(sessionTimeout && {HOUNDIFY_SESSION_TIMEOUT: sessionTimeout}),
...(connectionTimeout && {HOUNDIFY_CONNECTION_TIMEOUT: connectionTimeout}),
...(latitude && {HOUNDIFY_LATITUDE: latitude}),
...(longitude && {HOUNDIFY_LONGITUDE: longitude}),
...(city && {HOUNDIFY_CITY: city}),
...(state && {HOUNDIFY_STATE: state}),
...(country && {HOUNDIFY_COUNTRY: country}),
...(timeZone && {HOUNDIFY_TIMEZONE: timeZone}),
...(domain && {HOUNDIFY_DOMAIN: domain}),
...(audioEndpoint && {HOUNDIFY_AUDIO_ENDPOINT: audioEndpoint}),
...(customVocabulary && {HOUNDIFY_CUSTOM_VOCABULARY:
Array.isArray(customVocabulary) ? customVocabulary.join(',') : customVocabulary}),
...(languageModel && {HOUNDIFY_LANGUAGE_MODEL: languageModel}),
};
}
else if ('voxist' === vendor) {
opts = {
...opts,

32
package-lock.json generated
View File

@@ -11,14 +11,14 @@
"dependencies": {
"@aws-sdk/client-auto-scaling": "^3.549.0",
"@aws-sdk/client-sns": "^3.549.0",
"@jambonz/db-helpers": "^0.9.17",
"@jambonz/db-helpers": "^0.9.18",
"@jambonz/http-health-check": "^0.0.1",
"@jambonz/mw-registrar": "^0.2.7",
"@jambonz/realtimedb-helpers": "^0.8.15",
"@jambonz/speech-utils": "^0.2.23",
"@jambonz/speech-utils": "^0.2.25",
"@jambonz/stats-collector": "^0.1.10",
"@jambonz/time-series": "^0.2.14",
"@jambonz/verb-specifications": "^0.0.113",
"@jambonz/verb-specifications": "^0.0.118",
"@modelcontextprotocol/sdk": "^1.9.0",
"@opentelemetry/api": "^1.8.0",
"@opentelemetry/exporter-jaeger": "^1.23.0",
@@ -33,7 +33,7 @@
"debug": "^4.3.4",
"deepcopy": "^2.1.0",
"drachtio-fsmrf": "^4.1.2",
"drachtio-srf": "^5.0.5",
"drachtio-srf": "^5.0.11",
"express": "^4.19.2",
"express-validator": "^7.0.1",
"moment": "^2.30.1",
@@ -1437,9 +1437,9 @@
}
},
"node_modules/@jambonz/db-helpers": {
"version": "0.9.17",
"resolved": "https://registry.npmjs.org/@jambonz/db-helpers/-/db-helpers-0.9.17.tgz",
"integrity": "sha512-LQErcrT/uts83Up9UJVqPPSZy0+CLw/djES3bgUoB600uLz7Wfgpi309XTIjNLikQjDsNFaTneZY0QwDKMCbzg==",
"version": "0.9.18",
"resolved": "https://registry.npmjs.org/@jambonz/db-helpers/-/db-helpers-0.9.18.tgz",
"integrity": "sha512-rrrBzz6UnEW0LV4OXRUzjg2XzwUGNjuJptKPxozvCQYOSuXQsRYcc7AM+p32ssBd+LdTX866pQ0QwRc2cUEAPw==",
"license": "MIT",
"dependencies": {
"cidr-matcher": "^2.1.1",
@@ -1476,9 +1476,9 @@
}
},
"node_modules/@jambonz/speech-utils": {
"version": "0.2.23",
"resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.2.23.tgz",
"integrity": "sha512-o28IBoKzdnQoUUSC1XljHVkDPWhkTH+rFnI9OWYC6p1/f8px++4Y23/JMIAJVbxqKB1CIf531JhTwy4tCnQP7g==",
"version": "0.2.25",
"resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.2.25.tgz",
"integrity": "sha512-dtMY4SoGhpvKM6slPvTlMz61Fd0Rxnajwm3sJVxlAYZnFtzfIvqg9rrYUN4h05tJbo/4TLs15/dVeicw7dTnew==",
"license": "MIT",
"dependencies": {
"23": "^0.0.0",
@@ -1517,9 +1517,9 @@
}
},
"node_modules/@jambonz/verb-specifications": {
"version": "0.0.113",
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.113.tgz",
"integrity": "sha512-8dqEtUwEYw8wA7a0xYruBdwmUwUx1Q97483UgkVm3HcmJVkkkf9g7C3vI+jfZLyDZsOuu6CkFAf/KrM8DTx/KA==",
"version": "0.0.118",
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.118.tgz",
"integrity": "sha512-1dGnc6TUCehjt1yGNuqh1uzk1xw9HhUm39aVUosQMHlnT0fK0ItikeJ0uttTjFastHNmPPxqJwb20wOvVGTCFg==",
"license": "MIT",
"dependencies": {
"debug": "^4.3.4",
@@ -3890,9 +3890,9 @@
}
},
"node_modules/drachtio-srf": {
"version": "5.0.10",
"resolved": "https://registry.npmjs.org/drachtio-srf/-/drachtio-srf-5.0.10.tgz",
"integrity": "sha512-AjA2nAwGpepmJB+Tx/VmnwhOLZ54ATXd+gAeSLBhUm2Id4T0nwVXRNsnqzv7gryQO4BH3IwCdUwkSa7qnwOSOA==",
"version": "5.0.13",
"resolved": "https://registry.npmjs.org/drachtio-srf/-/drachtio-srf-5.0.13.tgz",
"integrity": "sha512-IG62MLLzhXjQtbjX6T6I8jXK6QhWQwsblkO3+F2Zhcu4lXBO3W12rrKyAPsw6GimRSSXIkvMbn5/je8AU/xehQ==",
"license": "MIT",
"dependencies": {
"debug": "^3.2.7",

View File

@@ -27,14 +27,14 @@
"dependencies": {
"@aws-sdk/client-auto-scaling": "^3.549.0",
"@aws-sdk/client-sns": "^3.549.0",
"@jambonz/db-helpers": "^0.9.17",
"@jambonz/db-helpers": "^0.9.18",
"@jambonz/http-health-check": "^0.0.1",
"@jambonz/mw-registrar": "^0.2.7",
"@jambonz/realtimedb-helpers": "^0.8.15",
"@jambonz/speech-utils": "^0.2.23",
"@jambonz/speech-utils": "^0.2.25",
"@jambonz/stats-collector": "^0.1.10",
"@jambonz/time-series": "^0.2.14",
"@jambonz/verb-specifications": "^0.0.113",
"@jambonz/verb-specifications": "^0.0.118",
"@modelcontextprotocol/sdk": "^1.9.0",
"@opentelemetry/api": "^1.8.0",
"@opentelemetry/exporter-jaeger": "^1.23.0",
@@ -49,7 +49,7 @@
"debug": "^4.3.4",
"deepcopy": "^2.1.0",
"drachtio-fsmrf": "^4.1.2",
"drachtio-srf": "^5.0.5",
"drachtio-srf": "^5.0.11",
"express": "^4.19.2",
"express-validator": "^7.0.1",
"moment": "^2.30.1",