Compare commits

..

15 Commits

Author SHA1 Message Date
Sam Machin
a0a579ccee escape json special chars in metadata (#1399) 2025-10-20 10:30:03 -04:00
Sam Machin
4218653852 add customerData on transferred calls (#1391)
* add customerData on transferred calls

* change to if statement
2025-10-20 09:20:12 -04:00
Hoan Luu Huu
89cc39f726 support gladia stt (#1397)
* support gladia stt

* wip

* wip

* update verb specification
2025-10-20 04:56:39 -04:00
Sam Machin
b231593bff bump dbhelpers for cache change (#1396) 2025-10-15 11:38:43 -04:00
Sam Machin
4309d25376 don't encode querystring if its the filename (#1395)
* don't encode querystring if its the filename

* lint

* update link to issue

u
2025-10-14 10:48:50 -04:00
Hoan Luu Huu
a00703a067 support houndify stt (#1364)
* support houndify stt

* wip

* wip

* wip

* update houndify stt parameters

* wip

* wip
2025-10-14 00:55:21 -04:00
Hoan Luu Huu
89c985b564 fixed does not send final status call back if call canceled quickly (#1393)
* fixed callsession should cleanup resource if call was canceled while fetching app

* wip

* wip

* wip

* wip

* wip
2025-10-11 03:44:42 -04:00
Dave Horton
b4ed4c8c46 #1385: Gather - dont start the continuous asr timer when we first start listening if this is a background gather (#1386) 2025-10-09 08:47:51 -04:00
Hoan Luu Huu
581d309f36 support elevenlabs different endpoint (#1387)
* support elevenlabs different endpoint

* wip

* wip
2025-10-09 08:19:40 -04:00
Sam Machin
d1baf2fe37 if call is transferred from another FS then always answer (#1383)
Currently if the call being transferred was originally an outbound call then the direction thats retrieved from redis is outbound and the invite of the refer from the other FS is never answered,
However a transferredCall will always need to be answered regardless of CallDirection
2025-10-07 07:19:11 -04:00
Dave Horton
28bf0d3477 send eager_eot events (#1382) 2025-10-06 16:50:20 -04:00
Dave Horton
d2d3b4583e Fix/flux cleanup (#1379)
* for deepgram flux include the turn taking events in the transcription payload

* for deepgram flux, including turn_taking_event in the speech payload

* fix prev commit which used wrong field
2025-10-04 20:06:38 -04:00
Hoan Luu Huu
854c26db11 support deepgramflux (#1373)
* support deepgramflux

* wip

* wip

* wip

* wip

* update verb scpecification
2025-10-03 10:38:39 -04:00
Dave Horton
e77666a1a7 update speech-utils and drachtio-srf (#1377) 2025-10-03 10:06:37 -04:00
Ed Robbins
5acb19225b If an error occurs during initial TTS request, propagate the error (#1369)
* If an error occurs during initial TTS request, propagate the error

* fix missing semicolon

* fix jslint error

* add null check to r.playbackMilliseconds
2025-10-01 00:02:51 -04:00
15 changed files with 425 additions and 61 deletions

View File

@@ -112,6 +112,14 @@ module.exports = function(srf, logger) {
req.locals.callingNumber = sipURIs[1];
}
}
// Feature server INVITE request pipelines taking time to finish,
// while connecting and fetch application from db and invoking webhook.
// call can be canceled without any handling, so we add a listener here
req.once('cancel', (sipMsg) => {
logger.info(`${callId} got CANCEL request`);
req.locals.canceled = true;
});
next();
}
@@ -362,13 +370,14 @@ module.exports = function(srf, logger) {
});
// if transferred call contains callInfo, let update original data to newly created callInfo in this instance.
if (app.transferredCall && app.callInfo) {
const {direction, callerName, from, to, originatingSipIp, originatingSipTrunkName} = app.callInfo;
const {direction, callerName, from, to, originatingSipIp, originatingSipTrunkName, customerData} = app.callInfo;
req.locals.callInfo.direction = direction;
req.locals.callInfo.callerName = callerName;
req.locals.callInfo.from = from;
req.locals.callInfo.to = to;
req.locals.callInfo.originatingSipIp = originatingSipIp;
req.locals.callInfo.originatingSipTrunkName = originatingSipTrunkName;
if (customerData) req.locals.callInfo.customerData = customerData;
delete app.callInfo;
}
next();

View File

@@ -1086,6 +1086,13 @@ class CallSession extends Emitter {
deepgram_stt_use_tls: credential.deepgram_stt_use_tls
};
}
else if ('gladia' === vendor) {
return {
speech_credential_sid: credential.speech_credential_sid,
api_key: credential.api_key,
region: credential.region,
};
}
else if ('soniox' === vendor) {
return {
speech_credential_sid: credential.speech_credential_sid,
@@ -1117,6 +1124,7 @@ class CallSession extends Emitter {
return {
api_key: credential.api_key,
model_id: credential.model_id,
api_uri: credential.api_uri,
options: credential.options
};
}
@@ -1165,7 +1173,15 @@ class CallSession extends Emitter {
service_version: credential.service_version
};
}
else if ('deepgramriver' === vendor) {
else if ('houndify' === vendor) {
return {
speech_credential_sid: credential.speech_credential_sid,
client_id: credential.client_id,
client_key: credential.client_key,
user_id: credential.user_id
};
}
else if ('deepgramflux' === vendor) {
return {
speech_credential_sid: credential.speech_credential_sid,
api_key: credential.api_key,
@@ -2399,7 +2415,7 @@ Duration=${duration} `
this.logger.debug(`endpoint was destroyed!! ${this.ep.uuid}`);
});
if (this.direction === CallDirection.Inbound) {
if (this.direction === CallDirection.Inbound || this.application?.transferredCall) {
if (task.earlyMedia && !this.req.finalResponseSent) {
this.res.send(183, {body: ep.local.sdp});
return {ep};

View File

@@ -22,6 +22,12 @@ class InboundCallSession extends CallSession {
this.req = req;
this.res = res;
// if the call was canceled before we got here, handle it
if (this.req.locals.canceled) {
req.locals.logger.info('InboundCallSession: constructor - call was already canceled');
this._onCancel();
}
req.once('cancel', this._onCancel.bind(this));
this.on('callStatusChange', this._notifyCallStatusChange.bind(this));

View File

@@ -5,13 +5,15 @@ const {
AwsTranscriptionEvents,
AzureTranscriptionEvents,
DeepgramTranscriptionEvents,
GladiaTranscriptionEvents,
SonioxTranscriptionEvents,
CobaltTranscriptionEvents,
IbmTranscriptionEvents,
NvidiaTranscriptionEvents,
JambonzTranscriptionEvents,
AssemblyAiTranscriptionEvents,
DeepgramRiverTranscriptionEvents,
HoundifyTranscriptionEvents,
DeepgramfluxTranscriptionEvents,
VoxistTranscriptionEvents,
CartesiaTranscriptionEvents,
OpenAITranscriptionEvents,
@@ -93,6 +95,8 @@ class TaskGather extends SttTask {
get needsStt() { return this.input.includes('speech'); }
get isBackgroundGather() { return this.bugname_prefix === 'background_bargeIn_'; }
get wantsSingleUtterance() {
return this.data.recognizer?.singleUtterance === true;
}
@@ -227,7 +231,9 @@ class TaskGather extends SttTask {
const startListening = async(cs, ep) => {
this._startTimer();
if (this.isContinuousAsr && 0 === this.timeout) this._startAsrTimer();
if (this.isContinuousAsr && 0 === this.timeout && !this.isBackgroundGather) {
this._startAsrTimer();
}
if (this.input.includes('speech') && !this.listenDuringPrompt) {
try {
await this._setSpeechHandlers(cs, ep);
@@ -471,15 +477,25 @@ class TaskGather extends SttTask {
this.addCustomEventListener(ep, DeepgramTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep));
break;
case 'deepgramriver':
this.bugname = `${this.bugname_prefix}deepgramriver_transcribe`;
case 'deepgramflux':
this.bugname = `${this.bugname_prefix}deepgramflux_transcribe`;
this.addCustomEventListener(
ep, DeepgramRiverTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep, DeepgramfluxTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
this.addCustomEventListener(
ep, DeepgramRiverTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
this.addCustomEventListener(ep, DeepgramRiverTranscriptionEvents.ConnectFailure,
ep, DeepgramfluxTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
this.addCustomEventListener(ep, DeepgramfluxTranscriptionEvents.ConnectFailure,
this._onVendorConnectFailure.bind(this, cs, ep));
this.addCustomEventListener(ep, DeepgramRiverTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep));
this.addCustomEventListener(ep, DeepgramfluxTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep));
break;
case 'gladia':
this.bugname = `${this.bugname_prefix}gladia_transcribe`;
this.addCustomEventListener(
ep, GladiaTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
this.addCustomEventListener(ep, GladiaTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
this.addCustomEventListener(ep, GladiaTranscriptionEvents.ConnectFailure,
this._onVendorConnectFailure.bind(this, cs, ep));
this.addCustomEventListener(ep, GladiaTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep));
break;
case 'soniox':
@@ -559,6 +575,18 @@ class TaskGather extends SttTask {
this._onVendorConnectFailure.bind(this, cs, ep));
break;
case 'houndify':
this.bugname = `${this.bugname_prefix}houndify_transcribe`;
this.addCustomEventListener(ep, HoundifyTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep));
this.addCustomEventListener(ep, HoundifyTranscriptionEvents.Error,
this._onVendorError.bind(this, cs, ep));
this.addCustomEventListener(ep, HoundifyTranscriptionEvents.ConnectFailure,
this._onVendorConnectFailure.bind(this, cs, ep));
this.addCustomEventListener(ep, HoundifyTranscriptionEvents.Connect,
this._onVendorConnect.bind(this, cs, ep));
break;
case 'voxist':
this.bugname = `${this.bugname_prefix}voxist_transcribe`;
this.addCustomEventListener(ep, VoxistTranscriptionEvents.Transcription,
@@ -914,7 +942,7 @@ class TaskGather extends SttTask {
evt = this.normalizeTranscription(evt, this.vendor, 1, this.language,
this.shortUtterance, this.data.recognizer.punctuation);
//this.logger.debug({evt, bugname, finished, vendor: this.vendor}, 'Gather:_onTranscription normalized transcript');
this.logger.debug({evt, bugname, finished, vendor: this.vendor}, 'Gather:_onTranscription normalized transcript');
if (evt.alternatives.length === 0) {
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, continue listening');
@@ -1080,6 +1108,11 @@ class TaskGather extends SttTask {
this.cs.requestor.request('verb:hook', this.partialResultHook, Object.assign({speech: evt},
this.cs.callInfo, httpHeaders));
}
else if (this.vendor === 'deepgramflux' &&
['EagerEndOfTurn', 'TurnResumed'].includes(evt.vendor.evt?.event)) {
this.logger.debug(`Gather:_onTranscription - deepgramflux event detected: ${evt.event}`);
this.performAction({speech: evt, reason: 'speechDetected'}, false);
}
if (this.vendor === 'soniox') {
if (evt.vendor.finalWords.length) {
this.logger.debug({evt}, 'TaskGather:_onTranscription - buffering soniox transcript');

View File

@@ -5,6 +5,17 @@ const moment = require('moment');
const MAX_PLAY_AUDIO_QUEUE_SIZE = 10;
const DTMF_SPAN_NAME = 'dtmf';
function escapeString(str) {
return str
.replace(/\\/g, '\\\\') // Escape backslashes
.replace(/"/g, '\\"') // Escape double quotes
.replace(/\b/g, '\\b') // Escape backspace
.replace(/\f/g, '\\f') // Escape formfeed
.replace(/\n/g, '\\n') // Escape newlines
.replace(/\r/g, '\\r') // Escape carriage returns
.replace(/\t/g, '\\t'); // Escape tabs
}
class TaskListen extends Task {
constructor(logger, opts, parentTask) {
super(logger, opts);
@@ -16,10 +27,20 @@ class TaskListen extends Task {
this.preconditions = TaskPreconditions.Endpoint;
[
'action', 'auth', 'method', 'url', 'finishOnKey', 'maxLength', 'metadata', 'mixType', 'passDtmf', 'playBeep',
'action', 'auth', 'method', 'url', 'finishOnKey', 'maxLength', 'mixType', 'passDtmf', 'playBeep',
'sampleRate', 'timeout', 'transcribe', 'wsAuth', 'disableBidirectionalAudio', 'channel'
].forEach((k) => this[k] = this.data[k]);
//Escape JSON special characters in metadata
if (this.data.metadata) {
this.metadata = {};
for (const key in this.data.metadata) {
if (this.data.metadata.hasOwnProperty(key)) {
this.metadata[key] = escapeString(this.data.metadata[key]);
}
}
}
this.mixType = this.mixType || 'mono';
this.sampleRate = this.sampleRate || 8000;
this.earlyMedia = this.data.earlyMedia === true;

View File

@@ -6,9 +6,21 @@ class TaskPlay extends Task {
super(logger, opts);
this.preconditions = TaskPreconditions.Endpoint;
this.url = this.data.url.includes('?')
? this.data.url.split('?')[0] + '?' + this.data.url.split('?')[1].replaceAll('.', '%2E')
: this.data.url;
//Cleanup URLs that contain a querystring with a . unless that querystring is the filename
// see https://github.com/jambonz/jambonz-feature-server/pull/1293
// and https://github.com/jambonz/jambonz-feature-server/issues/1394 for background
if (this.data.url.includes('?')) {
if (['.mp3', '.wav'].includes(this.data.url.slice(-4))) {
this.url = this.data.url;
}
else {
this.url = this.data.url.split('?')[0] + '?' + this.data.url.split('?')[1].replaceAll('.', '%2E');
}
}
else {
this.url = this.data.url;
}
this.seekOffset = this.data.seekOffset || -1;
this.timeoutSecs = this.data.timeoutSecs || -1;
this.loop = this.data.loop || 1;

View File

@@ -409,6 +409,9 @@ class TaskSay extends TtsTask {
});
const r = await ep.play(filename);
this.logger.debug({r}, 'Say:exec play result');
if (r.playbackSeconds == null && r.playbackMilliseconds == null && r.playbackLastOffsetPos == null) {
this._playReject(new Error('Playback failed to start'));
}
try {
// wait for playback-stop event received to confirm if the playback is successful
await this._playPromise;

View File

@@ -203,6 +203,56 @@ class SttTask extends Task {
if (cs.hasGlobalSttPunctuation && !this.data.recognizer.punctuation) {
this.data.recognizer.punctuation = cs.globalSttPunctuation;
}
if (this.vendor === 'gladia') {
const { api_key, region } = this.sttCredentials;
const {url} = await this.createGladiaLiveSession({
api_key, region,
model: this.data.recognizer.model || 'solaria-1',
options: this.data.recognizer.gladiaOptions || {}
});
const {host, pathname, search} = new URL(url);
this.sttCredentials.host = host;
this.sttCredentials.path = `${pathname}${search}`;
}
}
async createGladiaLiveSession({
api_key,
region = 'us-west',
model = 'solaria-1',
options = {},
}) {
const url = `https://api.gladia.io/v2/live?region=${region}`;
const response = await fetch(url, {
method: 'POST',
headers: {
'x-gladia-key': api_key,
'Content-Type': 'application/json'
},
body: JSON.stringify({
encoding: 'wav/pcm',
bit_depth: 16,
sample_rate: 8000,
channels: 1,
model,
...options,
messages_config: {
receive_final_transcripts: true,
receive_speech_events: true,
receive_errors: true,
}
})
});
if (!response.ok) {
const error = await response.text();
this.logger.error({url, status: response.status, error}, 'Error creating Gladia live session');
throw new Error(`Error creating Gladia live session: ${response.status} ${error}`);
}
const data = await response.json();
this.logger.debug({url: data.url}, 'Gladia Call registered');
return data;
}
addCustomEventListener(ep, event, handler) {

View File

@@ -6,7 +6,8 @@ const {
AwsTranscriptionEvents,
AzureTranscriptionEvents,
DeepgramTranscriptionEvents,
DeepgramRiverTranscriptionEvents,
GladiaTranscriptionEvents,
DeepgramfluxTranscriptionEvents,
SonioxTranscriptionEvents,
CobaltTranscriptionEvents,
IbmTranscriptionEvents,
@@ -14,6 +15,7 @@ const {
JambonzTranscriptionEvents,
TranscribeStatus,
AssemblyAiTranscriptionEvents,
HoundifyTranscriptionEvents,
VoxistTranscriptionEvents,
CartesiaTranscriptionEvents,
OpenAITranscriptionEvents,
@@ -244,15 +246,27 @@ class TaskTranscribe extends SttTask {
//if (opts.DEEPGRAM_SPEECH_UTTERANCE_END_MS) this.isContinuousAsr = true;
break;
case 'deepgramriver':
this.bugname = `${this.bugname_prefix}deepgramriver_transcribe`;
this.addCustomEventListener(ep, DeepgramRiverTranscriptionEvents.Transcription,
case 'deepgramflux':
this.bugname = `${this.bugname_prefix}deepgramflux_transcribe`;
this.addCustomEventListener(ep, DeepgramfluxTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
this.addCustomEventListener(ep, DeepgramRiverTranscriptionEvents.Connect,
this.addCustomEventListener(ep, DeepgramfluxTranscriptionEvents.Connect,
this._onVendorConnect.bind(this, cs, ep));
this.addCustomEventListener(ep, DeepgramRiverTranscriptionEvents.ConnectFailure,
this.addCustomEventListener(ep, DeepgramfluxTranscriptionEvents.ConnectFailure,
this._onVendorConnectFailure.bind(this, cs, ep, channel));
this.addCustomEventListener(ep, DeepgramRiverTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep));
this.addCustomEventListener(ep, DeepgramfluxTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep));
break;
case 'gladia':
this.bugname = `${this.bugname_prefix}gladia_transcribe`;
this.addCustomEventListener(ep, GladiaTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
this.addCustomEventListener(ep, GladiaTranscriptionEvents.Connect,
this._onVendorConnect.bind(this, cs, ep));
this.addCustomEventListener(ep, GladiaTranscriptionEvents.ConnectFailure,
this._onVendorConnectFailure.bind(this, cs, ep, channel));
this.addCustomEventListener(ep, GladiaTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep));
break;
case 'soniox':
@@ -324,6 +338,18 @@ class TaskTranscribe extends SttTask {
this._onVendorConnectFailure.bind(this, cs, ep, channel));
break;
case 'houndify':
this.bugname = `${this.bugname_prefix}houndify_transcribe`;
this.addCustomEventListener(ep, HoundifyTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
this.addCustomEventListener(ep, HoundifyTranscriptionEvents.Error,
this._onVendorError.bind(this, cs, ep));
this.addCustomEventListener(ep, HoundifyTranscriptionEvents.ConnectFailure,
this._onVendorConnectFailure.bind(this, cs, ep, channel));
this.addCustomEventListener(ep, HoundifyTranscriptionEvents.Connect,
this._onVendorConnect.bind(this, cs, ep));
break;
case 'voxist':
this.bugname = `${this.bugname_prefix}voxist_transcribe`;
this.addCustomEventListener(ep, VoxistTranscriptionEvents.Transcription,

View File

@@ -81,7 +81,7 @@ class TtsTask extends Task {
}
async setTtsStreamingChannelVars(vendor, language, voice, credentials, ep) {
const {api_key, model_id, custom_tts_streaming_url, auth_token} = credentials;
const {api_key, model_id, api_uri, custom_tts_streaming_url, auth_token} = credentials;
let obj;
this.logger.debug(`setTtsStreamingChannelVars: vendor: ${vendor}, language: ${language}, voice: ${voice}`);
@@ -105,6 +105,7 @@ class TtsTask extends Task {
const {stability, similarity_boost, use_speaker_boost, style, speed} = this.options.voice_settings || {};
obj = {
ELEVENLABS_API_KEY: api_key,
...(api_uri && {ELEVENLABS_API_URI: api_uri}),
ELEVENLABS_TTS_STREAMING_MODEL_ID: model_id,
ELEVENLABS_TTS_STREAMING_VOICE_ID: voice,
// 20/12/2024 - only eleven_turbo_v2_5 support multiple language

View File

@@ -97,11 +97,17 @@
"Connect": "deepgram_transcribe::connect",
"Error": "deepgram_transcribe::error"
},
"DeepgramRiverTranscriptionEvents": {
"Transcription": "deepgramriver_transcribe::transcription",
"ConnectFailure": "deepgramriver_transcribe::connect_failed",
"Connect": "deepgramriver_transcribe::connect",
"Error": "deepgramriver_transcribe::error"
"DeepgramfluxTranscriptionEvents": {
"Transcription": "deepgramflux_transcribe::transcription",
"ConnectFailure": "deepgramflux_transcribe::connect_failed",
"Connect": "deepgramflux_transcribe::connect",
"Error": "deepgramflux_transcribe::error"
},
"GladiaTranscriptionEvents": {
"Transcription": "gladia_transcribe::transcription",
"ConnectFailure": "gladia_transcribe::connect_failed",
"Connect": "gladia_transcribe::connect",
"Error": "gladia_transcribe::error"
},
"SonioxTranscriptionEvents": {
"Transcription": "soniox_transcribe::transcription",
@@ -169,6 +175,12 @@
"ConnectFailure": "assemblyai_transcribe::connect_failed",
"Connect": "assemblyai_transcribe::connect"
},
"HoundifyTranscriptionEvents": {
"Transcription": "houndify_transcribe::transcription",
"Error": "houndify_transcribe::error",
"ConnectFailure": "houndify_transcribe::connect_failed",
"Connect": "houndify_transcribe::connect"
},
"VoxistTranscriptionEvents": {
"Transcription": "voxist_transcribe::transcription",
"Error": "voxist_transcribe::error",

View File

@@ -81,7 +81,12 @@ const speechMapper = (cred) => {
obj.deepgram_tts_uri = o.deepgram_tts_uri;
obj.deepgram_stt_use_tls = o.deepgram_stt_use_tls;
}
else if ('deepgramriver' === obj.vendor) {
else if ('gladia' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
obj.region = o.region;
}
else if ('deepgramflux' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
}
@@ -101,6 +106,7 @@ const speechMapper = (cred) => {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
obj.model_id = o.model_id;
obj.api_uri = o.api_uri;
obj.options = o.options;
}
else if ('playht' === obj.vendor) {
@@ -141,6 +147,12 @@ const speechMapper = (cred) => {
obj.api_key = o.api_key;
obj.service_version = o.service_version;
}
else if ('houndify' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.client_id = o.client_id;
obj.client_key = o.client_key;
obj.user_id = o.user_id;
}
else if ('voxist' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;

View File

@@ -131,6 +131,43 @@ const stickyVars = {
'OPENAI_TURN_DETECTION_PREFIX_PADDING_MS',
'OPENAI_TURN_DETECTION_SILENCE_DURATION_MS',
],
houndify: [
'HOUNDIFY_CLIENT_ID',
'HOUNDIFY_CLIENT_KEY',
'HOUNDIFY_USER_ID',
'HOUNDIFY_MAX_SILENCE_SECONDS',
'HOUNDIFY_MAX_SILENCE_AFTER_FULL_QUERY_SECONDS',
'HOUNDIFY_MAX_SILENCE_AFTER_PARTIAL_QUERY_SECONDS',
'HOUNDIFY_VAD_SENSITIVITY',
'HOUNDIFY_VAD_TIMEOUT',
'HOUNDIFY_VAD_MODE',
'HOUNDIFY_VAD_VOICE_MS',
'HOUNDIFY_VAD_SILENCE_MS',
'HOUNDIFY_VAD_DEBUG',
'HOUNDIFY_AUDIO_FORMAT',
'HOUNDIFY_ENABLE_NOISE_REDUCTION',
'HOUNDIFY_AUDIO_ENDPOINT',
'HOUNDIFY_ENABLE_PROFANITY_FILTER',
'HOUNDIFY_ENABLE_PUNCTUATION',
'HOUNDIFY_ENABLE_CAPITALIZATION',
'HOUNDIFY_CONFIDENCE_THRESHOLD',
'HOUNDIFY_ENABLE_DISFLUENCY_FILTER',
'HOUNDIFY_MAX_RESULTS',
'HOUNDIFY_ENABLE_WORD_TIMESTAMPS',
'HOUNDIFY_MAX_ALTERNATIVES',
'HOUNDIFY_PARTIAL_TRANSCRIPT_INTERVAL',
'HOUNDIFY_SESSION_TIMEOUT',
'HOUNDIFY_CONNECTION_TIMEOUT',
'HOUNDIFY_LATITUDE',
'HOUNDIFY_LONGITUDE',
'HOUNDIFY_CITY',
'HOUNDIFY_STATE',
'HOUNDIFY_COUNTRY',
'HOUNDIFY_TIMEZONE',
'HOUNDIFY_DOMAIN',
'HOUNDIFY_CUSTOM_VOCABULARY',
'HOUNDIFY_LANGUAGE_MODEL'
],
};
/**
@@ -339,20 +376,56 @@ const normalizeDeepgram = (evt, channel, language, shortUtterance) => {
};
};
const normalizeDeepgramRiver = (evt, channel, language, shortUtterance) => {
const normalizeGladia = (evt, channel, language, shortUtterance) => {
const copy = JSON.parse(JSON.stringify(evt));
// Handle Gladia transcript format
if (evt.type === 'transcript' && evt.data && evt.data.utterance) {
const utterance = evt.data.utterance;
const alternatives = [{
confidence: utterance.confidence || 0,
transcript: utterance.text || '',
}];
return {
language_code: utterance.language || language,
channel_tag: channel,
is_final: evt.data.is_final || false,
alternatives,
vendor: {
name: 'gladia',
evt: copy
}
};
}
};
const normalizeDeepgramFlux = (evt, channel, language) => {
const copy = JSON.parse(JSON.stringify(evt));
let turnTakingEvent;
if (['StartOfTurn', 'EagerEndOfTurn', 'TurnResumed', 'EndOfTurn'].includes(evt.event)) {
turnTakingEvent = evt.event;
}
/* calculate total confidence based on word-level confidence */
const realWords = (evt.words || [])
.filter((w) => ![',.!?;'].includes(w.word));
const confidence = realWords.length > 0 ? realWords.reduce((acc, w) => acc + w.confidence, 0) / realWords.length : 0;
return {
language_code: language,
channel_tag: channel,
is_final: evt.event === 'EndOfTurn',
alternatives: [
{
confidence: evt.end_of_turn_confidence,
confidence,
end_of_turn_confidence: evt.end_of_turn_confidence,
transcript: evt.transcript,
...(turnTakingEvent && {turn_taking_event: turnTakingEvent})
}
],
vendor: {
name: 'deepgramriver',
name: 'deepgramflux',
evt: copy
}
};
@@ -570,6 +643,30 @@ const normalizeAssemblyAi = (evt, channel, language) => {
};
};
const normalizeHoundify = (evt, channel, language) => {
const copy = JSON.parse(JSON.stringify(evt));
const alternatives = [];
const is_final = evt.ResultsAreFinal && evt.ResultsAreFinal[0] === true;
if (evt.Disambiguation && evt.Disambiguation.ChoiceData && evt.Disambiguation.ChoiceData.length > 0) {
// Handle Houndify Voice Search Result format
const choiceData = evt.Disambiguation.ChoiceData[0];
alternatives.push({
confidence: choiceData.ConfidenceScore || choiceData.ASRConfidence || 0.0,
transcript: choiceData.FormattedTranscription || choiceData.Transcription || '',
});
}
return {
language_code: language,
channel_tag: channel,
is_final,
alternatives,
vendor: {
name: 'houndify',
evt: copy
}
};
};
const normalizeVoxist = (evt, channel, language) => {
const copy = JSON.parse(JSON.stringify(evt));
return {
@@ -669,8 +766,10 @@ module.exports = (logger) => {
switch (vendor) {
case 'deepgram':
return normalizeDeepgram(evt, channel, language, shortUtterance);
case 'deepgramriver':
return normalizeDeepgramRiver(evt, channel, language, shortUtterance);
case 'gladia':
return normalizeGladia(evt, channel, language, shortUtterance);
case 'deepgramflux':
return normalizeDeepgramFlux(evt, channel, language, shortUtterance);
case 'microsoft':
return normalizeMicrosoft(evt, channel, language, punctuation);
case 'google':
@@ -689,6 +788,8 @@ module.exports = (logger) => {
return normalizeCobalt(evt, channel, language);
case 'assemblyai':
return normalizeAssemblyAi(evt, channel, language, shortUtterance);
case 'houndify':
return normalizeHoundify(evt, channel, language, shortUtterance);
case 'voxist':
return normalizeVoxist(evt, channel, language);
case 'cartesia':
@@ -965,19 +1066,30 @@ module.exports = (logger) => {
...(entityPrompt && {DEEPGRAM_SPEECH_ENTITY_PROMPT: entityPrompt}),
};
}
else if ('deepgramriver' === vendor) {
else if ('deepgramflux' === vendor) {
const {
preflightThreshold,
eotThreshold,
eotTimeoutMs,
mipOptOut
mipOptOut,
model,
eagerEotThreshold,
keyterms
} = rOpts.deepgramOptions || {};
opts = {
DEEPGRAM_API_KEY: sttCredentials.api_key,
...(preflightThreshold && {DEEPGRAM_SPEECH_PRELIGHT_THRESHOLD: preflightThreshold}),
...(eotThreshold && {DEEPGRAM_SPEECH_EOT_THRESHOLD: eotThreshold}),
...(eotTimeoutMs && {DEEPGRAM_SPEECH_EOT_TIMEOUT_MS: eotTimeoutMs}),
...(mipOptOut && {DEEPGRAM_SPEECH_MIP_OPT_OUT: mipOptOut}),
DEEPGRAMFLUX_API_KEY: sttCredentials.api_key,
DEEPGRAMFLUX_SPEECH_MODEL: model || 'flux-general-en',
...(eotThreshold && {DEEPGRAMFLUX_SPEECH_EOT_THRESHOLD: eotThreshold}),
...(eotTimeoutMs && {DEEPGRAMFLUX_SPEECH_EOT_TIMEOUT_MS: eotTimeoutMs}),
...(mipOptOut && {DEEPGRAMFLUX_SPEECH_MIP_OPT_OUT: mipOptOut}),
...(eagerEotThreshold && {DEEPGRAMFLUX_SPEECH_EAGER_EOT_THRESHOLD: eagerEotThreshold}),
...(keyterms && keyterms.length > 0 && {DEEPGRAMFLUX_SPEECH_KEYTERMS: keyterms.join(',')}),
};
}
else if ('gladia' === vendor) {
const {host, path} = sttCredentials;
opts = {
GLADIA_SPEECH_HOST: host,
GLADIA_SPEECH_PATH: path,
};
}
else if ('soniox' === vendor) {
@@ -1106,6 +1218,57 @@ module.exports = (logger) => {
{ASSEMBLYAI_WORD_BOOST: JSON.stringify(rOpts.hints)})
};
}
else if ('houndify' === vendor) {
const {
latitude, longitude, city, state, country, timeZone, domain, audioEndpoint,
maxSilenceSeconds, maxSilenceAfterFullQuerySeconds, maxSilenceAfterPartialQuerySeconds,
vadSensitivity, vadTimeout, vadMode, vadVoiceMs, vadSilenceMs, vadDebug,
audioFormat, enableNoiseReduction, enableProfanityFilter, enablePunctuation,
enableCapitalization, confidenceThreshold, enableDisfluencyFilter,
maxResults, enableWordTimestamps, maxAlternatives, partialTranscriptInterval,
sessionTimeout, connectionTimeout, customVocabulary, languageModel
} = rOpts.houndifyOptions || {};
opts = {
...opts,
HOUNDIFY_CLIENT_ID: sttCredentials.client_id,
HOUNDIFY_CLIENT_KEY: sttCredentials.client_key,
HOUNDIFY_USER_ID: sttCredentials.user_id,
HOUNDIFY_MAX_SILENCE_SECONDS: maxSilenceSeconds || 5,
HOUNDIFY_MAX_SILENCE_AFTER_FULL_QUERY_SECONDS: maxSilenceAfterFullQuerySeconds || 1,
HOUNDIFY_MAX_SILENCE_AFTER_PARTIAL_QUERY_SECONDS: maxSilenceAfterPartialQuerySeconds || 1.5,
...(vadSensitivity && {HOUNDIFY_VAD_SENSITIVITY: vadSensitivity}),
...(vadTimeout && {HOUNDIFY_VAD_TIMEOUT: vadTimeout}),
...(vadMode && {HOUNDIFY_VAD_MODE: vadMode}),
...(vadVoiceMs && {HOUNDIFY_VAD_VOICE_MS: vadVoiceMs}),
...(vadSilenceMs && {HOUNDIFY_VAD_SILENCE_MS: vadSilenceMs}),
...(vadDebug && {HOUNDIFY_VAD_DEBUG: vadDebug}),
...(audioFormat && {HOUNDIFY_AUDIO_FORMAT: audioFormat}),
...(enableNoiseReduction && {HOUNDIFY_ENABLE_NOISE_REDUCTION: enableNoiseReduction}),
...(enableProfanityFilter && {HOUNDIFY_ENABLE_PROFANITY_FILTER: enableProfanityFilter}),
...(enablePunctuation && {HOUNDIFY_ENABLE_PUNCTUATION: enablePunctuation}),
...(enableCapitalization && {HOUNDIFY_ENABLE_CAPITALIZATION: enableCapitalization}),
...(confidenceThreshold && {HOUNDIFY_CONFIDENCE_THRESHOLD: confidenceThreshold}),
...(enableDisfluencyFilter && {HOUNDIFY_ENABLE_DISFLUENCY_FILTER: enableDisfluencyFilter}),
...(maxResults && {HOUNDIFY_MAX_RESULTS: maxResults}),
...(enableWordTimestamps && {HOUNDIFY_ENABLE_WORD_TIMESTAMPS: enableWordTimestamps}),
...(maxAlternatives && {HOUNDIFY_MAX_ALTERNATIVES: maxAlternatives}),
...(partialTranscriptInterval && {HOUNDIFY_PARTIAL_TRANSCRIPT_INTERVAL: partialTranscriptInterval}),
...(sessionTimeout && {HOUNDIFY_SESSION_TIMEOUT: sessionTimeout}),
...(connectionTimeout && {HOUNDIFY_CONNECTION_TIMEOUT: connectionTimeout}),
...(latitude && {HOUNDIFY_LATITUDE: latitude}),
...(longitude && {HOUNDIFY_LONGITUDE: longitude}),
...(city && {HOUNDIFY_CITY: city}),
...(state && {HOUNDIFY_STATE: state}),
...(country && {HOUNDIFY_COUNTRY: country}),
...(timeZone && {HOUNDIFY_TIMEZONE: timeZone}),
...(domain && {HOUNDIFY_DOMAIN: domain}),
...(audioEndpoint && {HOUNDIFY_AUDIO_ENDPOINT: audioEndpoint}),
...(customVocabulary && {HOUNDIFY_CUSTOM_VOCABULARY:
Array.isArray(customVocabulary) ? customVocabulary.join(',') : customVocabulary}),
...(languageModel && {HOUNDIFY_LANGUAGE_MODEL: languageModel}),
};
}
else if ('voxist' === vendor) {
opts = {
...opts,

30
package-lock.json generated
View File

@@ -11,14 +11,14 @@
"dependencies": {
"@aws-sdk/client-auto-scaling": "^3.549.0",
"@aws-sdk/client-sns": "^3.549.0",
"@jambonz/db-helpers": "^0.9.17",
"@jambonz/db-helpers": "^0.9.18",
"@jambonz/http-health-check": "^0.0.1",
"@jambonz/mw-registrar": "^0.2.7",
"@jambonz/realtimedb-helpers": "^0.8.15",
"@jambonz/speech-utils": "^0.2.23",
"@jambonz/speech-utils": "^0.2.25",
"@jambonz/stats-collector": "^0.1.10",
"@jambonz/time-series": "^0.2.14",
"@jambonz/verb-specifications": "^0.0.113",
"@jambonz/verb-specifications": "^0.0.118",
"@modelcontextprotocol/sdk": "^1.9.0",
"@opentelemetry/api": "^1.8.0",
"@opentelemetry/exporter-jaeger": "^1.23.0",
@@ -1437,9 +1437,9 @@
}
},
"node_modules/@jambonz/db-helpers": {
"version": "0.9.17",
"resolved": "https://registry.npmjs.org/@jambonz/db-helpers/-/db-helpers-0.9.17.tgz",
"integrity": "sha512-LQErcrT/uts83Up9UJVqPPSZy0+CLw/djES3bgUoB600uLz7Wfgpi309XTIjNLikQjDsNFaTneZY0QwDKMCbzg==",
"version": "0.9.18",
"resolved": "https://registry.npmjs.org/@jambonz/db-helpers/-/db-helpers-0.9.18.tgz",
"integrity": "sha512-rrrBzz6UnEW0LV4OXRUzjg2XzwUGNjuJptKPxozvCQYOSuXQsRYcc7AM+p32ssBd+LdTX866pQ0QwRc2cUEAPw==",
"license": "MIT",
"dependencies": {
"cidr-matcher": "^2.1.1",
@@ -1476,9 +1476,9 @@
}
},
"node_modules/@jambonz/speech-utils": {
"version": "0.2.23",
"resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.2.23.tgz",
"integrity": "sha512-o28IBoKzdnQoUUSC1XljHVkDPWhkTH+rFnI9OWYC6p1/f8px++4Y23/JMIAJVbxqKB1CIf531JhTwy4tCnQP7g==",
"version": "0.2.25",
"resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.2.25.tgz",
"integrity": "sha512-dtMY4SoGhpvKM6slPvTlMz61Fd0Rxnajwm3sJVxlAYZnFtzfIvqg9rrYUN4h05tJbo/4TLs15/dVeicw7dTnew==",
"license": "MIT",
"dependencies": {
"23": "^0.0.0",
@@ -1517,9 +1517,9 @@
}
},
"node_modules/@jambonz/verb-specifications": {
"version": "0.0.113",
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.113.tgz",
"integrity": "sha512-8dqEtUwEYw8wA7a0xYruBdwmUwUx1Q97483UgkVm3HcmJVkkkf9g7C3vI+jfZLyDZsOuu6CkFAf/KrM8DTx/KA==",
"version": "0.0.118",
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.118.tgz",
"integrity": "sha512-1dGnc6TUCehjt1yGNuqh1uzk1xw9HhUm39aVUosQMHlnT0fK0ItikeJ0uttTjFastHNmPPxqJwb20wOvVGTCFg==",
"license": "MIT",
"dependencies": {
"debug": "^4.3.4",
@@ -3890,9 +3890,9 @@
}
},
"node_modules/drachtio-srf": {
"version": "5.0.11",
"resolved": "https://registry.npmjs.org/drachtio-srf/-/drachtio-srf-5.0.11.tgz",
"integrity": "sha512-QbRsKIaB4vmPExpob3n0uEgLrSfrCeXrKlpYGNwMSuajWlgcb3947ouCkAtpjLw+tXJfumC7BlBPPa+ZLRSIIA==",
"version": "5.0.13",
"resolved": "https://registry.npmjs.org/drachtio-srf/-/drachtio-srf-5.0.13.tgz",
"integrity": "sha512-IG62MLLzhXjQtbjX6T6I8jXK6QhWQwsblkO3+F2Zhcu4lXBO3W12rrKyAPsw6GimRSSXIkvMbn5/je8AU/xehQ==",
"license": "MIT",
"dependencies": {
"debug": "^3.2.7",

View File

@@ -27,14 +27,14 @@
"dependencies": {
"@aws-sdk/client-auto-scaling": "^3.549.0",
"@aws-sdk/client-sns": "^3.549.0",
"@jambonz/db-helpers": "^0.9.17",
"@jambonz/db-helpers": "^0.9.18",
"@jambonz/http-health-check": "^0.0.1",
"@jambonz/mw-registrar": "^0.2.7",
"@jambonz/realtimedb-helpers": "^0.8.15",
"@jambonz/speech-utils": "^0.2.23",
"@jambonz/speech-utils": "^0.2.25",
"@jambonz/stats-collector": "^0.1.10",
"@jambonz/time-series": "^0.2.14",
"@jambonz/verb-specifications": "^0.0.113",
"@jambonz/verb-specifications": "^0.0.118",
"@modelcontextprotocol/sdk": "^1.9.0",
"@opentelemetry/api": "^1.8.0",
"@opentelemetry/exporter-jaeger": "^1.23.0",