Compare commits

..

52 Commits
v0.8.0 ... #301

Author SHA1 Message Date
Hoan Luu Huu
11c5047465 fix: Re-invite sip rec does not update media (#300)
* fix: Re-invite sip rec does not update media

* fix: Re-invite sip rec does not update media
2023-04-05 09:46:32 -04:00
Dave Horton
e19ea629f0 response to siprec invite should have a:recvonly if offer had a:sendonly (#298) 2023-04-04 21:02:21 -04:00
Antony Jukes
fe529c6bfb removed incorrect "this" from this.target.auth as it actually a local const. (#296) 2023-04-03 11:13:12 -04:00
Dave Horton
e980b82ec4 update to speech utils with improved microsoft tts 2023-04-01 13:20:59 -04:00
Hoan Luu Huu
318ca19791 fix: update speech utils version (#295)
* fix: update speech utils version

* update package-lock.json

---------

Co-authored-by: Dave Horton <daveh@beachdognet.com>
2023-04-01 11:35:13 -04:00
Dave Horton
e2bd211346 update to latest speech-utils 2023-03-31 16:50:46 -04:00
Dave Horton
410c07fae6 further fix for google model 2023-03-31 12:37:04 -04:00
Dave Horton
2ebfbfb3d8 google STT: when altLanguges are used default to a model that supports it 2023-03-31 12:31:14 -04:00
Dave Horton
a29795839d Bugfix/bot mode restart (#292)
* restart background gather if we get a new config with bargein=enable and changes to input types

* stop background gather properly before restarting

* fix: sticky background gather tasks must not be restarted if we have a new background gather

* fix undefined reference

* safety
2023-03-31 09:35:23 -04:00
Hoan Luu Huu
28088a4cdd feat: queue play audio (#290)
* feat: queue play audio

* fix: typo

* fix: typo

* fix: typo

* fix: typo

* fix: typo

* fix: typo

* fix: typo

* fix: typo

* fix: typo

* fix: typo
2023-03-30 15:31:54 -04:00
Dave Horton
afb381eec9 bugfix: setting altLanguages on Azure once left it turned on 2023-03-29 08:49:34 -04:00
Dave Horton
ed00ccb681 bump version 2023-03-28 14:14:25 -04:00
Dave Horton
6e945dde9a google stt fixes, including defaulting to phone_call model based on c… (#288)
* google stt fixes, including defaulting to phone_call model based on comparative model testing

* lint error
2023-03-28 10:02:03 -04:00
Dave Horton
efdea3e514 gather defaults to multiple utterances 2023-03-27 15:53:01 -04:00
Dave Horton
5131d524ce bugfix: allow for empty transcripts that nuance returns 2023-03-27 14:13:50 -04:00
Anton Voylenko
c0114015ea check encryption env on start (#286) 2023-03-26 15:45:20 -04:00
Anton Voylenko
a293ec09d0 add ENCRYPTION_SECRET variable (#283)
* add ENCRYPTION_SECRET variable

* add env for tests
2023-03-26 14:52:58 -04:00
Dave Horton
f71ae83ce4 bugfix: nuance on-prem stt 2023-03-26 14:26:36 -04:00
Hoan Luu Huu
0dd161913c fix: gather task should clear dtmf event before resolve (#284)
Co-authored-by: Quan HL <quanluuhoang8@gmail.com>
2023-03-26 12:32:51 -04:00
Dave Horton
63ab554908 google STT: default to command_and_search for Gather, as latest_short seems to have issues, various other fixes (#285) 2023-03-26 12:20:03 -04:00
Dave Horton
e1bd075ebc support for nuance on-prem stt/tts 2023-03-25 12:08:54 -04:00
Dave Horton
9de89258a1 update speech-utils@0.0.8 2023-03-24 14:50:08 -04:00
Dave Horton
145ed488db make the feature committed in dd4d9aa enabled only if JAMBONES_GATHER_CLEAR_GLOBAL_HINTS_ON_EMPTY_HINTS is set, as it is a behavior change 2023-03-23 07:54:39 -04:00
Dave Horton
c06a43adfa Gather: bugfix for alternate languages with Azure 2023-03-22 14:32:25 -04:00
Dave Horton
bebc82d194 bugfix: gather with google STT does not need to restart transcribing after end of utterance 2023-03-21 15:46:00 -04:00
Dave Horton
cdc82e99ff add minor logging 2023-03-21 12:35:02 -04:00
Dave Horton
dd4d9aa261 Gather: if an empty array of hints are supplied, this signals we should mask global hints for this collection 2023-03-21 12:16:12 -04:00
Dave Horton
1dcf9ee5a2 update to speech-utils@0.0.6 2023-03-21 08:27:25 -04:00
Dave Horton
4b28db0946 update to speech-utils@.0.0.5 2023-03-21 08:00:52 -04:00
Dave Horton
e7ff76b938 update to speech-utils with AWS tts bugfix 2023-03-20 15:35:20 -04:00
Dave Horton
f245275983 gather: remove duplicate and null hints, restart timeout on interim transcripts 2023-03-20 15:34:55 -04:00
Dave Horton
690deed89d prune unused logging 2023-03-19 12:04:02 -04:00
Dave Horton
26053ec709 update speech-utils with support for more audio formats for custom tts 2023-03-15 09:14:41 -04:00
Dave Horton
34e8203338 update to realtime-dbhelpers that factored out speech-utils 2023-03-14 10:07:29 -04:00
Hoan Luu Huu
7be3c64116 feat: update speech-ultil version 1.0.1 (#275)
* feat: update speech-ultil version 1.0.1

* feat: update speech-ultil version 1.0.1

* more fixes for custom stt

* more fixes

* fixes

* update drachtio-fsmrf

* pass url to mod_jambonz_transcribe

* transcription utils: handle custom results

* handle custom speech vendor errors

* add support for hints to custom speech

* change to custom speech options

* send hints as an array for custom speech

* update latest speech-utils

* transcribe: changes to support soniox

* bugfix: soniox transcribe

---------

Co-authored-by: Quan HL <quanluuhoang8@gmail.com>
Co-authored-by: Dave Horton <daveh@beachdognet.com>
2023-03-12 19:38:36 -04:00
Hoan Luu Huu
f71d3aed8b feat: forward PAI from inbound call to dial outbound call (#280)
* feat: forward PAI from inbound call to dial outbound call

* fix: review comment

---------

Co-authored-by: Quan HL <quanluuhoang8@gmail.com>
2023-03-09 08:58:19 -05:00
Hoan Luu Huu
5ab24337b2 fix: use TTS_FAILURE alert type for synthAudio (#278)
Co-authored-by: Quan HL <quanluuhoang8@gmail.com>
2023-03-08 07:42:06 -05:00
Dave Horton
2af76d94a6 bugfix: repeated ws failures should stop eventually 2023-03-07 16:29:00 -05:00
Dave Horton
4919c05181 add verb:status for play events (#274) 2023-03-03 15:56:50 -05:00
Dave Horton
3084a9d6ba #241 - gather bargein on Nuance has to be based on start of speech event (#246) 2023-03-03 13:39:23 -05:00
Dave Horton
1c683f1142 initial changes for soniox (#270)
* initial changes for soniox

* changes to gather for soniox

* parse soniox stt results

* handle <end> token for soniox

* soniox: handle empty array of words

* support for soniox hints

* add soniox storage options

* update to verb specs

* add support for transcribe

* compile soniox transcripts

* gather: kill no input timer for soniox when we get interim results

* fix buffering of soniox transcripts

* fix for compiling soniox transcript

* another fix for compiling soniox transcript

* another fix

* handling of <end> token

* fix soniox bug

* gather: fixes for soniox continous asr

* fix undefined variable reference

* fix prev commit

* bugfix: allow verb_status requests

* gather: for soniox no need to restart transcription after final transcription received

* update verb specs

* update verb specs, fixes for continuous asr:
2023-03-03 13:37:55 -05:00
Dave Horton
ab1947e23e bugfix: gather minBargeinWordCount defaults to 1 2023-02-24 10:27:05 -05:00
Dave Horton
5527abff09 bump version 2023-02-24 10:04:25 -05:00
Dave Horton
68827112fc further fix for early hints match in gather 2023-02-23 13:10:46 -05:00
Dave Horton
8a9a2df128 early hints fix that was not merged 2023-02-23 12:54:21 -05:00
Dave Horton
3a3544a5e8 remove some wordy logging 2023-02-23 12:32:41 -05:00
Dave Horton
cbeb706946 update to latest @jambonz/verb-specifications with less verbose logging 2023-02-23 12:16:14 -05:00
Dave Horton
f005262615 docs 2023-02-23 10:48:09 -05:00
Snyk bot
67ec28484c fix: package.json & package-lock.json to reduce vulnerabilities (#265)
The following vulnerabilities are fixed with an upgrade:
- https://snyk.io/vuln/SNYK-JS-UNDICI-3323844
- https://snyk.io/vuln/SNYK-JS-UNDICI-3323845
2023-02-23 10:26:06 -05:00
two56
803a944240 Use the request from CallSession for cancel (#268)
* Use the req from CallSession for cancel

* Check cs is set

---------

Co-authored-by: Matt Preskett <matt.preskett@netcall.com>
2023-02-23 09:13:44 -05:00
Snyk bot
a5cd342e46 fix: Dockerfile to reduce vulnerabilities (#269) 2023-02-22 14:04:39 -05:00
EgleH
e91feb64f5 Update node base image to node:18.14.0-alpine3.16 (#267) 2023-02-21 07:54:00 -05:00
26 changed files with 2787 additions and 469 deletions

View File

@@ -1,4 +1,4 @@
FROM --platform=linux/amd64 node:18-alpine3.16 as base
FROM --platform=linux/amd64 node:18.14.1-alpine3.16 as base
RUN apk --update --no-cache add --virtual .builds-deps build-base python3

View File

@@ -18,8 +18,10 @@ Configuration is provided via environment variables:
|DRACHTIO_PORT| listening port of drachtio server for control connections (typically 9022)|yes|
|DRACHTIO_SECRET| shared secret|yes|
|ENABLE_METRICS| if 1, metrics will be generated|no|
|ENCRYPTION_SECRET| secret for credential encryption(JWT_SECRET is deprecated) |yes|
|GOOGLE_APPLICATION_CREDENTIALS| path to gcp service key file|yes|
|HTTP_PORT| tcp port to listen on for API requests from jambonz-api-server|yes|
|JAMBONES_GATHER_EARLY_HINTS_MATCH| if true and hints are provided, gather will opportunistically review interim transcripts if possible to reduce ASR latency |no|
|JAMBONES_FREESWITCH| IP:port:secret for Freeswitch server (e.g. '127.0.0.1:8021:JambonzR0ck$'|yes|
|JAMBONES_LOGLEVEL| log level for application, 'info' or 'debug'|no|
|JAMBONES_MYSQL_HOST| mysql host|yes|

1
app.js
View File

@@ -8,6 +8,7 @@ assert.ok(process.env.DRACHTIO_SECRET, 'missing DRACHTIO_SECRET env var');
assert.ok(process.env.JAMBONES_FREESWITCH, 'missing JAMBONES_FREESWITCH env var');
assert.ok(process.env.JAMBONES_REDIS_HOST, 'missing JAMBONES_REDIS_HOST env var');
assert.ok(process.env.JAMBONES_NETWORK_CIDR || process.env.K8S, 'missing JAMBONES_SUBNET env var');
assert.ok(process.env.ENCRYPTION_SECRET || process.env.JWT_SECRET, 'missing ENCRYPTION_SECRET env var');
const Srf = require('drachtio-srf');
const srf = new Srf();

View File

@@ -104,7 +104,7 @@ router.post('/', async(req, res) => {
proxy: `sip:${sbcAddress}`,
localSdp: ep.local.sdp
});
if (target.auth) opts.auth = this.target.auth;
if (target.auth) opts.auth = target.auth;
/**

View File

@@ -27,7 +27,11 @@ module.exports = function(srf, logger) {
function initLocals(req, res, next) {
const callId = req.get('Call-ID');
logger.info({callId}, 'new incoming call');
logger.info({
callId,
callingNumber: req.callingNumber,
calledNumber: req.calledNumber
}, 'new incoming call');
if (!req.has('X-Account-Sid')) {
logger.info('getAccountDetails - rejecting call due to missing X-Account-Sid header');
return res.send(500);

View File

@@ -511,12 +511,24 @@ class CallSession extends Emitter {
async enableBotMode(gather, autoEnable) {
try {
if (this.backgroundGatherTask) {
this.logger.info('CallSession:enableBotMode - bot mode currently enabled, ignoring request to start again');
return;
}
const t = normalizeJambones(this.logger, [gather]);
this.backgroundGatherTask = makeTask(this.logger, t[0]);
const task = makeTask(this.logger, t[0]);
if (this.isBotModeEnabled) {
const currInput = this.backgroundGatherTask.input;
const newInput = task.input;
if (JSON.stringify(currInput) === JSON.stringify(newInput)) {
this.logger.info('CallSession:enableBotMode - bot mode currently enabled, ignoring request to start again');
return;
}
else {
this.logger.info({currInput, newInput},
'CallSession:enableBotMode - restarting background gather to apply new input type');
this.backgroundGatherTask.sticky = false;
this.disableBotMode();
}
}
this.backgroundGatherTask = task;
this._bargeInEnabled = true;
this.backgroundGatherTask
.once('dtmf', this._clearTasks.bind(this, this.backgroundGatherTask))
@@ -528,13 +540,15 @@ class CallSession extends Emitter {
const {span, ctx} = this.rootSpan.startChildSpan(`background-gather:${this.backgroundGatherTask.summary}`);
this.backgroundGatherTask.span = span;
this.backgroundGatherTask.ctx = ctx;
this.backgroundGatherTask.sticky = autoEnable;
this.backgroundGatherTask.exec(this, resources)
.then(() => {
this.logger.info('CallSession:enableBotMode: gather completed');
this.backgroundGatherTask && this.backgroundGatherTask.removeAllListeners();
this.backgroundGatherTask && this.backgroundGatherTask.span.end();
const sticky = this.backgroundGatherTask?.sticky;
this.backgroundGatherTask = null;
if (autoEnable && !this.callGone && !this._stopping && this._bargeInEnabled) {
if (sticky && !this.callGone && !this._stopping && this._bargeInEnabled) {
this.logger.info('CallSession:enableBotMode: restarting background gather');
setImmediate(() => this.enableBotMode(gather, true));
}
@@ -636,7 +650,9 @@ class CallSession extends Emitter {
return {
speech_credential_sid: credential.speech_credential_sid,
client_id: credential.client_id,
secret: credential.secret
secret: credential.secret,
nuance_tts_uri: credential.nuance_tts_uri,
nuance_stt_uri: credential.nuance_stt_uri
};
}
else if ('deepgram' === vendor) {
@@ -645,6 +661,12 @@ class CallSession extends Emitter {
api_key: credential.api_key
};
}
else if ('soniox' === vendor) {
return {
speech_credential_sid: credential.speech_credential_sid,
api_key: credential.api_key
};
}
else if ('ibm' === vendor) {
return {
speech_credential_sid: credential.speech_credential_sid,
@@ -654,6 +676,14 @@ class CallSession extends Emitter {
stt_region: credential.stt_region
};
}
else if (vendor.startsWith('custom:')) {
return {
speech_credential_sid: credential.speech_credential_sid,
auth_token: credential.auth_token,
custom_stt_url: credential.custom_stt_url,
custom_tts_url: credential.custom_tts_url
};
}
}
else {
writeAlerts({
@@ -684,7 +714,7 @@ class CallSession extends Emitter {
let skip = false;
this.currentTask = task;
if (TaskName.Gather === task.name && this.isBotModeEnabled) {
if (this.backgroundGatherTask.updateTaskInProgress(task)) {
if (this.backgroundGatherTask.updateTaskInProgress(task) !== false) {
this.logger.info(`CallSession:exec skipping #${stackNum}:${taskNum}: ${task.name}`);
skip = true;
}
@@ -748,7 +778,6 @@ class CallSession extends Emitter {
trackTmpFile(path) {
// TODO: don't add if its already in the list (should we make it a set?)
this.logger.debug(`adding tmp file to track ${path}`);
this.tmpFiles.add(path);
}
@@ -1123,14 +1152,14 @@ class CallSession extends Emitter {
_injectTasks(newTasks) {
const gatherPos = this.tasks.map((t) => t.name).indexOf(TaskName.Gather);
const currentlyExecutingGather = this.currentTask?.name === TaskName.Gather;
/*
this.logger.debug({
currentTaskList: listTaskNames(this.tasks),
newContent: listTaskNames(newTasks),
currentlyExecutingGather,
gatherPos
}, 'CallSession:_injectTasks - starting');
*/
const killGather = () => {
this.logger.debug('CallSession:_injectTasks - killing current gather because we have new content');
this.currentTask.kill(this);
@@ -1139,10 +1168,11 @@ class CallSession extends Emitter {
if (-1 === gatherPos) {
/* no gather in the stack simply append tasks */
this.tasks.push(...newTasks);
/*
this.logger.debug({
updatedTaskList: listTaskNames(this.tasks)
}, 'CallSession:_injectTasks - completed (simple append)');
*/
/* we do need to kill the current gather if we are executing one */
if (currentlyExecutingGather) killGather();
return;
@@ -1170,12 +1200,10 @@ class CallSession extends Emitter {
this.replaceApplication(t);
}
else if (process.env.JAMBONES_INJECT_CONTENT) {
this.logger.debug({tasks: listTaskNames(t)}, 'CallSession:_onCommand - queueing tasks (injecting content)');
this._injectTasks(t);
this.logger.info({tasks: listTaskNames(this.tasks)}, 'CallSession:_onCommand - updated task list');
}
else {
this.logger.debug({tasks: listTaskNames(t)}, 'CallSession:_onCommand - queueing tasks');
this.tasks.push(...t);
this.logger.info({tasks: listTaskNames(this.tasks)}, 'CallSession:_onCommand - updated task list');
}
@@ -1219,7 +1247,7 @@ class CallSession extends Emitter {
this.logger.info(`CallSession:_onCommand - invalid command ${command}`);
}
if (this.wakeupResolver) {
this.logger.debug({resolution}, 'CallSession:_onCommand - got commands, waking up..');
//this.logger.debug({resolution}, 'CallSession:_onCommand - got commands, waking up..');
this.wakeupResolver(resolution);
this.wakeupResolver = null;
}

View File

@@ -1,6 +1,7 @@
const InboundCallSession = require('./inbound-call-session');
const {createSipRecPayload} = require('../utils/siprec-utils');
const {CallStatus} = require('../utils/constants');
const {parseSiprecPayload} = require('../utils/siprec-utils');
/**
* @classdesc Subclass of InboundCallSession. This represents a CallSession that is
* established for an inbound SIPREC call.
@@ -16,6 +17,32 @@ class SipRecCallSession extends InboundCallSession {
this.metadata = metadata;
}
async _onReinvite(req, res) {
try {
this.logger.info(req.payload, 'SipRec Re-INVITE payload');
const {sdp1: reSdp1, sdp2: reSdp2, metadata: reMetadata} = await parseSiprecPayload(req, this.logger);
this.sdp1 = reSdp1;
this.sdp2 = reSdp2;
this.metadata = reMetadata;
if (this.ep && this.ep2) {
let remoteSdp = this.sdp1.replace(/sendonly/, 'sendrecv');
const newSdp1 = await this.ep.modify(remoteSdp);
remoteSdp = this.sdp2.replace(/sendonly/, 'sendrecv');
const newSdp2 = await this.ep2.modify(remoteSdp);
const combinedSdp = await createSipRecPayload(newSdp1, newSdp2, this.logger);
res.send(200, {body: combinedSdp});
this.logger.info({offer: req.body, answer: combinedSdp}, 'SipRec handling reINVITE');
}
else {
this.logger.info('got SipRec reINVITE but no endpoint and media has not been released');
res.send(488);
}
} catch (err) {
this.logger.error(err, 'Error handling reinvite');
}
}
async answerSipRecCall() {
try {
this.ms = this.getMS();

View File

@@ -400,15 +400,19 @@ class TaskDial extends Task {
let fqdn;
if (!sbcAddress) throw new Error('no SBC found for outbound call');
this.headers = {
'X-Account-Sid': cs.accountSid,
...(req && req.has('X-CID') && {'X-CID': req.get('X-CID')}),
...(req && req.has('P-Asserted-Identity') && {'P-Asserted-Identity': req.get('P-Asserted-Identity')}),
// Put headers at the end to make sure opt.headers override all default behavior.
...this.headers
};
const opts = {
headers: req && req.has('X-CID') ? Object.assign(this.headers, {'X-CID': req.get('X-CID')}) : this.headers,
headers: this.headers,
proxy: `sip:${sbcAddress}`,
callingNumber: this.callerId || req.callingNumber
};
opts.headers = {
...opts.headers,
'X-Account-Sid': cs.accountSid
};
const t = this.target.find((t) => t.type === 'teams');
if (t) {

View File

@@ -7,8 +7,10 @@ const {
AwsTranscriptionEvents,
AzureTranscriptionEvents,
DeepgramTranscriptionEvents,
SonioxTranscriptionEvents,
IbmTranscriptionEvents,
NvidiaTranscriptionEvents
NvidiaTranscriptionEvents,
JambonzTranscriptionEvents
} = require('../utils/constants');
const makeTask = require('./make_task');
@@ -33,11 +35,13 @@ class TaskGather extends Task {
setChannelVarsForStt,
normalizeTranscription,
removeSpeechListeners,
setSpeechCredentialsAtRuntime
setSpeechCredentialsAtRuntime,
compileSonioxTranscripts
} = require('../utils/transcription-utils')(logger);
this.setChannelVarsForStt = setChannelVarsForStt;
this.normalizeTranscription = normalizeTranscription;
this.removeSpeechListeners = removeSpeechListeners;
this.compileSonioxTranscripts = compileSonioxTranscripts;
[
'finishOnKey', 'input', 'numDigits', 'minDigits', 'maxDigits',
@@ -50,9 +54,9 @@ class TaskGather extends Task {
/* timeout of zero means no timeout */
this.timeout = this.timeout === 0 ? 0 : (this.timeout || 15) * 1000;
this.interim = !!this.partialResultHook || this.bargein;
this.interim = !!this.partialResultHook || this.bargein || (this.timeout > 0);
this.listenDuringPrompt = this.data.listenDuringPrompt === false ? false : true;
this.minBargeinWordCount = this.data.minBargeinWordCount || 0;
this.minBargeinWordCount = this.data.minBargeinWordCount || 1;
if (this.data.recognizer) {
const recognizer = this.data.recognizer;
this.vendor = recognizer.vendor;
@@ -66,6 +70,11 @@ class TaskGather extends Task {
if (this.asrTimeout > 0) this.asrDtmfTerminationDigit = recognizer.asrDtmfTerminationDigit;
this.isContinuousAsr = this.asrTimeout > 0;
if (Array.isArray(this.data.recognizer.hints) &&
0 == this.data.recognizer.hints.length && process.env.JAMBONES_GATHER_CLEAR_GLOBAL_HINTS_ON_EMPTY_HINTS) {
logger.debug('Gather: an empty hints array was supplied, so we will mask global hints');
this.maskGlobalSttHints = true;
}
this.data.recognizer.hints = this.data.recognizer.hints || [];
this.data.recognizer.altLanguages = this.data.recognizer.altLanguages || [];
}
@@ -85,13 +94,21 @@ class TaskGather extends Task {
/* buffer speech for continuous asr */
this._bufferedTranscripts = [];
/* buffer for soniox transcripts */
this._sonioxTranscripts = [];
this.parentTask = parentTask;
this.partialTranscriptsCount = 0;
}
get name() { return TaskName.Gather; }
get needsStt() { return this.input.includes('speech'); }
get wantsSingleUtterance() {
return this.data.recognizer?.singleUtterance === true;
}
get earlyMedia() {
return (this.sayTask && this.sayTask.earlyMedia) ||
(this.playTask && this.playTask.earlyMedia);
@@ -113,14 +130,17 @@ class TaskGather extends Task {
}
async exec(cs, {ep}) {
this.logger.debug('Gather:exec');
this.logger.debug({options: this.data}, 'Gather:exec');
await super.exec(cs);
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
const {getNuanceAccessToken, getIbmAccessToken} = cs.srf.locals.dbHelpers;
if (cs.hasGlobalSttHints) {
if (cs.hasGlobalSttHints && !this.maskGlobalSttHints) {
const {hints, hintsBoost} = cs.globalSttHints;
this.data.recognizer.hints = this.data.recognizer.hints.concat(hints);
const setOfHints = new Set(this.data.recognizer.hints
.concat(hints)
.filter((h) => typeof h === 'string' && h.length > 0));
this.data.recognizer.hints = [...setOfHints];
if (!this.data.recognizer.hintsBoost && hintsBoost) this.data.recognizer.hintsBoost = hintsBoost;
this.logger.debug({hints: this.data.recognizer.hints, hintsBoost: this.data.recognizer.hintsBoost},
'Gather:exec - applying global sttHints');
@@ -142,7 +162,8 @@ class TaskGather extends Task {
asrDtmfTerminationDigit: this.asrDtmfTerminationDigit
}, 'Gather:exec - enabling continuous ASR since it is turned on for the session');
}
if (process.env.JAMBONZ_GATHER_EARLY_HINTS_MATCH && this.needsStt &&
const {JAMBONZ_GATHER_EARLY_HINTS_MATCH, JAMBONES_GATHER_EARLY_HINTS_MATCH} = process.env;
if ((JAMBONZ_GATHER_EARLY_HINTS_MATCH || JAMBONES_GATHER_EARLY_HINTS_MATCH) && this.needsStt &&
!this.isContinuousAsr &&
this.data.recognizer?.hints?.length > 0 && this.data.recognizer?.hints?.length <= 10) {
this.earlyHintsMatch = true;
@@ -180,7 +201,6 @@ class TaskGather extends Task {
throw new Error(`No speech-to-text service credentials for ${this.vendor} have been configured`);
}
this.logger.info({sttCredentials: this.sttCredentials}, 'Gather:exec - sttCredentials');
if (this.vendor === 'nuance' && this.sttCredentials.client_id) {
/* get nuance access token */
const {client_id, secret} = this.sttCredentials;
@@ -199,7 +219,6 @@ class TaskGather extends Task {
this._startTimer();
if (this.isContinuousAsr && 0 === this.timeout) this._startAsrTimer();
if (this.input.includes('speech') && !this.listenDuringPrompt) {
this.logger.debug('Gather:exec - calling _initSpeech');
this._initSpeech(cs, ep)
.then(() => {
if (this.killed) {
@@ -287,6 +306,7 @@ class TaskGather extends Task {
this._killAudio(cs);
this.ep.removeAllListeners('dtmf');
clearTimeout(this.interDigitTimer);
this._clearAsrTimer();
this.playTask?.span.end();
this.sayTask?.span.end();
this._resolve('killed');
@@ -300,6 +320,7 @@ class TaskGather extends Task {
const {timeout} = opts;
this.timeout = timeout;
this._startTimer();
return true;
}
_onDtmf(cs, ep, evt) {
@@ -339,7 +360,6 @@ class TaskGather extends Task {
async _initSpeech(cs, ep) {
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
this.logger.debug(opts, 'TaskGather:_initSpeech - channel vars');
switch (this.vendor) {
case 'google':
this.bugname = 'google_transcribe';
@@ -371,8 +391,6 @@ class TaskGather extends Task {
this._onTranscriptionComplete.bind(this, cs, ep));
ep.addCustomEventListener(NuanceTranscriptionEvents.VadDetected,
this._onVadDetected.bind(this, cs, ep));
ep.addCustomEventListener(NuanceTranscriptionEvents.Error,
this._onNuanceError.bind(this, cs, ep));
/* stall timers until prompt finishes playing */
if ((this.sayTask || this.playTask) && this.listenDuringPrompt) {
@@ -388,14 +406,17 @@ class TaskGather extends Task {
this._onDeepGramConnectFailure.bind(this, cs, ep));
break;
case 'soniox':
this.bugname = 'soniox_transcribe';
ep.addCustomEventListener(SonioxTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
break;
case 'ibm':
this.bugname = 'ibm_transcribe';
ep.addCustomEventListener(IbmTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(IbmTranscriptionEvents.Connect, this._onIbmConnect.bind(this, cs, ep));
ep.addCustomEventListener(IbmTranscriptionEvents.ConnectFailure,
this._onIbmConnectFailure.bind(this, cs, ep));
ep.addCustomEventListener(IbmTranscriptionEvents.Error,
this._onIbmError.bind(this, cs, ep));
break;
case 'nvidia':
@@ -408,8 +429,6 @@ class TaskGather extends Task {
this._onTranscriptionComplete.bind(this, cs, ep));
ep.addCustomEventListener(NvidiaTranscriptionEvents.VadDetected,
this._onVadDetected.bind(this, cs, ep));
ep.addCustomEventListener(NvidiaTranscriptionEvents.Error,
this._onNvidiaError.bind(this, cs, ep));
/* I think nvidia has this (??) - stall timers until prompt finishes playing */
if ((this.sayTask || this.playTask) && this.listenDuringPrompt) {
@@ -418,11 +437,23 @@ class TaskGather extends Task {
break;
default:
this.notifyError({ msg: 'ASR error', details:`Invalid vendor ${this.vendor}`});
this.notifyTaskDone();
throw new Error(`Invalid vendor ${this.vendor}`);
if (this.vendor.startsWith('custom:')) {
this.bugname = `${this.vendor}_transcribe`;
ep.addCustomEventListener(JambonzTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(JambonzTranscriptionEvents.Connect, this._onJambonzConnect.bind(this, cs, ep));
ep.addCustomEventListener(JambonzTranscriptionEvents.ConnectFailure,
this._onJambonzConnectFailure.bind(this, cs, ep));
break;
}
else {
this.notifyError({ msg: 'ASR error', details:`Invalid vendor ${this.vendor}`});
this.notifyTaskDone();
throw new Error(`Invalid vendor ${this.vendor}`);
}
}
/* common handler for all stt engine errors */
ep.addCustomEventListener(JambonzTranscriptionEvents.Error, this._onJambonzError.bind(this, cs, ep));
await ep.set(opts)
.catch((err) => this.logger.info(err, 'Error setting channel variables'));
}
@@ -522,7 +553,7 @@ class TaskGather extends Task {
// make sure this is not a transcript from answering machine detection
const bugname = fsEvent.getHeader('media-bugname');
const finished = fsEvent.getHeader('transcription-session-finished');
this.logger.debug({evt, bugname, finished}, 'Gather:_onTranscription');
this.logger.debug({evt, bugname, finished}, `Gather:_onTranscription for vendor ${this.vendor}`);
if (bugname && this.bugname !== bugname) return;
if (this.vendor === 'ibm') {
@@ -530,12 +561,26 @@ class TaskGather extends Task {
}
evt = this.normalizeTranscription(evt, this.vendor, 1, this.language);
if (evt.alternatives.length === 0) {
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, continue listening');
return;
}
/* fast path: our first partial transcript exactly matches an early hint */
if (this.earlyHintsMatch && evt.is_final === false && this.partialTranscriptsCount++ === 0) {
const transcript = evt.alternatives[0].transcript?.toLowerCase();
const hints = this.data.recognizer?.hints || [];
if (hints.find((h) => h.toLowerCase() === transcript)) {
this.logger.debug({evt}, 'Gather:_onTranscription: early hint match');
this._resolve('speech', evt);
return;
}
}
/* count words for bargein feature */
const words = evt.alternatives[0]?.transcript.split(' ').length;
const bufferedWords = this._bufferedTranscripts.reduce((count, e) => {
return count + e.alternatives[0]?.transcript.split(' ').length;
}, 0);
const bufferedWords = this._sonioxTranscripts.length +
this._bufferedTranscripts.reduce((count, e) => count + e.alternatives[0]?.transcript.split(' ').length, 0);
if (evt.is_final) {
if (evt.alternatives[0].transcript === '' && !this.callSession.callGone && !this.killed) {
@@ -544,7 +589,6 @@ class TaskGather extends Task {
}
else {
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, continue listening');
//this._startTranscribing(ep);
}
return;
}
@@ -568,7 +612,9 @@ class TaskGather extends Task {
return this._resolve(this._bufferedTranscripts.length > 0 ? 'speech' : 'timeout');
}
this._startAsrTimer();
return this._startTranscribing(ep);
/* some STT engines will keep listening after a final response, so no need to restart */
if (!['soniox', 'aws', 'microsoft', 'deepgram'].includes(this.vendor)) this._startTranscribing(ep);
}
else {
if (this.bargein && (words + bufferedWords) < this.minBargeinWordCount) {
@@ -579,6 +625,12 @@ class TaskGather extends Task {
return;
}
else {
if (this.vendor === 'soniox') {
/* compile transcripts into one */
this._sonioxTranscripts.push(evt.vendor.finalWords);
evt = this.compileSonioxTranscripts(this._sonioxTranscripts, 1, this.language);
this._sonioxTranscripts = [];
}
this._resolve('speech', evt);
}
}
@@ -589,6 +641,8 @@ class TaskGather extends Task {
others do not.
*/
//const isStableEnough = typeof evt.stability === 'undefined' || evt.stability > GATHER_STABILITY_THRESHOLD;
this._clearTimer();
this._startTimer();
if (this.bargein && (words + bufferedWords) >= this.minBargeinWordCount) {
if (!this.playComplete) {
this.logger.debug({transcript: evt.alternatives[0].transcript}, 'killing audio due to speech');
@@ -602,6 +656,13 @@ class TaskGather extends Task {
this.cs.requestor.request('verb:hook', this.partialResultHook, Object.assign({speech: evt},
this.cs.callInfo, httpHeaders));
}
if (this.vendor === 'soniox') {
this._clearTimer();
if (evt.vendor.finalWords.length) {
this.logger.debug({evt}, 'TaskGather:_onTranscription - buffering soniox transcript');
this._sonioxTranscripts.push(evt.vendor.finalWords);
}
}
}
}
_onEndOfUtterance(cs, ep) {
@@ -610,34 +671,53 @@ class TaskGather extends Task {
this._killAudio(cs);
}
if (!this.resolved && !this.killed && !this._bufferedTranscripts.length) {
/**
* By default, Gather asks google for multiple utterances.
* The reason is that we can sometimes get an 'end_of_utterance' event without
* getting a transcription. This can happen if someone coughs or mumbles.
* For that reason don't ask for a single utterance and we'll terminate the transcribe operation
* once we get a final transcript.
* However, if the usr has specified a singleUtterance, then we need to restart here
* since we dont have a final transcript yet.
*/
if (!this.resolved && !this.killed && !this._bufferedTranscripts.length && this.wantsSingleUtterance) {
this._startTranscribing(ep);
}
}
_onStartOfSpeech(cs, ep) {
this.logger.debug('TaskGather:_onStartOfSpeech');
if (this.bargein) {
this._killAudio(cs);
}
}
_onTranscriptionComplete(cs, ep) {
this.logger.debug('TaskGather:_onTranscriptionComplete');
}
_onNuanceError(cs, ep, evt) {
const {code, error, details} = evt;
if (code === 404 && error === 'No speech') {
this.logger.debug({code, error, details}, 'TaskGather:_onNuanceError');
return this._resolve('timeout');
}
this.logger.info({code, error, details}, 'TaskGather:_onNuanceError');
if (code === 413 && error === 'Too much speech') {
return this._resolve('timeout');
}
}
_onNvidiaError(cs, ep, evt) {
this.logger.info({evt}, 'TaskGather:_onNvidiaError');
}
_onDeepgramConnect(_cs, _ep) {
this.logger.debug('TaskGather:_onDeepgramConnect');
}
_onJambonzConnect(_cs, _ep) {
this.logger.debug('TaskGather:_onJambonzConnect');
}
_onJambonzError(cs, _ep, evt) {
this.logger.info({evt}, 'TaskGather:_onJambonzError');
const {writeAlerts, AlertType} = cs.srf.locals;
if (this.vendor === 'nuance') {
const {code, error} = evt;
if (code === 404 && error === 'No speech') return this._resolve('timeout');
if (code === 413 && error === 'Too much speech') return this._resolve('timeout');
}
this.logger.info({evt}, 'TaskGather:_onJambonzError');
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: `Custom speech vendor ${this.vendor} error: ${evt.error}`,
vendor: this.vendor,
}).catch((err) => this.logger.info({err}, 'Error generating alert for jambonz custom connection failure'));
this.notifyError({msg: 'ASR error', details:`Custom speech vendor ${this.vendor} error: ${evt.error}`});
}
_onDeepGramConnectFailure(cs, _ep, evt) {
const {reason} = evt;
@@ -652,6 +732,19 @@ class TaskGather extends Task {
this.notifyError({msg: 'ASR error', details:`Failed connecting to speech vendor deepgram: ${reason}`});
this.notifyTaskDone();
}
_onJambonzConnectFailure(cs, _ep, evt) {
const {reason} = evt;
const {writeAlerts, AlertType} = cs.srf.locals;
this.logger.info({evt}, 'TaskGather:_onJambonzConnectFailure');
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: `Failed connecting to ${this.vendor} speech recognizer: ${reason}`,
vendor: this.vendor,
}).catch((err) => this.logger.info({err}, 'Error generating alert for jambonz custom connection failure'));
this.notifyError({msg: 'ASR error', details:`Failed connecting to speech vendor ${this.vendor}: ${reason}`});
this.notifyTaskDone();
}
_onIbmConnect(_cs, _ep) {
this.logger.debug('TaskGather:_onIbmConnect');
@@ -701,6 +794,10 @@ class TaskGather extends Task {
if (this.resolved) return;
this.resolved = true;
// Clear dtmf event
if (this.dtmfBargein) {
this.ep.removeAllListeners('dtmf');
}
clearTimeout(this.interDigitTimer);
this._clearTimer();

View File

@@ -2,6 +2,7 @@ const Task = require('./task');
const {TaskName, TaskPreconditions, ListenEvents, ListenStatus} = require('../utils/constants');
const makeTask = require('./make_task');
const moment = require('moment');
const MAX_PLAY_AUDIO_QUEUE_SIZE = 10;
class TaskListen extends Task {
constructor(logger, opts, parentTask) {
@@ -20,6 +21,8 @@ class TaskListen extends Task {
this.nested = parentTask instanceof Task;
this.results = {};
this.playAudioQueue = [];
this.isPlayingAudioFromQueue = false;
if (this.transcribe) this.transcribeTask = makeTask(logger, {'transcribe': opts.transcribe}, this);
}
@@ -58,6 +61,7 @@ class TaskListen extends Task {
super.kill(cs);
this.logger.debug(`TaskListen:kill endpoint connected? ${this.ep && this.ep.connected}`);
this._clearTimer();
this.playAudioQueue = [];
if (this.ep && this.ep.connected) {
this.logger.debug('TaskListen:kill closing websocket');
try {
@@ -184,16 +188,36 @@ class TaskListen extends Task {
this.notifyTaskDone();
}
async _onPlayAudio(ep, evt) {
this.logger.info(`received play_audio event: ${JSON.stringify(evt)}`);
async _playAudio(ep, evt, logger) {
try {
const results = await ep.play(evt.file);
this.logger.debug(`Finished playing file, result: ${JSON.stringify(results)}`);
logger.debug(`Finished playing file, result: ${JSON.stringify(results)}`);
ep.forkAudioSendText({type: 'playDone', data: Object.assign({id: evt.id}, results)});
} catch (err) {
logger.error({err}, 'Error playing file');
}
catch (err) {
this.logger.error({err}, 'Error playing file');
}
async _onPlayAudio(ep, evt) {
this.logger.info(`received play_audio event: ${JSON.stringify(evt)}`);
if (!evt.queuePlay) {
this.playAudioQueue = [];
this._playAudio(ep, evt, this.logger);
this.isPlayingAudioFromQueue = false;
return;
}
if (this.playAudioQueue.length <= MAX_PLAY_AUDIO_QUEUE_SIZE) {
this.playAudioQueue.push(evt);
}
if (this.isPlayingAudioFromQueue) return;
this.isPlayingAudioFromQueue = true;
while (this.playAudioQueue.length > 0) {
await this._playAudio(ep, this.playAudioQueue.shift(), this.logger);
}
this.isPlayingAudioFromQueue = false;
}
_onKillAudio(ep) {

View File

@@ -37,6 +37,7 @@ class TaskPlay extends Task {
}, this.timeoutSecs * 1000);
}
try {
this.notifyStatus({event: 'start-playback'});
while (!this.killed && (this.loop === 'forever' || this.loop--) && this.ep.connected) {
if (cs.isInConference) {
const {memberId, confName, confUuid} = cs;
@@ -80,7 +81,8 @@ class TaskPlay extends Task {
this.killPlayToConfMember(this.ep, memberId, confName);
}
else {
await this.ep.api('uuid_break', this.ep.uuid).catch((err) => this.logger.info(err, 'Error killing audio'));
this.notifyStatus({event: 'kill-playback'});
this.ep.api('uuid_break', this.ep.uuid).catch((err) => this.logger.info(err, 'Error killing audio'));
}
}
}

View File

@@ -27,7 +27,7 @@ class TaskRestDial extends Task {
*/
async exec(cs) {
await super.exec(cs);
this.req = cs.req;
this.canCancel = true;
this._setCallTimer();
await this.awaitTaskDone();
@@ -36,15 +36,15 @@ class TaskRestDial extends Task {
kill(cs) {
super.kill(cs);
this._clearCallTimer();
if (this.req) {
this.req.cancel();
this.req = null;
if (this.canCancel && cs?.req) {
this.canCancel = false;
cs.req.cancel();
}
this.notifyTaskDone();
}
async _onConnect(dlg) {
this.req = null;
this.canCancel = false;
const cs = this.callSession;
cs.setDialog(dlg);
@@ -79,7 +79,7 @@ class TaskRestDial extends Task {
_onCallStatus(status) {
this.logger.debug(`CallStatus: ${status}`);
if (status >= 200) {
this.req = null;
this.canCancel = false;
this._clearCallTimer();
if (status !== 200) this.notifyTaskDone();
}

View File

@@ -143,7 +143,7 @@ class TaskSay extends Task {
span.end();
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.TTS_NOT_PROVISIONED,
alert_type: AlertType.TTS_FAILURE,
vendor,
detail: err.message
}).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure'));
@@ -154,7 +154,6 @@ class TaskSay extends Task {
const arr = this.text.map((t) => generateAudio(t));
const filepath = (await Promise.all(arr)).filter((fp) => fp && fp.length);
this.logger.debug({filepath}, 'synthesized files for tts');
this.notifyStatus({event: 'start-playback'});
while (!this.killed && (this.loop === 'forever' || this.loop--) && this.ep?.connected) {

View File

@@ -3,12 +3,14 @@ const {
TaskName,
TaskPreconditions,
GoogleTranscriptionEvents,
AzureTranscriptionEvents,
AwsTranscriptionEvents,
NuanceTranscriptionEvents,
AwsTranscriptionEvents,
AzureTranscriptionEvents,
DeepgramTranscriptionEvents,
SonioxTranscriptionEvents,
IbmTranscriptionEvents,
NvidiaTranscriptionEvents
NvidiaTranscriptionEvents,
JambonzTranscriptionEvents
} = require('../utils/constants');
const { normalizeJambones } = require('@jambonz/verb-specifications');
@@ -22,11 +24,13 @@ class TaskTranscribe extends Task {
setChannelVarsForStt,
normalizeTranscription,
removeSpeechListeners,
setSpeechCredentialsAtRuntime
setSpeechCredentialsAtRuntime,
compileSonioxTranscripts
} = require('../utils/transcription-utils')(logger);
this.setChannelVarsForStt = setChannelVarsForStt;
this.normalizeTranscription = normalizeTranscription;
this.removeSpeechListeners = removeSpeechListeners;
this.compileSonioxTranscripts = compileSonioxTranscripts;
this.transcriptionHook = this.data.transcriptionHook;
this.earlyMedia = this.data.earlyMedia === true || (parentTask && parentTask.earlyMedia);
@@ -40,6 +44,9 @@ class TaskTranscribe extends Task {
/* let credentials be supplied in the recognizer object at runtime */
this.sttCredentials = setSpeechCredentialsAtRuntime(recognizer);
/* buffer for soniox transcripts */
this._sonioxTranscripts = [];
recognizer.hints = recognizer.hints || [];
recognizer.altLanguages = recognizer.altLanguages || [];
}
@@ -183,8 +190,6 @@ class TaskTranscribe extends Task {
this._onStartOfSpeech.bind(this, cs, ep, channel));
ep.addCustomEventListener(NuanceTranscriptionEvents.TranscriptionComplete,
this._onTranscriptionComplete.bind(this, cs, ep, channel));
ep.addCustomEventListener(AzureTranscriptionEvents.Error,
this._onNuanceError.bind(this, cs, ep, channel));
break;
case 'deepgram':
this.bugname = 'deepgram_transcribe';
@@ -195,7 +200,11 @@ class TaskTranscribe extends Task {
ep.addCustomEventListener(DeepgramTranscriptionEvents.ConnectFailure,
this._onDeepGramConnectFailure.bind(this, cs, ep, channel));
break;
case 'soniox':
this.bugname = 'soniox_transcribe';
ep.addCustomEventListener(SonioxTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
break;
case 'ibm':
this.bugname = 'ibm_transcribe';
ep.addCustomEventListener(IbmTranscriptionEvents.Transcription,
@@ -204,8 +213,6 @@ class TaskTranscribe extends Task {
this._onIbmConnect.bind(this, cs, ep, channel));
ep.addCustomEventListener(IbmTranscriptionEvents.ConnectFailure,
this._onIbmConnectFailure.bind(this, cs, ep, channel));
ep.addCustomEventListener(IbmTranscriptionEvents.Error,
this._onIbmError.bind(this, cs, ep, channel));
break;
case 'nvidia':
@@ -218,14 +225,13 @@ class TaskTranscribe extends Task {
this._onTranscriptionComplete.bind(this, cs, ep));
ep.addCustomEventListener(NvidiaTranscriptionEvents.VadDetected,
this._onVadDetected.bind(this, cs, ep));
ep.addCustomEventListener(NvidiaTranscriptionEvents.Error,
this._onNvidiaError.bind(this, cs, ep));
break;
default:
throw new Error(`Invalid vendor ${this.vendor}`);
}
/* common handler for all stt engine errors */
ep.addCustomEventListener(JambonzTranscriptionEvents.Error, this._onJambonzError.bind(this, cs, ep));
await ep.set(opts)
.catch((err) => this.logger.info(err, 'Error setting channel variables'));
@@ -253,8 +259,11 @@ class TaskTranscribe extends Task {
this.logger.debug({evt}, 'TaskTranscribe:_onTranscription - before normalization');
evt = this.normalizeTranscription(evt, this.vendor, channel, this.language);
this.logger.debug({evt}, 'TaskTranscribe:_onTranscription');
if (evt.alternatives.length === 0) {
this.logger.info({evt}, 'TaskTranscribe:_onTranscription - got empty transcript, continue listening');
return;
}
if (evt.alternatives[0]?.transcript === '' && !cs.callGone && !this.killed) {
if (['microsoft', 'deepgram'].includes(this.vendor)) {
@@ -267,6 +276,15 @@ class TaskTranscribe extends Task {
return;
}
if (this.vendor === 'soniox') {
/* compile transcripts into one */
this._sonioxTranscripts.push(evt.vendor.finalWords);
if (evt.is_final) {
evt = this.compileSonioxTranscripts(this._sonioxTranscripts, 1, this.language);
this._sonioxTranscripts = [];
}
}
if (this.transcriptionHook) {
const b3 = this.getTracingPropagation();
const httpHeaders = b3 && {b3};
@@ -315,20 +333,6 @@ class TaskTranscribe extends Task {
this._timer = null;
}
}
_onNuanceError(_cs, _ep, _channel, evt) {
const {code, error, details} = evt;
if (code === 404 && error === 'No speech') {
this.logger.debug({code, error, details}, 'TaskTranscribe:_onNuanceError');
return this._resolve('timeout');
}
this.logger.info({code, error, details}, 'TaskTranscribe:_onNuanceError');
if (code === 413 && error === 'Too much speech') {
return this._resolve('timeout');
}
}
_onNvidiaError(cs, ep, evt) {
this.logger.info({evt}, 'TaskGather:_onNvidiaError');
}
_onDeepgramConnect(_cs, _ep) {
this.logger.debug('TaskTranscribe:_onDeepgramConnect');
}
@@ -365,7 +369,25 @@ class TaskTranscribe extends Task {
this.notifyTaskDone();
}
_onIbmError(cs, _ep, _channel, evt) {
this.logger.info({evt}, 'TaskGather:_onIbmError');
this.logger.info({evt}, 'TaskTranscribe:_onIbmError');
}
_onJambonzError(cs, _ep, evt) {
this.logger.info({evt}, 'TaskTranscribe:_onJambonzError');
const {writeAlerts, AlertType} = cs.srf.locals;
if (this.vendor === 'nuance') {
const {code, error} = evt;
if (code === 404 && error === 'No speech') return this._resolve('timeout');
if (code === 413 && error === 'Too much speech') return this._resolve('timeout');
}
this.logger.info({evt}, 'TaskTranscribe:_onJambonzError');
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: `Custom speech vendor ${this.vendor} error: ${evt.error}`,
vendor: this.vendor,
}).catch((err) => this.logger.info({err}, 'Error generating alert for jambonz custom connection failure'));
this.notifyError({msg: 'ASR error', details:`Custom speech vendor ${this.vendor} error: ${evt.error}`});
}

View File

@@ -267,7 +267,6 @@ module.exports = (logger) => {
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, onTranscription.bind(null, cs, ep, task));
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, onNoSpeechDetected.bind(null, cs, ep, task));
}
logger.debug({sttOpts}, 'startAmd: setting channel vars');
await ep.set(sttOpts).catch((err) => logger.info(err, 'Error setting channel variables'));
amd

View File

@@ -86,6 +86,10 @@
"ConnectFailure": "deepgram_transcribe::connect_failed",
"Connect": "deepgram_transcribe::connect"
},
"SonioxTranscriptionEvents": {
"Transcription": "soniox_transcribe::transcription",
"Error": "soniox_transcribe::error"
},
"IbmTranscriptionEvents": {
"Transcription": "ibm_transcribe::transcription",
"ConnectFailure": "ibm_transcribe::connect_failed",
@@ -106,6 +110,12 @@
"NoSpeechDetected": "azure_transcribe::no_speech_detected",
"VadDetected": "azure_transcribe::vad_detected"
},
"JambonzTranscriptionEvents": {
"Transcription": "jambonz_transcribe::transcription",
"ConnectFailure": "jambonz_transcribe::connect_failed",
"Connect": "jambonz_transcribe::connect",
"Error": "jambonz_transcribe::error"
},
"ListenEvents": {
"Connect": "mod_audio_fork::connect",
"ConnectFailure": "mod_audio_fork::connect_failed",
@@ -147,6 +157,7 @@
"queue:status",
"dial:confirm",
"verb:hook",
"verb:status",
"jambonz:error"
],
"RecordState": {

View File

@@ -50,6 +50,8 @@ const speechMapper = (cred) => {
const o = JSON.parse(decrypt(credential));
obj.client_id = o.client_id;
obj.secret = o.secret;
obj.nuance_tts_uri = o.nuance_tts_uri;
obj.nuance_stt_uri = o.nuance_stt_uri;
}
else if ('ibm' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
@@ -62,6 +64,16 @@ const speechMapper = (cred) => {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
}
else if ('soniox' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
}
else if (obj.vendor.startsWith('custom:')) {
const o = JSON.parse(decrypt(credential));
obj.auth_token = o.auth_token;
obj.custom_stt_url = o.custom_stt_url;
obj.custom_tts_url = o.custom_tts_url;
}
} catch (err) {
console.log(err);
}
@@ -79,47 +91,13 @@ module.exports = (logger, srf) => {
const [r2] = await pp.query(sqlSpeechCredentials, account_sid);
const speech = r2.map(speechMapper);
/* search at the service provider level if we don't find it at the account level */
const haveGoogle = speech.find((s) => s.vendor === 'google');
const haveAws = speech.find((s) => s.vendor === 'aws');
const haveMicrosoft = speech.find((s) => s.vendor === 'microsoft');
const haveWellsaid = speech.find((s) => s.vendor === 'wellsaid');
const haveNuance = speech.find((s) => s.vendor === 'nuance');
const haveDeepgram = speech.find((s) => s.vendor === 'deepgram');
const haveIbm = speech.find((s) => s.vendor === 'ibm');
if (!haveGoogle || !haveAws || !haveMicrosoft || !haveWellsaid || !haveNuance || !haveIbm || !haveDeepgram) {
const [r3] = await pp.query(sqlSpeechCredentialsForSP, account_sid);
if (r3.length) {
if (!haveGoogle) {
const google = r3.find((s) => s.vendor === 'google');
if (google) speech.push(speechMapper(google));
}
if (!haveAws) {
const aws = r3.find((s) => s.vendor === 'aws');
if (aws) speech.push(speechMapper(aws));
}
if (!haveMicrosoft) {
const ms = r3.find((s) => s.vendor === 'microsoft');
if (ms) speech.push(speechMapper(ms));
}
if (!haveWellsaid) {
const wellsaid = r3.find((s) => s.vendor === 'wellsaid');
if (wellsaid) speech.push(speechMapper(wellsaid));
}
if (!haveNuance) {
const nuance = r3.find((s) => s.vendor === 'nuance');
if (nuance) speech.push(speechMapper(nuance));
}
if (!haveDeepgram) {
const deepgram = r3.find((s) => s.vendor === 'deepgram');
if (deepgram) speech.push(speechMapper(deepgram));
}
if (!haveIbm) {
const ibm = r3.find((s) => s.vendor === 'ibm');
if (ibm) speech.push(speechMapper(ibm));
}
/* add service provider creds unless we have that vendor at the account level */
const [r3] = await pp.query(sqlSpeechCredentialsForSP, account_sid);
r3.forEach((s) => {
if (!speech.find((s2) => s2.vendor === s.vendor)) {
speech.push(speechMapper(s));
}
}
});
return {
...r[0],

View File

@@ -2,9 +2,9 @@ const crypto = require('crypto');
const algorithm = process.env.LEGACY_CRYPTO ? 'aes-256-ctr' : 'aes-256-cbc';
const iv = crypto.randomBytes(16);
const secretKey = crypto.createHash('sha256')
.update(String(process.env.JWT_SECRET))
.update(process.env.ENCRYPTION_SECRET || process.env.JWT_SECRET)
.digest('base64')
.substr(0, 32);
.substring(0, 32);
const encrypt = (text) => {
const cipher = crypto.createCipheriv(algorithm, secretKey, iv);
@@ -25,8 +25,8 @@ const decrypt = (data) => {
throw err;
}
const decipher = crypto.createDecipheriv(algorithm, secretKey, Buffer.from(hash.iv, 'hex'));
const decrpyted = Buffer.concat([decipher.update(Buffer.from(hash.content, 'hex')), decipher.final()]);
return decrpyted.toString();
const decrypted = Buffer.concat([decipher.update(Buffer.from(hash.content, 'hex')), decipher.final()]);
return decrypted.toString();
};
module.exports = {

View File

@@ -138,7 +138,6 @@ function installSrfLocals(srf, logger) {
retrieveCall,
listCalls,
deleteCall,
synthAudio,
createHash,
retrieveHash,
deleteKey,
@@ -151,11 +150,17 @@ function installSrfLocals(srf, logger) {
pushBack,
popFront,
removeFromList,
lengthOfList,
getListPosition,
lengthOfList,
} = require('@jambonz/realtimedb-helpers')({
host: process.env.JAMBONES_REDIS_HOST,
port: process.env.JAMBONES_REDIS_PORT || 6379
}, logger, tracer);
const {
synthAudio,
getNuanceAccessToken,
getIbmAccessToken,
} = require('@jambonz/realtimedb-helpers')({
} = require('@jambonz/speech-utils')({
host: process.env.JAMBONES_REDIS_HOST,
port: process.env.JAMBONES_REDIS_PORT || 6379
}, logger, tracer);

View File

@@ -242,7 +242,8 @@ const createSipRecPayload = (sdp1, sdp2, logger) => {
.replace(/a=sendonly\r\n/g, '')
.replace(/a=direction:both\r\n/g, '');
*/
return combinedSdp;
return combinedSdp.replace(/sendrecv/g, 'recvonly');
};
module.exports = { parseSiprecPayload, createSipRecPayload } ;

View File

@@ -5,7 +5,9 @@ const {
AwsTranscriptionEvents,
NuanceTranscriptionEvents,
DeepgramTranscriptionEvents,
NvidiaTranscriptionEvents
SonioxTranscriptionEvents,
NvidiaTranscriptionEvents,
JambonzTranscriptionEvents
} = require('./constants');
const stickyVars = {
@@ -27,6 +29,7 @@ const stickyVars = {
'AZURE_SERVICE_ENDPOINT_ID',
'AZURE_REQUEST_SNR',
'AZURE_PROFANITY_OPTION',
'AZURE_SPEECH_ALTERNATIVE_LANGUAGE_CODES',
'AZURE_SERVICE_ENDPOINT',
'AZURE_INITIAL_SPEECH_TIMEOUT_MS',
'AZURE_USE_OUTPUT_FORMAT_DETAILED',
@@ -88,9 +91,70 @@ const stickyVars = {
],
nvidia: [
'NVIDIA_HINTS'
],
soniox: [
'SONIOX_PROFANITY_FILTER',
'SONIOX_MODEL'
]
};
const compileSonioxTranscripts = (finalWordChunks, channel, language) => {
const words = finalWordChunks.flat();
const transcript = words.reduce((acc, word) => {
if (word.text === '<end>') return acc;
if ([',', '.', '?', '!'].includes(word.text)) return `${acc}${word.text}`;
return `${acc} ${word.text}`;
}, '').trim();
const realWords = words.filter((word) => ![',.!?;'].includes(word.text) && word.text !== '<end>');
const confidence = realWords.reduce((acc, word) => acc + word.confidence, 0) / realWords.length;
const alternatives = [{transcript, confidence}];
return {
language_code: language,
channel_tag: channel,
is_final: true,
alternatives,
vendor: {
name: 'soniox',
evt: words
}
};
};
const normalizeSoniox = (evt, channel, language) => {
const copy = JSON.parse(JSON.stringify(evt));
/* an <end> token indicates the end of an utterance */
const endTokenPos = evt.words.map((w) => w.text).indexOf('<end>');
const endpointReached = endTokenPos !== -1;
const words = endpointReached ? evt.words.slice(0, endTokenPos) : evt.words;
/* note: we can safely ignore words after the <end> token as they will be returned again */
const finalWords = words.filter((word) => word.is_final);
const nonFinalWords = words.filter((word) => !word.is_final);
const is_final = endpointReached && finalWords.length > 0;
const transcript = words.reduce((acc, word) => {
if ([',', '.', '?', '!'].includes(word.text)) return `${acc}${word.text}`;
else return `${acc} ${word.text}`;
}, '').trim();
const realWords = words.filter((word) => ![',.!?;'].includes(word.text) && word.text !== '<end>');
const confidence = realWords.reduce((acc, word) => acc + word.confidence, 0) / realWords.length;
const alternatives = [{transcript, confidence}];
return {
language_code: language,
channel_tag: channel,
is_final,
alternatives,
vendor: {
name: 'soniox',
endpointReached,
evt: copy,
finalWords,
nonFinalWords
}
};
};
const normalizeDeepgram = (evt, channel, language) => {
const copy = JSON.parse(JSON.stringify(evt));
const alternatives = (evt.channel?.alternatives || [])
@@ -161,6 +225,15 @@ const normalizeGoogle = (evt, channel, language) => {
};
};
const normalizeCustom = (evt, channel, language) => {
return {
language_code: language,
channel_tag: channel,
is_final: evt.is_final,
alternatives: [evt.alternatives[0]]
};
};
const normalizeNuance = (evt, channel, language) => {
const copy = JSON.parse(JSON.stringify(evt));
return {
@@ -221,7 +294,7 @@ const normalizeAws = (evt, channel, language) => {
module.exports = (logger) => {
const normalizeTranscription = (evt, vendor, channel, language) => {
logger.debug({ evt, vendor, channel, language }, 'normalizeTranscription');
//logger.debug({ evt, vendor, channel, language }, 'normalizeTranscription');
switch (vendor) {
case 'deepgram':
return normalizeDeepgram(evt, channel, language);
@@ -237,7 +310,12 @@ module.exports = (logger) => {
return normalizeIbm(evt, channel, language);
case 'nvidia':
return normalizeNvidia(evt, channel, language);
case 'soniox':
return normalizeSoniox(evt, channel, language);
default:
if (vendor.startsWith('custom:')) {
return normalizeCustom(evt, channel, language);
}
logger.error(`Unknown vendor ${vendor}`);
return evt;
}
@@ -247,6 +325,7 @@ module.exports = (logger) => {
let opts = {};
const {enable, voiceMs = 0, mode = -1} = rOpts.vad || {};
const vad = {enable, voiceMs, mode};
const vendor = rOpts.vendor;
/* voice activity detection works across vendors */
opts = {
@@ -256,59 +335,43 @@ module.exports = (logger) => {
...(vad.enable && typeof vad.mode === 'number' && {RECOGNIZER_VAD_MODE: vad.mode}),
};
if ('google' === rOpts.vendor) {
if ('google' === vendor) {
let model = 'phone_call';
if (rOpts.altLanguages.length > 0) model = task.name === TaskName.Gather ? 'command_and_search' : 'latest_long';
opts = {
...opts,
...(sttCredentials &&
{GOOGLE_APPLICATION_CREDENTIALS: JSON.stringify(sttCredentials.credentials)}),
...(rOpts.enhancedModel &&
{GOOGLE_SPEECH_USE_ENHANCED: 1}),
...(rOpts.separateRecognitionPerChannel &&
{GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL: 1}),
...(rOpts.profanityFilter &&
{GOOGLE_SPEECH_PROFANITY_FILTER: 1}),
...(rOpts.punctuation &&
{GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 1}),
...(rOpts.words &&
{GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 1}),
...((rOpts.singleUtterance || task.name === TaskName.Gather) &&
{GOOGLE_SPEECH_SINGLE_UTTERANCE: 1}),
...(rOpts.diarization &&
{GOOGLE_SPEECH_SPEAKER_DIARIZATION: 1}),
...(sttCredentials && {GOOGLE_APPLICATION_CREDENTIALS: JSON.stringify(sttCredentials.credentials)}),
...(rOpts.separateRecognitionPerChannel && {GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL: 1}),
...(rOpts.separateRecognitionPerChanne === false && {GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL: 0}),
...(rOpts.profanityFilter && {GOOGLE_SPEECH_PROFANITY_FILTER: 1}),
...(rOpts.punctuation && {GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 1}),
...(rOpts.words && {GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 1}),
...(rOpts.singleUtterance && {GOOGLE_SPEECH_SINGLE_UTTERANCE: 1}),
...(rOpts.diarization && {GOOGLE_SPEECH_SPEAKER_DIARIZATION: 1}),
...(rOpts.diarization && rOpts.diarizationMinSpeakers > 0 &&
{GOOGLE_SPEECH_SPEAKER_DIARIZATION_MIN_SPEAKER_COUNT: rOpts.diarizationMinSpeakers}),
...(rOpts.diarization && rOpts.diarizationMaxSpeakers > 0 &&
{GOOGLE_SPEECH_SPEAKER_DIARIZATION_MAX_SPEAKER_COUNT: rOpts.diarizationMaxSpeakers}),
...(rOpts.enhancedModel === false &&
{GOOGLE_SPEECH_USE_ENHANCED: 0}),
...(rOpts.separateRecognitionPerChannel === false &&
{GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL: 0}),
...(rOpts.profanityFilter === false &&
{GOOGLE_SPEECH_PROFANITY_FILTER: 0}),
...(rOpts.punctuation === false &&
{GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 0}),
...(rOpts.words == false &&
{GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 0}),
...((rOpts.singleUtterance === false || task.name === TaskName.Transcribe) &&
{GOOGLE_SPEECH_SINGLE_UTTERANCE: 0}),
...(rOpts.diarization === false &&
{GOOGLE_SPEECH_SPEAKER_DIARIZATION: 0}),
...(rOpts.enhancedModel && {GOOGLE_SPEECH_USE_ENHANCED: 1}),
...(rOpts.profanityFilter === false && {GOOGLE_SPEECH_PROFANITY_FILTER: 0}),
...(rOpts.punctuation === false && {GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 0}),
...(rOpts.words == false && {GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 0}),
...(rOpts.diarization === false && {GOOGLE_SPEECH_SPEAKER_DIARIZATION: 0}),
...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'string' &&
{GOOGLE_SPEECH_HINTS: rOpts.hints.join(',')}),
...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'object' &&
{GOOGLE_SPEECH_HINTS: JSON.stringify(rOpts.hints)}),
...(typeof rOpts.hintsBoost === 'number' &&
{GOOGLE_SPEECH_HINTS_BOOST: rOpts.hintsBoost}),
...(typeof rOpts.hintsBoost === 'number' && {GOOGLE_SPEECH_HINTS_BOOST: rOpts.hintsBoost}),
...(rOpts.altLanguages.length > 0 &&
{GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES: rOpts.altLanguages.join(',')}),
{GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES: [...new Set(rOpts.altLanguages)].join(',')}),
...(rOpts.interactionType &&
{GOOGLE_SPEECH_METADATA_INTERACTION_TYPE: rOpts.interactionType}),
...{GOOGLE_SPEECH_MODEL: rOpts.model || (task.name === TaskName.Gather ? 'latest_short' : 'phone_call')},
...(rOpts.naicsCode > 0 &&
{GOOGLE_SPEECH_METADATA_INDUSTRY_NAICS_CODE: rOpts.naicsCode}),
...{GOOGLE_SPEECH_MODEL: rOpts.model || model},
...(rOpts.naicsCode > 0 && {GOOGLE_SPEECH_METADATA_INDUSTRY_NAICS_CODE: rOpts.naicsCode}),
GOOGLE_SPEECH_METADATA_RECORDING_DEVICE_TYPE: 'phone_line',
};
}
else if (['aws', 'polly'].includes(rOpts.vendor)) {
else if (['aws', 'polly'].includes(vendor)) {
opts = {
...opts,
...(rOpts.vocabularyName && {AWS_VOCABULARY_NAME: rOpts.vocabularyName}),
@@ -321,7 +384,7 @@ module.exports = (logger) => {
}),
};
}
else if ('microsoft' === rOpts.vendor) {
else if ('microsoft' === vendor) {
opts = {
...opts,
...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'string' &&
@@ -329,7 +392,7 @@ module.exports = (logger) => {
...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'object' &&
{AZURE_SPEECH_HINTS: rOpts.hints.map((h) => h.phrase).join(',')}),
...(rOpts.altLanguages && rOpts.altLanguages.length > 0 &&
{AZURE_SERVICE_ENDPOINT_ID: rOpts.sttCredentials}),
{AZURE_SPEECH_ALTERNATIVE_LANGUAGE_CODES: [...new Set(rOpts.altLanguages)].join(',')}),
...(rOpts.requestSnr && {AZURE_REQUEST_SNR: 1}),
...(rOpts.profanityOption && {AZURE_PROFANITY_OPTION: rOpts.profanityOption}),
...(rOpts.azureServiceEndpoint && {AZURE_SERVICE_ENDPOINT: rOpts.azureServiceEndpoint}),
@@ -346,7 +409,7 @@ module.exports = (logger) => {
{AZURE_SERVICE_ENDPOINT_ID: sttCredentials.custom_stt_endpoint})
};
}
else if ('nuance' === rOpts.vendor) {
else if ('nuance' === vendor) {
/**
* Note: all nuance options are in recognizer.nuanceOptions, should migrate
* other vendor settings to similar nested structure
@@ -354,12 +417,9 @@ module.exports = (logger) => {
const {nuanceOptions = {}} = rOpts;
opts = {
...opts,
...(sttCredentials.access_token) &&
{NUANCE_ACCESS_TOKEN: sttCredentials.access_token},
...(sttCredentials.krypton_endpoint) &&
{NUANCE_KRYPTON_ENDPOINT: sttCredentials.krypton_endpoint},
...(nuanceOptions.topic) &&
{NUANCE_TOPIC: nuanceOptions.topic},
...(sttCredentials.access_token) && {NUANCE_ACCESS_TOKEN: sttCredentials.access_token},
...(sttCredentials.nuance_stt_uri) && {NUANCE_KRYPTON_ENDPOINT: sttCredentials.nuance_stt_uri},
...(nuanceOptions.topic) && {NUANCE_TOPIC: nuanceOptions.topic},
...(nuanceOptions.utteranceDetectionMode) &&
{NUANCE_UTTERANCE_DETECTION_MODE: nuanceOptions.utteranceDetectionMode},
...(nuanceOptions.punctuation || rOpts.punctuation) && {NUANCE_PUNCTUATION: nuanceOptions.punctuation},
@@ -397,7 +457,7 @@ module.exports = (logger) => {
{NUANCE_RESOURCES: JSON.stringify(nuanceOptions.resources)},
};
}
else if ('deepgram' === rOpts.vendor) {
else if ('deepgram' === vendor) {
const {deepgramOptions = {}} = rOpts;
opts = {
...opts,
@@ -441,7 +501,30 @@ module.exports = (logger) => {
{DEEPGRAM_SPEECH_TAG: deepgramOptions.tag}
};
}
else if ('ibm' === rOpts.vendor) {
else if ('soniox' === vendor) {
const {sonioxOptions = {}} = rOpts;
const {storage = {}} = sonioxOptions;
opts = {
...opts,
...(sttCredentials.api_key) &&
{SONIOX_API_KEY: sttCredentials.api_key},
...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'string' &&
{SONIOX_HINTS: rOpts.hints.join(',')}),
...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'object' &&
{SONIOX_HINTS: JSON.stringify(rOpts.hints)}),
...(typeof rOpts.hintsBoost === 'number' &&
{SONIOX_HINTS_BOOST: rOpts.hintsBoost}),
...(sonioxOptions.model) &&
{SONIOX_MODEL: sonioxOptions.model},
...((sonioxOptions.profanityFilter || rOpts.profanityFilter) && {SONIOX_PROFANITY_FILTER: 1}),
...(storage?.id && {SONIOX_STORAGE_ID: storage.id}),
...(storage?.id && storage?.title && {SONIOX_STORAGE_TITLE: storage.title}),
...(storage?.id && storage?.disableStoreAudio && {SONIOX_STORAGE_DISABLE_AUDIO: 1}),
...(storage?.id && storage?.disableStoreTranscript && {SONIOX_STORAGE_DISABLE_TRANSCRIPT: 1}),
...(storage?.id && storage?.disableSearch && {SONIOX_STORAGE_DISABLE_SEARCH: 1})
};
}
else if ('ibm' === vendor) {
const {ibmOptions = {}} = rOpts;
opts = {
...opts,
@@ -465,7 +548,7 @@ module.exports = (logger) => {
{IBM_SPEECH_WATSON_LEARNING_OPT_OUT: ibmOptions.watsonLearningOptOut}
};
}
else if ('nvidia' === rOpts.vendor) {
else if ('nvidia' === vendor) {
const {nvidiaOptions = {}} = rOpts;
opts = {
...opts,
@@ -494,11 +577,29 @@ module.exports = (logger) => {
{NVIDIA_CUSTOM_CONFIGURATION: JSON.stringify(nvidiaOptions.customConfiguration)}),
};
}
else if (vendor.startsWith('custom:')) {
let {options = {}} = rOpts;
const {auth_token, custom_stt_url} = sttCredentials;
options = {
...options,
...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'string' &&
{hints: rOpts.hints}),
...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'object' &&
{hints: JSON.stringify(rOpts.hints)}),
...(typeof rOpts.hintsBoost === 'number' && {hintsBoost: rOpts.hintsBoost})
};
stickyVars[rOpts.vendor].forEach((key) => {
opts = {
...opts,
JAMBONZ_STT_API_KEY: auth_token,
JAMBONZ_STT_URL: custom_stt_url,
...(Object.keys(options).length > 0 && {JAMBONZ_STT_OPTIONS: JSON.stringify(options)}),
};
}
(stickyVars[vendor] || []).forEach((key) => {
if (!opts[key]) opts[key] = '';
});
logger.debug({opts}, 'recognizer channel vars');
return opts;
};
@@ -517,25 +618,32 @@ module.exports = (logger) => {
ep.removeCustomEventListener(NuanceTranscriptionEvents.Transcription);
ep.removeCustomEventListener(NuanceTranscriptionEvents.TranscriptionComplete);
ep.removeCustomEventListener(NuanceTranscriptionEvents.StartOfSpeech);
ep.removeCustomEventListener(NuanceTranscriptionEvents.Error);
ep.removeCustomEventListener(NuanceTranscriptionEvents.VadDetected);
ep.removeCustomEventListener(DeepgramTranscriptionEvents.Transcription);
ep.removeCustomEventListener(DeepgramTranscriptionEvents.Connect);
ep.removeCustomEventListener(DeepgramTranscriptionEvents.ConnectFailure);
ep.removeCustomEventListener(SonioxTranscriptionEvents.Transcription);
ep.removeCustomEventListener(NvidiaTranscriptionEvents.Transcription);
ep.removeCustomEventListener(NvidiaTranscriptionEvents.TranscriptionComplete);
ep.removeCustomEventListener(NvidiaTranscriptionEvents.StartOfSpeech);
ep.removeCustomEventListener(NvidiaTranscriptionEvents.Error);
ep.removeCustomEventListener(NvidiaTranscriptionEvents.VadDetected);
ep.removeCustomEventListener(JambonzTranscriptionEvents.Transcription);
ep.removeCustomEventListener(JambonzTranscriptionEvents.Connect);
ep.removeCustomEventListener(JambonzTranscriptionEvents.ConnectFailure);
ep.removeCustomEventListener(JambonzTranscriptionEvents.Error);
};
const setSpeechCredentialsAtRuntime = (recognizer) => {
if (!recognizer) return;
if (recognizer.vendor === 'nuance') {
const {clientId, secret} = recognizer.nuanceOptions || {};
const {clientId, secret, kryptonEndpoint} = recognizer.nuanceOptions || {};
if (clientId && secret) return {client_id: clientId, secret};
if (kryptonEndpoint) return {nuance_stt_uri: kryptonEndpoint};
}
else if (recognizer.vendor === 'nvidia') {
const {rivaUri} = recognizer.nvidiaOptions || {};
@@ -545,6 +653,10 @@ module.exports = (logger) => {
const {apiKey} = recognizer.deepgramOptions || {};
if (apiKey) return {api_key: apiKey};
}
else if (recognizer.vendor === 'soniox') {
const {apiKey} = recognizer.sonioxOptions || {};
if (apiKey) return {api_key: apiKey};
}
else if (recognizer.vendor === 'ibm') {
const {ttsApiKey, ttsRegion, sttApiKey, sttRegion, instanceId} = recognizer.ibmOptions || {};
if (ttsApiKey || sttApiKey) return {
@@ -561,6 +673,7 @@ module.exports = (logger) => {
normalizeTranscription,
setChannelVarsForStt,
removeSpeechListeners,
setSpeechCredentialsAtRuntime
setSpeechCredentialsAtRuntime,
compileSonioxTranscripts
};
};

View File

@@ -219,7 +219,6 @@ class WsRequestor extends BaseRequestor {
}
_setHandlers(ws) {
this.logger.debug('WsRequestor:_setHandlers');
ws
.once('open', this._onOpen.bind(this, ws))
.once('close', this._onClose.bind(this))
@@ -274,6 +273,7 @@ class WsRequestor extends BaseRequestor {
}, 'WsRequestor - unexpected response');
this.emit('connection-failure');
this.emit('not-ready', new Error(`${res.statusCode} ${res.statusMessage}`));
this.connections++;
}
_onSocketClosed() {
@@ -338,7 +338,7 @@ class WsRequestor extends BaseRequestor {
this.logger.info({url: this.url}, `WsRequestor:_recvAck - ack to unknown msgid ${msgid}, discarding`);
return;
}
this.logger.debug({url: this.url}, `WsRequestor:_recvAck - received response to ${msgid}`);
//this.logger.debug({url: this.url}, `WsRequestor:_recvAck - received response to ${msgid}`);
this.messagesInFlight.delete(msgid);
const {success} = obj;
success && success(data);

2409
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
{
"name": "jambonz-feature-server",
"version": "v0.8.0",
"version": "v0.8.2",
"main": "app.js",
"engines": {
"node": ">= 10.16.0"
@@ -19,17 +19,18 @@
"bugs": {},
"scripts": {
"start": "node app",
"test": "NODE_ENV=test JAMBONES_HOSTING=1 HTTP_POOL=1 DRACHTIO_HOST=127.0.0.1 DRACHTIO_PORT=9060 DRACHTIO_SECRET=cymru JAMBONES_MYSQL_HOST=127.0.0.1 JAMBONES_MYSQL_PORT=3360 JAMBONES_MYSQL_USER=jambones_test JAMBONES_MYSQL_PASSWORD=jambones_test JAMBONES_MYSQL_DATABASE=jambones_test JAMBONES_REDIS_HOST=127.0.0.1 JAMBONES_REDIS_PORT=16379 JAMBONES_LOGLEVEL=error ENABLE_METRICS=0 HTTP_PORT=3000 JAMBONES_SBCS=172.38.0.10 JAMBONES_FREESWITCH=127.0.0.1:8022:JambonzR0ck$:docker-host JAMBONES_TIME_SERIES_HOST=127.0.0.1 JAMBONES_NETWORK_CIDR=172.38.0.0/16 node test/ ",
"test": "NODE_ENV=test JAMBONES_HOSTING=1 HTTP_POOL=1 ENCRYPTION_SECRET=foobar DRACHTIO_HOST=127.0.0.1 DRACHTIO_PORT=9060 DRACHTIO_SECRET=cymru JAMBONES_MYSQL_HOST=127.0.0.1 JAMBONES_MYSQL_PORT=3360 JAMBONES_MYSQL_USER=jambones_test JAMBONES_MYSQL_PASSWORD=jambones_test JAMBONES_MYSQL_DATABASE=jambones_test JAMBONES_REDIS_HOST=127.0.0.1 JAMBONES_REDIS_PORT=16379 JAMBONES_LOGLEVEL=error ENABLE_METRICS=0 HTTP_PORT=3000 JAMBONES_SBCS=172.38.0.10 JAMBONES_FREESWITCH=127.0.0.1:8022:JambonzR0ck$:docker-host JAMBONES_TIME_SERIES_HOST=127.0.0.1 JAMBONES_NETWORK_CIDR=172.38.0.0/16 node test/ ",
"coverage": "./node_modules/.bin/nyc --reporter html --report-dir ./coverage npm run test",
"jslint": "eslint app.js lib"
},
"dependencies": {
"@jambonz/db-helpers": "^0.7.4",
"@jambonz/http-health-check": "^0.0.1",
"@jambonz/realtimedb-helpers": "^0.6.5",
"@jambonz/realtimedb-helpers": "^0.7.0",
"@jambonz/speech-utils": "^0.0.12",
"@jambonz/stats-collector": "^0.1.6",
"@jambonz/time-series": "^0.2.5",
"@jambonz/verb-specifications": "^0.0.3",
"@jambonz/verb-specifications": "^0.0.11",
"@opentelemetry/api": "^1.4.0",
"@opentelemetry/exporter-jaeger": "^1.9.0",
"@opentelemetry/exporter-trace-otlp-http": "^0.35.0",
@@ -43,7 +44,7 @@
"bent": "^7.3.12",
"debug": "^4.3.4",
"deepcopy": "^2.1.0",
"drachtio-fsmrf": "^3.0.18",
"drachtio-fsmrf": "^3.0.20",
"drachtio-srf": "^4.5.23",
"express": "^4.18.2",
"ip": "^1.1.8",
@@ -56,7 +57,7 @@
"short-uuid": "^4.2.2",
"sinon": "^15.0.1",
"to-snake-case": "^1.0.0",
"undici": "^5.16.0",
"undici": "^5.19.1",
"uuid-random": "^1.3.2",
"verify-aws-sns-signature": "^0.1.0",
"ws": "^8.9.0",

View File

@@ -206,7 +206,49 @@ test('\'gather\' test - deepgram', async(t) => {
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
//console.log(JSON.stringify(obj));
t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
'gather: succeeds when using deepgram credentials');
'gather: succeeds when using deepgram credentials');
disconnect();
} catch (err) {
console.log(`error received: ${err}`);
disconnect();
t.error(err);
}
});
test('\'gather\' test - soniox', async(t) => {
if (!process.env.SONIOX_API_KEY ) {
t.pass('skipping soniox tests');
return t.end();
}
clearModule.all();
const {srf, disconnect} = require('../app');
try {
await connect(srf);
// GIVEN
let verbs = [
{
"verb": "gather",
"input": ["speech"],
"recognizer": {
"vendor": "deepgram",
"hints": ["customer support", "sales", "human resources", "HR"],
"deepgramOptions": {
"apiKey": process.env.SONIOX_API_KEY
}
},
"timeout": 10,
"actionHook": "/actionHook"
}
];
let from = "gather_success";
provisionCallHook(from, verbs);
// THEN
await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
console.log(JSON.stringify(obj));
t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
'gather: succeeds when using soniox credentials');
disconnect();
} catch (err) {

View File

@@ -143,7 +143,7 @@ test('\'transcribe\' test - deepgram', async(t) => {
{
"verb": "transcribe",
"recognizer": {
"vendor": "aws",
"vendor": "deepgram",
"hints": ["customer support", "sales", "human resources", "HR"],
"deepgramOptions": {
"apiKey": process.env.DEEPGRAM_API_KEY
@@ -160,6 +160,47 @@ test('\'transcribe\' test - deepgram', async(t) => {
t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
'transcribe: succeeds when using deepgram credentials');
disconnect();
} catch (err) {
console.log(`error received: ${err}`);
disconnect();
t.error(err);
}
});
test('\'transcribe\' test - soniox', async(t) => {
if (!process.env.SONIOX_API_KEY ) {
t.pass('skipping soniox tests');
return t.end();
}
clearModule.all();
const {srf, disconnect} = require('../app');
try {
await connect(srf);
// GIVEN
let verbs = [
{
"verb": "transcribe",
"recognizer": {
"vendor": "soniox",
"hints": ["customer support", "sales", "human resources", "HR"],
"deepgramOptions": {
"apiKey": process.env.SONIOX_API_KEY
}
},
"transcriptionHook": "/transcriptionHook"
}
];
let from = "gather_success";
provisionCallHook(from, verbs);
// THEN
await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
console.log(JSON.stringify(obj));
t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
'transcribe: succeeds when using soniox credentials');
disconnect();
} catch (err) {
console.log(`error received: ${err}`);