Mirror of https://github.com/jambonz/jambonz-feature-server.git (synced 2026-01-25 02:07:56 +00:00)

Compare commits

52 Commits
| Author | SHA1 | Date |
|---|---|---|
| | 11c5047465 | |
| | e19ea629f0 | |
| | fe529c6bfb | |
| | e980b82ec4 | |
| | 318ca19791 | |
| | e2bd211346 | |
| | 410c07fae6 | |
| | 2ebfbfb3d8 | |
| | a29795839d | |
| | 28088a4cdd | |
| | afb381eec9 | |
| | ed00ccb681 | |
| | 6e945dde9a | |
| | efdea3e514 | |
| | 5131d524ce | |
| | c0114015ea | |
| | a293ec09d0 | |
| | f71ae83ce4 | |
| | 0dd161913c | |
| | 63ab554908 | |
| | e1bd075ebc | |
| | 9de89258a1 | |
| | 145ed488db | |
| | c06a43adfa | |
| | bebc82d194 | |
| | cdc82e99ff | |
| | dd4d9aa261 | |
| | 1dcf9ee5a2 | |
| | 4b28db0946 | |
| | e7ff76b938 | |
| | f245275983 | |
| | 690deed89d | |
| | 26053ec709 | |
| | 34e8203338 | |
| | 7be3c64116 | |
| | f71d3aed8b | |
| | 5ab24337b2 | |
| | 2af76d94a6 | |
| | 4919c05181 | |
| | 3084a9d6ba | |
| | 1c683f1142 | |
| | ab1947e23e | |
| | 5527abff09 | |
| | 68827112fc | |
| | 8a9a2df128 | |
| | 3a3544a5e8 | |
| | cbeb706946 | |
| | f005262615 | |
| | 67ec28484c | |
| | 803a944240 | |
| | a5cd342e46 | |
| | e91feb64f5 | |
@@ -1,4 +1,4 @@
FROM --platform=linux/amd64 node:18-alpine3.16 as base
FROM --platform=linux/amd64 node:18.14.1-alpine3.16 as base

RUN apk --update --no-cache add --virtual .builds-deps build-base python3
@@ -18,8 +18,10 @@ Configuration is provided via environment variables:
|DRACHTIO_PORT| listening port of drachtio server for control connections (typically 9022)|yes|
|DRACHTIO_SECRET| shared secret|yes|
|ENABLE_METRICS| if 1, metrics will be generated|no|
|ENCRYPTION_SECRET| secret for credential encryption (JWT_SECRET is deprecated)|yes|
|GOOGLE_APPLICATION_CREDENTIALS| path to gcp service key file|yes|
|HTTP_PORT| tcp port to listen on for API requests from jambonz-api-server|yes|
|JAMBONES_GATHER_EARLY_HINTS_MATCH| if true and hints are provided, gather will opportunistically review interim transcripts if possible to reduce ASR latency|no|
|JAMBONES_FREESWITCH| IP:port:secret for Freeswitch server (e.g. '127.0.0.1:8021:JambonzR0ck$')|yes|
|JAMBONES_LOGLEVEL| log level for application, 'info' or 'debug'|no|
|JAMBONES_MYSQL_HOST| mysql host|yes|
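As a side note on the JAMBONES_FREESWITCH format above, a minimal sketch of how the IP:port:secret string could be split; the helper name is hypothetical and not part of this codebase:

// Hypothetical helper, shown only to illustrate the 'IP:port:secret' format.
function parseFreeswitchEnv(value) {
  const [address, port, ...secretParts] = value.split(':');
  if (!address || !port || secretParts.length === 0) {
    throw new Error('JAMBONES_FREESWITCH must be in IP:port:secret format');
  }
  // join() handles secrets that themselves contain ':'
  return {address, port: parseInt(port, 10), secret: secretParts.join(':')};
}

console.log(parseFreeswitchEnv('127.0.0.1:8021:JambonzR0ck$'));
// { address: '127.0.0.1', port: 8021, secret: 'JambonzR0ck$' }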
app.js

@@ -8,6 +8,7 @@ assert.ok(process.env.DRACHTIO_SECRET, 'missing DRACHTIO_SECRET env var');
assert.ok(process.env.JAMBONES_FREESWITCH, 'missing JAMBONES_FREESWITCH env var');
assert.ok(process.env.JAMBONES_REDIS_HOST, 'missing JAMBONES_REDIS_HOST env var');
assert.ok(process.env.JAMBONES_NETWORK_CIDR || process.env.K8S, 'missing JAMBONES_SUBNET env var');
assert.ok(process.env.ENCRYPTION_SECRET || process.env.JWT_SECRET, 'missing ENCRYPTION_SECRET env var');

const Srf = require('drachtio-srf');
const srf = new Srf();
@@ -104,7 +104,7 @@ router.post('/', async(req, res) => {
proxy: `sip:${sbcAddress}`,
localSdp: ep.local.sdp
});
if (target.auth) opts.auth = this.target.auth;
if (target.auth) opts.auth = target.auth;

/**
@@ -27,7 +27,11 @@ module.exports = function(srf, logger) {
function initLocals(req, res, next) {
const callId = req.get('Call-ID');
logger.info({callId}, 'new incoming call');
logger.info({
callId,
callingNumber: req.callingNumber,
calledNumber: req.calledNumber
}, 'new incoming call');
if (!req.has('X-Account-Sid')) {
logger.info('getAccountDetails - rejecting call due to missing X-Account-Sid header');
return res.send(500);
@@ -511,12 +511,24 @@ class CallSession extends Emitter {
async enableBotMode(gather, autoEnable) {
try {
if (this.backgroundGatherTask) {
this.logger.info('CallSession:enableBotMode - bot mode currently enabled, ignoring request to start again');
return;
}
const t = normalizeJambones(this.logger, [gather]);
this.backgroundGatherTask = makeTask(this.logger, t[0]);
const task = makeTask(this.logger, t[0]);
if (this.isBotModeEnabled) {
const currInput = this.backgroundGatherTask.input;
const newInput = task.input;
if (JSON.stringify(currInput) === JSON.stringify(newInput)) {
this.logger.info('CallSession:enableBotMode - bot mode currently enabled, ignoring request to start again');
return;
}
else {
this.logger.info({currInput, newInput},
'CallSession:enableBotMode - restarting background gather to apply new input type');
this.backgroundGatherTask.sticky = false;
this.disableBotMode();
}
}
this.backgroundGatherTask = task;
this._bargeInEnabled = true;
this.backgroundGatherTask
.once('dtmf', this._clearTasks.bind(this, this.backgroundGatherTask))

@@ -528,13 +540,15 @@ class CallSession extends Emitter {
const {span, ctx} = this.rootSpan.startChildSpan(`background-gather:${this.backgroundGatherTask.summary}`);
this.backgroundGatherTask.span = span;
this.backgroundGatherTask.ctx = ctx;
this.backgroundGatherTask.sticky = autoEnable;
this.backgroundGatherTask.exec(this, resources)
.then(() => {
this.logger.info('CallSession:enableBotMode: gather completed');
this.backgroundGatherTask && this.backgroundGatherTask.removeAllListeners();
this.backgroundGatherTask && this.backgroundGatherTask.span.end();
const sticky = this.backgroundGatherTask?.sticky;
this.backgroundGatherTask = null;
if (autoEnable && !this.callGone && !this._stopping && this._bargeInEnabled) {
if (sticky && !this.callGone && !this._stopping && this._bargeInEnabled) {
this.logger.info('CallSession:enableBotMode: restarting background gather');
setImmediate(() => this.enableBotMode(gather, true));
}
@@ -636,7 +650,9 @@ class CallSession extends Emitter {
return {
speech_credential_sid: credential.speech_credential_sid,
client_id: credential.client_id,
secret: credential.secret
secret: credential.secret,
nuance_tts_uri: credential.nuance_tts_uri,
nuance_stt_uri: credential.nuance_stt_uri
};
}
else if ('deepgram' === vendor) {

@@ -645,6 +661,12 @@ class CallSession extends Emitter {
api_key: credential.api_key
};
}
else if ('soniox' === vendor) {
return {
speech_credential_sid: credential.speech_credential_sid,
api_key: credential.api_key
};
}
else if ('ibm' === vendor) {
return {
speech_credential_sid: credential.speech_credential_sid,

@@ -654,6 +676,14 @@ class CallSession extends Emitter {
stt_region: credential.stt_region
};
}
else if (vendor.startsWith('custom:')) {
return {
speech_credential_sid: credential.speech_credential_sid,
auth_token: credential.auth_token,
custom_stt_url: credential.custom_stt_url,
custom_tts_url: credential.custom_tts_url
};
}
}
else {
writeAlerts({
@@ -684,7 +714,7 @@ class CallSession extends Emitter {
let skip = false;
this.currentTask = task;
if (TaskName.Gather === task.name && this.isBotModeEnabled) {
if (this.backgroundGatherTask.updateTaskInProgress(task)) {
if (this.backgroundGatherTask.updateTaskInProgress(task) !== false) {
this.logger.info(`CallSession:exec skipping #${stackNum}:${taskNum}: ${task.name}`);
skip = true;
}

@@ -748,7 +778,6 @@ class CallSession extends Emitter {
trackTmpFile(path) {
// TODO: don't add if its already in the list (should we make it a set?)
this.logger.debug(`adding tmp file to track ${path}`);
this.tmpFiles.add(path);
}
@@ -1123,14 +1152,14 @@ class CallSession extends Emitter {
_injectTasks(newTasks) {
const gatherPos = this.tasks.map((t) => t.name).indexOf(TaskName.Gather);
const currentlyExecutingGather = this.currentTask?.name === TaskName.Gather;
/*
this.logger.debug({
currentTaskList: listTaskNames(this.tasks),
newContent: listTaskNames(newTasks),
currentlyExecutingGather,
gatherPos
}, 'CallSession:_injectTasks - starting');
*/
const killGather = () => {
this.logger.debug('CallSession:_injectTasks - killing current gather because we have new content');
this.currentTask.kill(this);

@@ -1139,10 +1168,11 @@ class CallSession extends Emitter {
if (-1 === gatherPos) {
/* no gather in the stack simply append tasks */
this.tasks.push(...newTasks);
/*
this.logger.debug({
updatedTaskList: listTaskNames(this.tasks)
}, 'CallSession:_injectTasks - completed (simple append)');
*/
/* we do need to kill the current gather if we are executing one */
if (currentlyExecutingGather) killGather();
return;
@@ -1170,12 +1200,10 @@ class CallSession extends Emitter {
this.replaceApplication(t);
}
else if (process.env.JAMBONES_INJECT_CONTENT) {
this.logger.debug({tasks: listTaskNames(t)}, 'CallSession:_onCommand - queueing tasks (injecting content)');
this._injectTasks(t);
this.logger.info({tasks: listTaskNames(this.tasks)}, 'CallSession:_onCommand - updated task list');
}
else {
this.logger.debug({tasks: listTaskNames(t)}, 'CallSession:_onCommand - queueing tasks');
this.tasks.push(...t);
this.logger.info({tasks: listTaskNames(this.tasks)}, 'CallSession:_onCommand - updated task list');
}

@@ -1219,7 +1247,7 @@ class CallSession extends Emitter {
this.logger.info(`CallSession:_onCommand - invalid command ${command}`);
}
if (this.wakeupResolver) {
this.logger.debug({resolution}, 'CallSession:_onCommand - got commands, waking up..');
//this.logger.debug({resolution}, 'CallSession:_onCommand - got commands, waking up..');
this.wakeupResolver(resolution);
this.wakeupResolver = null;
}
@@ -1,6 +1,7 @@
const InboundCallSession = require('./inbound-call-session');
const {createSipRecPayload} = require('../utils/siprec-utils');
const {CallStatus} = require('../utils/constants');
const {parseSiprecPayload} = require('../utils/siprec-utils');
/**
* @classdesc Subclass of InboundCallSession. This represents a CallSession that is
* established for an inbound SIPREC call.

@@ -16,6 +17,32 @@ class SipRecCallSession extends InboundCallSession {
this.metadata = metadata;
}

async _onReinvite(req, res) {
try {
this.logger.info(req.payload, 'SipRec Re-INVITE payload');
const {sdp1: reSdp1, sdp2: reSdp2, metadata: reMetadata} = await parseSiprecPayload(req, this.logger);
this.sdp1 = reSdp1;
this.sdp2 = reSdp2;
this.metadata = reMetadata;
if (this.ep && this.ep2) {
let remoteSdp = this.sdp1.replace(/sendonly/, 'sendrecv');
const newSdp1 = await this.ep.modify(remoteSdp);
remoteSdp = this.sdp2.replace(/sendonly/, 'sendrecv');
const newSdp2 = await this.ep2.modify(remoteSdp);
const combinedSdp = await createSipRecPayload(newSdp1, newSdp2, this.logger);
res.send(200, {body: combinedSdp});
this.logger.info({offer: req.body, answer: combinedSdp}, 'SipRec handling reINVITE');
}
else {
this.logger.info('got SipRec reINVITE but no endpoint and media has not been released');
res.send(488);
}
} catch (err) {
this.logger.error(err, 'Error handling reinvite');
}
}

async answerSipRecCall() {
try {
this.ms = this.getMS();
@@ -400,15 +400,19 @@ class TaskDial extends Task {
let fqdn;
if (!sbcAddress) throw new Error('no SBC found for outbound call');
this.headers = {
'X-Account-Sid': cs.accountSid,
...(req && req.has('X-CID') && {'X-CID': req.get('X-CID')}),
...(req && req.has('P-Asserted-Identity') && {'P-Asserted-Identity': req.get('P-Asserted-Identity')}),
// Put headers at the end to make sure opt.headers override all default behavior.
...this.headers
};
const opts = {
headers: req && req.has('X-CID') ? Object.assign(this.headers, {'X-CID': req.get('X-CID')}) : this.headers,
headers: this.headers,
proxy: `sip:${sbcAddress}`,
callingNumber: this.callerId || req.callingNumber
};
opts.headers = {
...opts.headers,
'X-Account-Sid': cs.accountSid
};
const t = this.target.find((t) => t.type === 'teams');
if (t) {
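For context on the "put headers at the end" comment above: object spread keeps the last occurrence of a duplicate key, so user-supplied headers placed last win over the defaults. A generic illustration of that semantics (not code from this diff):

const defaults = {'X-Account-Sid': 'acct-123', 'X-CID': 'generated-cid'};
const userHeaders = {'X-CID': 'caller-supplied-cid'};
// later spreads override earlier ones for duplicate keys
console.log({...defaults, ...userHeaders});
// { 'X-Account-Sid': 'acct-123', 'X-CID': 'caller-supplied-cid' }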
@@ -7,8 +7,10 @@ const {
AwsTranscriptionEvents,
AzureTranscriptionEvents,
DeepgramTranscriptionEvents,
SonioxTranscriptionEvents,
IbmTranscriptionEvents,
NvidiaTranscriptionEvents
NvidiaTranscriptionEvents,
JambonzTranscriptionEvents
} = require('../utils/constants');
const makeTask = require('./make_task');

@@ -33,11 +35,13 @@ class TaskGather extends Task {
setChannelVarsForStt,
normalizeTranscription,
removeSpeechListeners,
setSpeechCredentialsAtRuntime
setSpeechCredentialsAtRuntime,
compileSonioxTranscripts
} = require('../utils/transcription-utils')(logger);
this.setChannelVarsForStt = setChannelVarsForStt;
this.normalizeTranscription = normalizeTranscription;
this.removeSpeechListeners = removeSpeechListeners;
this.compileSonioxTranscripts = compileSonioxTranscripts;
[
'finishOnKey', 'input', 'numDigits', 'minDigits', 'maxDigits',

@@ -50,9 +54,9 @@ class TaskGather extends Task {
/* timeout of zero means no timeout */
this.timeout = this.timeout === 0 ? 0 : (this.timeout || 15) * 1000;
this.interim = !!this.partialResultHook || this.bargein;
this.interim = !!this.partialResultHook || this.bargein || (this.timeout > 0);
this.listenDuringPrompt = this.data.listenDuringPrompt === false ? false : true;
this.minBargeinWordCount = this.data.minBargeinWordCount || 0;
this.minBargeinWordCount = this.data.minBargeinWordCount || 1;
if (this.data.recognizer) {
const recognizer = this.data.recognizer;
this.vendor = recognizer.vendor;

@@ -66,6 +70,11 @@ class TaskGather extends Task {
if (this.asrTimeout > 0) this.asrDtmfTerminationDigit = recognizer.asrDtmfTerminationDigit;
this.isContinuousAsr = this.asrTimeout > 0;
if (Array.isArray(this.data.recognizer.hints) &&
0 == this.data.recognizer.hints.length && process.env.JAMBONES_GATHER_CLEAR_GLOBAL_HINTS_ON_EMPTY_HINTS) {
logger.debug('Gather: an empty hints array was supplied, so we will mask global hints');
this.maskGlobalSttHints = true;
}
this.data.recognizer.hints = this.data.recognizer.hints || [];
this.data.recognizer.altLanguages = this.data.recognizer.altLanguages || [];
}

@@ -85,13 +94,21 @@ class TaskGather extends Task {
/* buffer speech for continuous asr */
this._bufferedTranscripts = [];
/* buffer for soniox transcripts */
this._sonioxTranscripts = [];
this.parentTask = parentTask;
this.partialTranscriptsCount = 0;
}
get name() { return TaskName.Gather; }
get needsStt() { return this.input.includes('speech'); }
get wantsSingleUtterance() {
return this.data.recognizer?.singleUtterance === true;
}
get earlyMedia() {
return (this.sayTask && this.sayTask.earlyMedia) ||
(this.playTask && this.playTask.earlyMedia);
@@ -113,14 +130,17 @@ class TaskGather extends Task {
}
async exec(cs, {ep}) {
this.logger.debug('Gather:exec');
this.logger.debug({options: this.data}, 'Gather:exec');
await super.exec(cs);
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
const {getNuanceAccessToken, getIbmAccessToken} = cs.srf.locals.dbHelpers;
if (cs.hasGlobalSttHints) {
if (cs.hasGlobalSttHints && !this.maskGlobalSttHints) {
const {hints, hintsBoost} = cs.globalSttHints;
this.data.recognizer.hints = this.data.recognizer.hints.concat(hints);
const setOfHints = new Set(this.data.recognizer.hints
.concat(hints)
.filter((h) => typeof h === 'string' && h.length > 0));
this.data.recognizer.hints = [...setOfHints];
if (!this.data.recognizer.hintsBoost && hintsBoost) this.data.recognizer.hintsBoost = hintsBoost;
this.logger.debug({hints: this.data.recognizer.hints, hintsBoost: this.data.recognizer.hintsBoost},
'Gather:exec - applying global sttHints');

@@ -142,7 +162,8 @@ class TaskGather extends Task {
asrDtmfTerminationDigit: this.asrDtmfTerminationDigit
}, 'Gather:exec - enabling continuous ASR since it is turned on for the session');
}
if (process.env.JAMBONZ_GATHER_EARLY_HINTS_MATCH && this.needsStt &&
const {JAMBONZ_GATHER_EARLY_HINTS_MATCH, JAMBONES_GATHER_EARLY_HINTS_MATCH} = process.env;
if ((JAMBONZ_GATHER_EARLY_HINTS_MATCH || JAMBONES_GATHER_EARLY_HINTS_MATCH) && this.needsStt &&
!this.isContinuousAsr &&
this.data.recognizer?.hints?.length > 0 && this.data.recognizer?.hints?.length <= 10) {
this.earlyHintsMatch = true;
@@ -180,7 +201,6 @@ class TaskGather extends Task {
throw new Error(`No speech-to-text service credentials for ${this.vendor} have been configured`);
}
this.logger.info({sttCredentials: this.sttCredentials}, 'Gather:exec - sttCredentials');
if (this.vendor === 'nuance' && this.sttCredentials.client_id) {
/* get nuance access token */
const {client_id, secret} = this.sttCredentials;

@@ -199,7 +219,6 @@ class TaskGather extends Task {
this._startTimer();
if (this.isContinuousAsr && 0 === this.timeout) this._startAsrTimer();
if (this.input.includes('speech') && !this.listenDuringPrompt) {
this.logger.debug('Gather:exec - calling _initSpeech');
this._initSpeech(cs, ep)
.then(() => {
if (this.killed) {

@@ -287,6 +306,7 @@ class TaskGather extends Task {
this._killAudio(cs);
this.ep.removeAllListeners('dtmf');
clearTimeout(this.interDigitTimer);
this._clearAsrTimer();
this.playTask?.span.end();
this.sayTask?.span.end();
this._resolve('killed');

@@ -300,6 +320,7 @@ class TaskGather extends Task {
const {timeout} = opts;
this.timeout = timeout;
this._startTimer();
return true;
}

_onDtmf(cs, ep, evt) {
@@ -339,7 +360,6 @@ class TaskGather extends Task {
async _initSpeech(cs, ep) {
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
this.logger.debug(opts, 'TaskGather:_initSpeech - channel vars');
switch (this.vendor) {
case 'google':
this.bugname = 'google_transcribe';

@@ -371,8 +391,6 @@ class TaskGather extends Task {
this._onTranscriptionComplete.bind(this, cs, ep));
ep.addCustomEventListener(NuanceTranscriptionEvents.VadDetected,
this._onVadDetected.bind(this, cs, ep));
ep.addCustomEventListener(NuanceTranscriptionEvents.Error,
this._onNuanceError.bind(this, cs, ep));
/* stall timers until prompt finishes playing */
if ((this.sayTask || this.playTask) && this.listenDuringPrompt) {

@@ -388,14 +406,17 @@ class TaskGather extends Task {
this._onDeepGramConnectFailure.bind(this, cs, ep));
break;
case 'soniox':
this.bugname = 'soniox_transcribe';
ep.addCustomEventListener(SonioxTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
break;
case 'ibm':
this.bugname = 'ibm_transcribe';
ep.addCustomEventListener(IbmTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(IbmTranscriptionEvents.Connect, this._onIbmConnect.bind(this, cs, ep));
ep.addCustomEventListener(IbmTranscriptionEvents.ConnectFailure,
this._onIbmConnectFailure.bind(this, cs, ep));
ep.addCustomEventListener(IbmTranscriptionEvents.Error,
this._onIbmError.bind(this, cs, ep));
break;
case 'nvidia':

@@ -408,8 +429,6 @@ class TaskGather extends Task {
this._onTranscriptionComplete.bind(this, cs, ep));
ep.addCustomEventListener(NvidiaTranscriptionEvents.VadDetected,
this._onVadDetected.bind(this, cs, ep));
ep.addCustomEventListener(NvidiaTranscriptionEvents.Error,
this._onNvidiaError.bind(this, cs, ep));
/* I think nvidia has this (??) - stall timers until prompt finishes playing */
if ((this.sayTask || this.playTask) && this.listenDuringPrompt) {

@@ -418,11 +437,23 @@ class TaskGather extends Task {
break;
default:
this.notifyError({ msg: 'ASR error', details:`Invalid vendor ${this.vendor}`});
this.notifyTaskDone();
throw new Error(`Invalid vendor ${this.vendor}`);
if (this.vendor.startsWith('custom:')) {
this.bugname = `${this.vendor}_transcribe`;
ep.addCustomEventListener(JambonzTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(JambonzTranscriptionEvents.Connect, this._onJambonzConnect.bind(this, cs, ep));
ep.addCustomEventListener(JambonzTranscriptionEvents.ConnectFailure,
this._onJambonzConnectFailure.bind(this, cs, ep));
break;
}
else {
this.notifyError({ msg: 'ASR error', details:`Invalid vendor ${this.vendor}`});
this.notifyTaskDone();
throw new Error(`Invalid vendor ${this.vendor}`);
}
}
/* common handler for all stt engine errors */
ep.addCustomEventListener(JambonzTranscriptionEvents.Error, this._onJambonzError.bind(this, cs, ep));
await ep.set(opts)
.catch((err) => this.logger.info(err, 'Error setting channel variables'));
}
@@ -522,7 +553,7 @@ class TaskGather extends Task {
// make sure this is not a transcript from answering machine detection
const bugname = fsEvent.getHeader('media-bugname');
const finished = fsEvent.getHeader('transcription-session-finished');
this.logger.debug({evt, bugname, finished}, 'Gather:_onTranscription');
this.logger.debug({evt, bugname, finished}, `Gather:_onTranscription for vendor ${this.vendor}`);
if (bugname && this.bugname !== bugname) return;
if (this.vendor === 'ibm') {

@@ -530,12 +561,26 @@ class TaskGather extends Task {
}
evt = this.normalizeTranscription(evt, this.vendor, 1, this.language);
if (evt.alternatives.length === 0) {
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, continue listening');
return;
}
/* fast path: our first partial transcript exactly matches an early hint */
if (this.earlyHintsMatch && evt.is_final === false && this.partialTranscriptsCount++ === 0) {
const transcript = evt.alternatives[0].transcript?.toLowerCase();
const hints = this.data.recognizer?.hints || [];
if (hints.find((h) => h.toLowerCase() === transcript)) {
this.logger.debug({evt}, 'Gather:_onTranscription: early hint match');
this._resolve('speech', evt);
return;
}
}
/* count words for bargein feature */
const words = evt.alternatives[0]?.transcript.split(' ').length;
const bufferedWords = this._bufferedTranscripts.reduce((count, e) => {
return count + e.alternatives[0]?.transcript.split(' ').length;
}, 0);
const bufferedWords = this._sonioxTranscripts.length +
this._bufferedTranscripts.reduce((count, e) => count + e.alternatives[0]?.transcript.split(' ').length, 0);
if (evt.is_final) {
if (evt.alternatives[0].transcript === '' && !this.callSession.callGone && !this.killed) {
@@ -544,7 +589,6 @@ class TaskGather extends Task {
}
else {
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, continue listening');
//this._startTranscribing(ep);
}
return;
}

@@ -568,7 +612,9 @@ class TaskGather extends Task {
return this._resolve(this._bufferedTranscripts.length > 0 ? 'speech' : 'timeout');
}
this._startAsrTimer();
return this._startTranscribing(ep);
/* some STT engines will keep listening after a final response, so no need to restart */
if (!['soniox', 'aws', 'microsoft', 'deepgram'].includes(this.vendor)) this._startTranscribing(ep);
}
else {
if (this.bargein && (words + bufferedWords) < this.minBargeinWordCount) {

@@ -579,6 +625,12 @@ class TaskGather extends Task {
return;
}
else {
if (this.vendor === 'soniox') {
/* compile transcripts into one */
this._sonioxTranscripts.push(evt.vendor.finalWords);
evt = this.compileSonioxTranscripts(this._sonioxTranscripts, 1, this.language);
this._sonioxTranscripts = [];
}
this._resolve('speech', evt);
}
}

@@ -589,6 +641,8 @@ class TaskGather extends Task {
others do not.
*/
//const isStableEnough = typeof evt.stability === 'undefined' || evt.stability > GATHER_STABILITY_THRESHOLD;
this._clearTimer();
this._startTimer();
if (this.bargein && (words + bufferedWords) >= this.minBargeinWordCount) {
if (!this.playComplete) {
this.logger.debug({transcript: evt.alternatives[0].transcript}, 'killing audio due to speech');

@@ -602,6 +656,13 @@ class TaskGather extends Task {
this.cs.requestor.request('verb:hook', this.partialResultHook, Object.assign({speech: evt},
this.cs.callInfo, httpHeaders));
}
if (this.vendor === 'soniox') {
this._clearTimer();
if (evt.vendor.finalWords.length) {
this.logger.debug({evt}, 'TaskGather:_onTranscription - buffering soniox transcript');
this._sonioxTranscripts.push(evt.vendor.finalWords);
}
}
}
}
_onEndOfUtterance(cs, ep) {

@@ -610,34 +671,53 @@ class TaskGather extends Task {
this._killAudio(cs);
}
if (!this.resolved && !this.killed && !this._bufferedTranscripts.length) {
/**
* By default, Gather asks google for multiple utterances.
* The reason is that we can sometimes get an 'end_of_utterance' event without
* getting a transcription. This can happen if someone coughs or mumbles.
* For that reason don't ask for a single utterance and we'll terminate the transcribe operation
* once we get a final transcript.
* However, if the usr has specified a singleUtterance, then we need to restart here
* since we dont have a final transcript yet.
*/
if (!this.resolved && !this.killed && !this._bufferedTranscripts.length && this.wantsSingleUtterance) {
this._startTranscribing(ep);
}
}
_onStartOfSpeech(cs, ep) {
this.logger.debug('TaskGather:_onStartOfSpeech');
if (this.bargein) {
this._killAudio(cs);
}
}
_onTranscriptionComplete(cs, ep) {
this.logger.debug('TaskGather:_onTranscriptionComplete');
}
_onNuanceError(cs, ep, evt) {
const {code, error, details} = evt;
if (code === 404 && error === 'No speech') {
this.logger.debug({code, error, details}, 'TaskGather:_onNuanceError');
return this._resolve('timeout');
}
this.logger.info({code, error, details}, 'TaskGather:_onNuanceError');
if (code === 413 && error === 'Too much speech') {
return this._resolve('timeout');
}
}
_onNvidiaError(cs, ep, evt) {
this.logger.info({evt}, 'TaskGather:_onNvidiaError');
}
_onDeepgramConnect(_cs, _ep) {
this.logger.debug('TaskGather:_onDeepgramConnect');
}
_onJambonzConnect(_cs, _ep) {
this.logger.debug('TaskGather:_onJambonzConnect');
}
_onJambonzError(cs, _ep, evt) {
this.logger.info({evt}, 'TaskGather:_onJambonzError');
const {writeAlerts, AlertType} = cs.srf.locals;
if (this.vendor === 'nuance') {
const {code, error} = evt;
if (code === 404 && error === 'No speech') return this._resolve('timeout');
if (code === 413 && error === 'Too much speech') return this._resolve('timeout');
}
this.logger.info({evt}, 'TaskGather:_onJambonzError');
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: `Custom speech vendor ${this.vendor} error: ${evt.error}`,
vendor: this.vendor,
}).catch((err) => this.logger.info({err}, 'Error generating alert for jambonz custom connection failure'));
this.notifyError({msg: 'ASR error', details:`Custom speech vendor ${this.vendor} error: ${evt.error}`});
}
_onDeepGramConnectFailure(cs, _ep, evt) {
const {reason} = evt;

@@ -652,6 +732,19 @@ class TaskGather extends Task {
this.notifyError({msg: 'ASR error', details:`Failed connecting to speech vendor deepgram: ${reason}`});
this.notifyTaskDone();
}
_onJambonzConnectFailure(cs, _ep, evt) {
const {reason} = evt;
const {writeAlerts, AlertType} = cs.srf.locals;
this.logger.info({evt}, 'TaskGather:_onJambonzConnectFailure');
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: `Failed connecting to ${this.vendor} speech recognizer: ${reason}`,
vendor: this.vendor,
}).catch((err) => this.logger.info({err}, 'Error generating alert for jambonz custom connection failure'));
this.notifyError({msg: 'ASR error', details:`Failed connecting to speech vendor ${this.vendor}: ${reason}`});
this.notifyTaskDone();
}
_onIbmConnect(_cs, _ep) {
this.logger.debug('TaskGather:_onIbmConnect');

@@ -701,6 +794,10 @@ class TaskGather extends Task {
if (this.resolved) return;
this.resolved = true;
// Clear dtmf event
if (this.dtmfBargein) {
this.ep.removeAllListeners('dtmf');
}
clearTimeout(this.interDigitTimer);
this._clearTimer();
@@ -2,6 +2,7 @@ const Task = require('./task');
const {TaskName, TaskPreconditions, ListenEvents, ListenStatus} = require('../utils/constants');
const makeTask = require('./make_task');
const moment = require('moment');
const MAX_PLAY_AUDIO_QUEUE_SIZE = 10;
class TaskListen extends Task {
constructor(logger, opts, parentTask) {

@@ -20,6 +21,8 @@ class TaskListen extends Task {
this.nested = parentTask instanceof Task;
this.results = {};
this.playAudioQueue = [];
this.isPlayingAudioFromQueue = false;
if (this.transcribe) this.transcribeTask = makeTask(logger, {'transcribe': opts.transcribe}, this);
}

@@ -58,6 +61,7 @@ class TaskListen extends Task {
super.kill(cs);
this.logger.debug(`TaskListen:kill endpoint connected? ${this.ep && this.ep.connected}`);
this._clearTimer();
this.playAudioQueue = [];
if (this.ep && this.ep.connected) {
this.logger.debug('TaskListen:kill closing websocket');
try {

@@ -184,16 +188,36 @@ class TaskListen extends Task {
this.notifyTaskDone();
}
async _onPlayAudio(ep, evt) {
this.logger.info(`received play_audio event: ${JSON.stringify(evt)}`);
async _playAudio(ep, evt, logger) {
try {
const results = await ep.play(evt.file);
this.logger.debug(`Finished playing file, result: ${JSON.stringify(results)}`);
logger.debug(`Finished playing file, result: ${JSON.stringify(results)}`);
ep.forkAudioSendText({type: 'playDone', data: Object.assign({id: evt.id}, results)});
} catch (err) {
logger.error({err}, 'Error playing file');
}
catch (err) {
this.logger.error({err}, 'Error playing file');
}
async _onPlayAudio(ep, evt) {
this.logger.info(`received play_audio event: ${JSON.stringify(evt)}`);
if (!evt.queuePlay) {
this.playAudioQueue = [];
this._playAudio(ep, evt, this.logger);
this.isPlayingAudioFromQueue = false;
return;
}
if (this.playAudioQueue.length <= MAX_PLAY_AUDIO_QUEUE_SIZE) {
this.playAudioQueue.push(evt);
}
if (this.isPlayingAudioFromQueue) return;
this.isPlayingAudioFromQueue = true;
while (this.playAudioQueue.length > 0) {
await this._playAudio(ep, this.playAudioQueue.shift(), this.logger);
}
this.isPlayingAudioFromQueue = false;
}
_onKillAudio(ep) {
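For orientation only, a hedged sketch of the shape of a play_audio command as read by the _onPlayAudio handler above; only the fields that handler consumes (file, id, queuePlay) are shown, and the exact wire framing used by the audio fork is not part of this diff:

// Illustrative object only; field names taken from the handler above, values invented.
const playAudioEvt = {
  file: 'https://example.com/prompts/welcome.wav', // audio to play toward the caller
  id: 'prompt-1',                                  // echoed back in the playDone message
  queuePlay: true                                  // queue behind in-progress playback (queue capped at 10)
};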
@@ -37,6 +37,7 @@ class TaskPlay extends Task {
}, this.timeoutSecs * 1000);
}
try {
this.notifyStatus({event: 'start-playback'});
while (!this.killed && (this.loop === 'forever' || this.loop--) && this.ep.connected) {
if (cs.isInConference) {
const {memberId, confName, confUuid} = cs;

@@ -80,7 +81,8 @@ class TaskPlay extends Task {
this.killPlayToConfMember(this.ep, memberId, confName);
}
else {
await this.ep.api('uuid_break', this.ep.uuid).catch((err) => this.logger.info(err, 'Error killing audio'));
this.notifyStatus({event: 'kill-playback'});
this.ep.api('uuid_break', this.ep.uuid).catch((err) => this.logger.info(err, 'Error killing audio'));
}
}
}
@@ -27,7 +27,7 @@ class TaskRestDial extends Task {
*/
async exec(cs) {
await super.exec(cs);
this.req = cs.req;
this.canCancel = true;
this._setCallTimer();
await this.awaitTaskDone();

@@ -36,15 +36,15 @@ class TaskRestDial extends Task {
kill(cs) {
super.kill(cs);
this._clearCallTimer();
if (this.req) {
this.req.cancel();
this.req = null;
if (this.canCancel && cs?.req) {
this.canCancel = false;
cs.req.cancel();
}
this.notifyTaskDone();
}
async _onConnect(dlg) {
this.req = null;
this.canCancel = false;
const cs = this.callSession;
cs.setDialog(dlg);

@@ -79,7 +79,7 @@ class TaskRestDial extends Task {
_onCallStatus(status) {
this.logger.debug(`CallStatus: ${status}`);
if (status >= 200) {
this.req = null;
this.canCancel = false;
this._clearCallTimer();
if (status !== 200) this.notifyTaskDone();
}
@@ -143,7 +143,7 @@ class TaskSay extends Task {
span.end();
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.TTS_NOT_PROVISIONED,
alert_type: AlertType.TTS_FAILURE,
vendor,
detail: err.message
}).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure'));

@@ -154,7 +154,6 @@ class TaskSay extends Task {
const arr = this.text.map((t) => generateAudio(t));
const filepath = (await Promise.all(arr)).filter((fp) => fp && fp.length);
this.logger.debug({filepath}, 'synthesized files for tts');
this.notifyStatus({event: 'start-playback'});
while (!this.killed && (this.loop === 'forever' || this.loop--) && this.ep?.connected) {
@@ -3,12 +3,14 @@ const {
TaskName,
TaskPreconditions,
GoogleTranscriptionEvents,
AzureTranscriptionEvents,
AwsTranscriptionEvents,
NuanceTranscriptionEvents,
AwsTranscriptionEvents,
AzureTranscriptionEvents,
DeepgramTranscriptionEvents,
SonioxTranscriptionEvents,
IbmTranscriptionEvents,
NvidiaTranscriptionEvents
NvidiaTranscriptionEvents,
JambonzTranscriptionEvents
} = require('../utils/constants');
const { normalizeJambones } = require('@jambonz/verb-specifications');

@@ -22,11 +24,13 @@ class TaskTranscribe extends Task {
setChannelVarsForStt,
normalizeTranscription,
removeSpeechListeners,
setSpeechCredentialsAtRuntime
setSpeechCredentialsAtRuntime,
compileSonioxTranscripts
} = require('../utils/transcription-utils')(logger);
this.setChannelVarsForStt = setChannelVarsForStt;
this.normalizeTranscription = normalizeTranscription;
this.removeSpeechListeners = removeSpeechListeners;
this.compileSonioxTranscripts = compileSonioxTranscripts;
this.transcriptionHook = this.data.transcriptionHook;
this.earlyMedia = this.data.earlyMedia === true || (parentTask && parentTask.earlyMedia);

@@ -40,6 +44,9 @@ class TaskTranscribe extends Task {
/* let credentials be supplied in the recognizer object at runtime */
this.sttCredentials = setSpeechCredentialsAtRuntime(recognizer);
/* buffer for soniox transcripts */
this._sonioxTranscripts = [];
recognizer.hints = recognizer.hints || [];
recognizer.altLanguages = recognizer.altLanguages || [];
}

@@ -183,8 +190,6 @@ class TaskTranscribe extends Task {
this._onStartOfSpeech.bind(this, cs, ep, channel));
ep.addCustomEventListener(NuanceTranscriptionEvents.TranscriptionComplete,
this._onTranscriptionComplete.bind(this, cs, ep, channel));
ep.addCustomEventListener(AzureTranscriptionEvents.Error,
this._onNuanceError.bind(this, cs, ep, channel));
break;
case 'deepgram':
this.bugname = 'deepgram_transcribe';

@@ -195,7 +200,11 @@ class TaskTranscribe extends Task {
ep.addCustomEventListener(DeepgramTranscriptionEvents.ConnectFailure,
this._onDeepGramConnectFailure.bind(this, cs, ep, channel));
break;
case 'soniox':
this.bugname = 'soniox_transcribe';
ep.addCustomEventListener(SonioxTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
break;
case 'ibm':
this.bugname = 'ibm_transcribe';
ep.addCustomEventListener(IbmTranscriptionEvents.Transcription,

@@ -204,8 +213,6 @@ class TaskTranscribe extends Task {
this._onIbmConnect.bind(this, cs, ep, channel));
ep.addCustomEventListener(IbmTranscriptionEvents.ConnectFailure,
this._onIbmConnectFailure.bind(this, cs, ep, channel));
ep.addCustomEventListener(IbmTranscriptionEvents.Error,
this._onIbmError.bind(this, cs, ep, channel));
break;
case 'nvidia':

@@ -218,14 +225,13 @@ class TaskTranscribe extends Task {
this._onTranscriptionComplete.bind(this, cs, ep));
ep.addCustomEventListener(NvidiaTranscriptionEvents.VadDetected,
this._onVadDetected.bind(this, cs, ep));
ep.addCustomEventListener(NvidiaTranscriptionEvents.Error,
this._onNvidiaError.bind(this, cs, ep));
break;
default:
throw new Error(`Invalid vendor ${this.vendor}`);
}
/* common handler for all stt engine errors */
ep.addCustomEventListener(JambonzTranscriptionEvents.Error, this._onJambonzError.bind(this, cs, ep));
await ep.set(opts)
.catch((err) => this.logger.info(err, 'Error setting channel variables'));
@@ -253,8 +259,11 @@ class TaskTranscribe extends Task {
this.logger.debug({evt}, 'TaskTranscribe:_onTranscription - before normalization');
evt = this.normalizeTranscription(evt, this.vendor, channel, this.language);
this.logger.debug({evt}, 'TaskTranscribe:_onTranscription');
if (evt.alternatives.length === 0) {
this.logger.info({evt}, 'TaskTranscribe:_onTranscription - got empty transcript, continue listening');
return;
}
if (evt.alternatives[0]?.transcript === '' && !cs.callGone && !this.killed) {
if (['microsoft', 'deepgram'].includes(this.vendor)) {

@@ -267,6 +276,15 @@ class TaskTranscribe extends Task {
return;
}
if (this.vendor === 'soniox') {
/* compile transcripts into one */
this._sonioxTranscripts.push(evt.vendor.finalWords);
if (evt.is_final) {
evt = this.compileSonioxTranscripts(this._sonioxTranscripts, 1, this.language);
this._sonioxTranscripts = [];
}
}
if (this.transcriptionHook) {
const b3 = this.getTracingPropagation();
const httpHeaders = b3 && {b3};

@@ -315,20 +333,6 @@ class TaskTranscribe extends Task {
this._timer = null;
}
}
_onNuanceError(_cs, _ep, _channel, evt) {
const {code, error, details} = evt;
if (code === 404 && error === 'No speech') {
this.logger.debug({code, error, details}, 'TaskTranscribe:_onNuanceError');
return this._resolve('timeout');
}
this.logger.info({code, error, details}, 'TaskTranscribe:_onNuanceError');
if (code === 413 && error === 'Too much speech') {
return this._resolve('timeout');
}
}
_onNvidiaError(cs, ep, evt) {
this.logger.info({evt}, 'TaskGather:_onNvidiaError');
}
_onDeepgramConnect(_cs, _ep) {
this.logger.debug('TaskTranscribe:_onDeepgramConnect');
}

@@ -365,7 +369,25 @@ class TaskTranscribe extends Task {
this.notifyTaskDone();
}
_onIbmError(cs, _ep, _channel, evt) {
this.logger.info({evt}, 'TaskGather:_onIbmError');
this.logger.info({evt}, 'TaskTranscribe:_onIbmError');
}
_onJambonzError(cs, _ep, evt) {
this.logger.info({evt}, 'TaskTranscribe:_onJambonzError');
const {writeAlerts, AlertType} = cs.srf.locals;
if (this.vendor === 'nuance') {
const {code, error} = evt;
if (code === 404 && error === 'No speech') return this._resolve('timeout');
if (code === 413 && error === 'Too much speech') return this._resolve('timeout');
}
this.logger.info({evt}, 'TaskTranscribe:_onJambonzError');
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: `Custom speech vendor ${this.vendor} error: ${evt.error}`,
vendor: this.vendor,
}).catch((err) => this.logger.info({err}, 'Error generating alert for jambonz custom connection failure'));
this.notifyError({msg: 'ASR error', details:`Custom speech vendor ${this.vendor} error: ${evt.error}`});
}
@@ -267,7 +267,6 @@ module.exports = (logger) => {
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, onTranscription.bind(null, cs, ep, task));
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, onNoSpeechDetected.bind(null, cs, ep, task));
}
logger.debug({sttOpts}, 'startAmd: setting channel vars');
await ep.set(sttOpts).catch((err) => logger.info(err, 'Error setting channel variables'));
amd
@@ -86,6 +86,10 @@
"ConnectFailure": "deepgram_transcribe::connect_failed",
"Connect": "deepgram_transcribe::connect"
},
"SonioxTranscriptionEvents": {
"Transcription": "soniox_transcribe::transcription",
"Error": "soniox_transcribe::error"
},
"IbmTranscriptionEvents": {
"Transcription": "ibm_transcribe::transcription",
"ConnectFailure": "ibm_transcribe::connect_failed",

@@ -106,6 +110,12 @@
"NoSpeechDetected": "azure_transcribe::no_speech_detected",
"VadDetected": "azure_transcribe::vad_detected"
},
"JambonzTranscriptionEvents": {
"Transcription": "jambonz_transcribe::transcription",
"ConnectFailure": "jambonz_transcribe::connect_failed",
"Connect": "jambonz_transcribe::connect",
"Error": "jambonz_transcribe::error"
},
"ListenEvents": {
"Connect": "mod_audio_fork::connect",
"ConnectFailure": "mod_audio_fork::connect_failed",

@@ -147,6 +157,7 @@
"queue:status",
"dial:confirm",
"verb:hook",
"verb:status",
"jambonz:error"
],
"RecordState": {
@@ -50,6 +50,8 @@ const speechMapper = (cred) => {
const o = JSON.parse(decrypt(credential));
obj.client_id = o.client_id;
obj.secret = o.secret;
obj.nuance_tts_uri = o.nuance_tts_uri;
obj.nuance_stt_uri = o.nuance_stt_uri;
}
else if ('ibm' === obj.vendor) {
const o = JSON.parse(decrypt(credential));

@@ -62,6 +64,16 @@ const speechMapper = (cred) => {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
}
else if ('soniox' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
}
else if (obj.vendor.startsWith('custom:')) {
const o = JSON.parse(decrypt(credential));
obj.auth_token = o.auth_token;
obj.custom_stt_url = o.custom_stt_url;
obj.custom_tts_url = o.custom_tts_url;
}
} catch (err) {
console.log(err);
}

@@ -79,47 +91,13 @@ module.exports = (logger, srf) => {
const [r2] = await pp.query(sqlSpeechCredentials, account_sid);
const speech = r2.map(speechMapper);
/* search at the service provider level if we don't find it at the account level */
const haveGoogle = speech.find((s) => s.vendor === 'google');
const haveAws = speech.find((s) => s.vendor === 'aws');
const haveMicrosoft = speech.find((s) => s.vendor === 'microsoft');
const haveWellsaid = speech.find((s) => s.vendor === 'wellsaid');
const haveNuance = speech.find((s) => s.vendor === 'nuance');
const haveDeepgram = speech.find((s) => s.vendor === 'deepgram');
const haveIbm = speech.find((s) => s.vendor === 'ibm');
if (!haveGoogle || !haveAws || !haveMicrosoft || !haveWellsaid || !haveNuance || !haveIbm || !haveDeepgram) {
const [r3] = await pp.query(sqlSpeechCredentialsForSP, account_sid);
if (r3.length) {
if (!haveGoogle) {
const google = r3.find((s) => s.vendor === 'google');
if (google) speech.push(speechMapper(google));
}
if (!haveAws) {
const aws = r3.find((s) => s.vendor === 'aws');
if (aws) speech.push(speechMapper(aws));
}
if (!haveMicrosoft) {
const ms = r3.find((s) => s.vendor === 'microsoft');
if (ms) speech.push(speechMapper(ms));
}
if (!haveWellsaid) {
const wellsaid = r3.find((s) => s.vendor === 'wellsaid');
if (wellsaid) speech.push(speechMapper(wellsaid));
}
if (!haveNuance) {
const nuance = r3.find((s) => s.vendor === 'nuance');
if (nuance) speech.push(speechMapper(nuance));
}
if (!haveDeepgram) {
const deepgram = r3.find((s) => s.vendor === 'deepgram');
if (deepgram) speech.push(speechMapper(deepgram));
}
if (!haveIbm) {
const ibm = r3.find((s) => s.vendor === 'ibm');
if (ibm) speech.push(speechMapper(ibm));
}
/* add service provider creds unless we have that vendor at the account level */
const [r3] = await pp.query(sqlSpeechCredentialsForSP, account_sid);
r3.forEach((s) => {
if (!speech.find((s2) => s2.vendor === s.vendor)) {
speech.push(speechMapper(s));
}
}
});
return {
...r[0],
@@ -2,9 +2,9 @@ const crypto = require('crypto');
const algorithm = process.env.LEGACY_CRYPTO ? 'aes-256-ctr' : 'aes-256-cbc';
const iv = crypto.randomBytes(16);
const secretKey = crypto.createHash('sha256')
.update(String(process.env.JWT_SECRET))
.update(process.env.ENCRYPTION_SECRET || process.env.JWT_SECRET)
.digest('base64')
.substr(0, 32);
.substring(0, 32);

const encrypt = (text) => {
const cipher = crypto.createCipheriv(algorithm, secretKey, iv);

@@ -25,8 +25,8 @@ const decrypt = (data) => {
throw err;
}
const decipher = crypto.createDecipheriv(algorithm, secretKey, Buffer.from(hash.iv, 'hex'));
const decrpyted = Buffer.concat([decipher.update(Buffer.from(hash.content, 'hex')), decipher.final()]);
return decrpyted.toString();
const decrypted = Buffer.concat([decipher.update(Buffer.from(hash.content, 'hex')), decipher.final()]);
return decrypted.toString();
};

module.exports = {
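A hedged usage sketch for the helpers above: it assumes this module exports the encrypt and decrypt functions shown (the export list is cut off in this view), that ENCRYPTION_SECRET or the deprecated JWT_SECRET is set, and the require path is illustrative only.

// Hypothetical round-trip; path and payload are invented for illustration.
const {encrypt, decrypt} = require('./lib/utils/encrypt-decrypt');

const secretPayload = JSON.stringify({api_key: 'example-key'});
const ciphertext = encrypt(secretPayload);   // whatever encrypt() produces (an iv + content pair per decrypt() above)
const roundTripped = decrypt(ciphertext);
console.log(JSON.parse(roundTripped).api_key); // 'example-key'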
@@ -138,7 +138,6 @@ function installSrfLocals(srf, logger) {
retrieveCall,
listCalls,
deleteCall,
synthAudio,
createHash,
retrieveHash,
deleteKey,

@@ -151,11 +150,17 @@ function installSrfLocals(srf, logger) {
pushBack,
popFront,
removeFromList,
lengthOfList,
getListPosition,
lengthOfList,
} = require('@jambonz/realtimedb-helpers')({
host: process.env.JAMBONES_REDIS_HOST,
port: process.env.JAMBONES_REDIS_PORT || 6379
}, logger, tracer);
const {
synthAudio,
getNuanceAccessToken,
getIbmAccessToken,
} = require('@jambonz/realtimedb-helpers')({
} = require('@jambonz/speech-utils')({
host: process.env.JAMBONES_REDIS_HOST,
port: process.env.JAMBONES_REDIS_PORT || 6379
}, logger, tracer);
@@ -242,7 +242,8 @@ const createSipRecPayload = (sdp1, sdp2, logger) => {
.replace(/a=sendonly\r\n/g, '')
.replace(/a=direction:both\r\n/g, '');
*/
return combinedSdp;
return combinedSdp.replace(/sendrecv/g, 'recvonly');
};

module.exports = { parseSiprecPayload, createSipRecPayload } ;
@@ -5,7 +5,9 @@ const {
AwsTranscriptionEvents,
NuanceTranscriptionEvents,
DeepgramTranscriptionEvents,
NvidiaTranscriptionEvents
SonioxTranscriptionEvents,
NvidiaTranscriptionEvents,
JambonzTranscriptionEvents
} = require('./constants');

const stickyVars = {

@@ -27,6 +29,7 @@ const stickyVars = {
'AZURE_SERVICE_ENDPOINT_ID',
'AZURE_REQUEST_SNR',
'AZURE_PROFANITY_OPTION',
'AZURE_SPEECH_ALTERNATIVE_LANGUAGE_CODES',
'AZURE_SERVICE_ENDPOINT',
'AZURE_INITIAL_SPEECH_TIMEOUT_MS',
'AZURE_USE_OUTPUT_FORMAT_DETAILED',

@@ -88,9 +91,70 @@ const stickyVars = {
],
nvidia: [
'NVIDIA_HINTS'
],
soniox: [
'SONIOX_PROFANITY_FILTER',
'SONIOX_MODEL'
]
};
const compileSonioxTranscripts = (finalWordChunks, channel, language) => {
const words = finalWordChunks.flat();
const transcript = words.reduce((acc, word) => {
if (word.text === '<end>') return acc;
if ([',', '.', '?', '!'].includes(word.text)) return `${acc}${word.text}`;
return `${acc} ${word.text}`;
}, '').trim();
const realWords = words.filter((word) => ![',.!?;'].includes(word.text) && word.text !== '<end>');
const confidence = realWords.reduce((acc, word) => acc + word.confidence, 0) / realWords.length;
const alternatives = [{transcript, confidence}];
return {
language_code: language,
channel_tag: channel,
is_final: true,
alternatives,
vendor: {
name: 'soniox',
evt: words
}
};
};
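To make the reduce above concrete, a small illustration of the shape of input compileSonioxTranscripts expects; the word objects mirror the fields used above (text, confidence), but the sample values are invented:

// Invented sample data shaped like the buffered finalWords chunks consumed above.
const chunks = [
  [{text: 'hello', confidence: 0.98}, {text: ',', confidence: 0.99}],
  [{text: 'world', confidence: 0.95}, {text: '<end>', confidence: 1.0}]
];
// compileSonioxTranscripts(chunks, 1, 'en-US') would produce alternatives like
// [{transcript: 'hello, world', confidence: ~0.97}]; the '<end>' marker is dropped.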
const normalizeSoniox = (evt, channel, language) => {
const copy = JSON.parse(JSON.stringify(evt));
/* an <end> token indicates the end of an utterance */
const endTokenPos = evt.words.map((w) => w.text).indexOf('<end>');
const endpointReached = endTokenPos !== -1;
const words = endpointReached ? evt.words.slice(0, endTokenPos) : evt.words;
/* note: we can safely ignore words after the <end> token as they will be returned again */
const finalWords = words.filter((word) => word.is_final);
const nonFinalWords = words.filter((word) => !word.is_final);
const is_final = endpointReached && finalWords.length > 0;
const transcript = words.reduce((acc, word) => {
if ([',', '.', '?', '!'].includes(word.text)) return `${acc}${word.text}`;
else return `${acc} ${word.text}`;
}, '').trim();
const realWords = words.filter((word) => ![',.!?;'].includes(word.text) && word.text !== '<end>');
const confidence = realWords.reduce((acc, word) => acc + word.confidence, 0) / realWords.length;
const alternatives = [{transcript, confidence}];
return {
language_code: language,
channel_tag: channel,
is_final,
alternatives,
vendor: {
name: 'soniox',
endpointReached,
evt: copy,
finalWords,
nonFinalWords
}
};
};
const normalizeDeepgram = (evt, channel, language) => {
|
||||
const copy = JSON.parse(JSON.stringify(evt));
|
||||
const alternatives = (evt.channel?.alternatives || [])
|
||||
@@ -161,6 +225,15 @@ const normalizeGoogle = (evt, channel, language) => {
|
||||
};
|
||||
};
|
||||
|
||||
const normalizeCustom = (evt, channel, language) => {
|
||||
return {
|
||||
language_code: language,
|
||||
channel_tag: channel,
|
||||
is_final: evt.is_final,
|
||||
alternatives: [evt.alternatives[0]]
|
||||
};
|
||||
};
|
||||
|
||||
const normalizeNuance = (evt, channel, language) => {
|
||||
const copy = JSON.parse(JSON.stringify(evt));
|
||||
return {
|
||||
@@ -221,7 +294,7 @@ const normalizeAws = (evt, channel, language) => {
|
||||
module.exports = (logger) => {
|
||||
const normalizeTranscription = (evt, vendor, channel, language) => {
|
||||
|
||||
logger.debug({ evt, vendor, channel, language }, 'normalizeTranscription');
|
||||
//logger.debug({ evt, vendor, channel, language }, 'normalizeTranscription');
|
||||
switch (vendor) {
|
||||
case 'deepgram':
|
||||
return normalizeDeepgram(evt, channel, language);
|
||||
@@ -237,7 +310,12 @@ module.exports = (logger) => {
|
||||
return normalizeIbm(evt, channel, language);
|
||||
case 'nvidia':
|
||||
return normalizeNvidia(evt, channel, language);
|
||||
case 'soniox':
|
||||
return normalizeSoniox(evt, channel, language);
|
||||
default:
|
||||
if (vendor.startsWith('custom:')) {
|
||||
return normalizeCustom(evt, channel, language);
|
||||
}
|
||||
logger.error(`Unknown vendor ${vendor}`);
|
||||
return evt;
|
||||
}
|
||||
@@ -247,6 +325,7 @@ module.exports = (logger) => {
    let opts = {};
    const {enable, voiceMs = 0, mode = -1} = rOpts.vad || {};
    const vad = {enable, voiceMs, mode};
    const vendor = rOpts.vendor;

    /* voice activity detection works across vendors */
    opts = {
@@ -256,59 +335,43 @@ module.exports = (logger) => {
      ...(vad.enable && typeof vad.mode === 'number' && {RECOGNIZER_VAD_MODE: vad.mode}),
    };

    if ('google' === rOpts.vendor) {
    if ('google' === vendor) {
      let model = 'phone_call';
      if (rOpts.altLanguages.length > 0) model = task.name === TaskName.Gather ? 'command_and_search' : 'latest_long';
      opts = {
        ...opts,
        ...(sttCredentials &&
          {GOOGLE_APPLICATION_CREDENTIALS: JSON.stringify(sttCredentials.credentials)}),
        ...(rOpts.enhancedModel &&
          {GOOGLE_SPEECH_USE_ENHANCED: 1}),
        ...(rOpts.separateRecognitionPerChannel &&
          {GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL: 1}),
        ...(rOpts.profanityFilter &&
          {GOOGLE_SPEECH_PROFANITY_FILTER: 1}),
        ...(rOpts.punctuation &&
          {GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 1}),
        ...(rOpts.words &&
          {GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 1}),
        ...((rOpts.singleUtterance || task.name === TaskName.Gather) &&
          {GOOGLE_SPEECH_SINGLE_UTTERANCE: 1}),
        ...(rOpts.diarization &&
          {GOOGLE_SPEECH_SPEAKER_DIARIZATION: 1}),
        ...(sttCredentials && {GOOGLE_APPLICATION_CREDENTIALS: JSON.stringify(sttCredentials.credentials)}),
        ...(rOpts.separateRecognitionPerChannel && {GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL: 1}),
        ...(rOpts.separateRecognitionPerChannel === false && {GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL: 0}),
        ...(rOpts.profanityFilter && {GOOGLE_SPEECH_PROFANITY_FILTER: 1}),
        ...(rOpts.punctuation && {GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 1}),
        ...(rOpts.words && {GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 1}),
        ...(rOpts.singleUtterance && {GOOGLE_SPEECH_SINGLE_UTTERANCE: 1}),
        ...(rOpts.diarization && {GOOGLE_SPEECH_SPEAKER_DIARIZATION: 1}),
        ...(rOpts.diarization && rOpts.diarizationMinSpeakers > 0 &&
          {GOOGLE_SPEECH_SPEAKER_DIARIZATION_MIN_SPEAKER_COUNT: rOpts.diarizationMinSpeakers}),
        ...(rOpts.diarization && rOpts.diarizationMaxSpeakers > 0 &&
          {GOOGLE_SPEECH_SPEAKER_DIARIZATION_MAX_SPEAKER_COUNT: rOpts.diarizationMaxSpeakers}),
        ...(rOpts.enhancedModel === false &&
          {GOOGLE_SPEECH_USE_ENHANCED: 0}),
        ...(rOpts.separateRecognitionPerChannel === false &&
          {GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL: 0}),
        ...(rOpts.profanityFilter === false &&
          {GOOGLE_SPEECH_PROFANITY_FILTER: 0}),
        ...(rOpts.punctuation === false &&
          {GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 0}),
        ...(rOpts.words == false &&
          {GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 0}),
        ...((rOpts.singleUtterance === false || task.name === TaskName.Transcribe) &&
          {GOOGLE_SPEECH_SINGLE_UTTERANCE: 0}),
        ...(rOpts.diarization === false &&
          {GOOGLE_SPEECH_SPEAKER_DIARIZATION: 0}),
        ...(rOpts.enhancedModel && {GOOGLE_SPEECH_USE_ENHANCED: 1}),
        ...(rOpts.profanityFilter === false && {GOOGLE_SPEECH_PROFANITY_FILTER: 0}),
        ...(rOpts.punctuation === false && {GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 0}),
        ...(rOpts.words == false && {GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 0}),
        ...(rOpts.diarization === false && {GOOGLE_SPEECH_SPEAKER_DIARIZATION: 0}),
        ...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'string' &&
          {GOOGLE_SPEECH_HINTS: rOpts.hints.join(',')}),
        ...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'object' &&
          {GOOGLE_SPEECH_HINTS: JSON.stringify(rOpts.hints)}),
        ...(typeof rOpts.hintsBoost === 'number' &&
          {GOOGLE_SPEECH_HINTS_BOOST: rOpts.hintsBoost}),
        ...(typeof rOpts.hintsBoost === 'number' && {GOOGLE_SPEECH_HINTS_BOOST: rOpts.hintsBoost}),
        ...(rOpts.altLanguages.length > 0 &&
          {GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES: rOpts.altLanguages.join(',')}),
          {GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES: [...new Set(rOpts.altLanguages)].join(',')}),
        ...(rOpts.interactionType &&
          {GOOGLE_SPEECH_METADATA_INTERACTION_TYPE: rOpts.interactionType}),
        ...{GOOGLE_SPEECH_MODEL: rOpts.model || (task.name === TaskName.Gather ? 'latest_short' : 'phone_call')},
        ...(rOpts.naicsCode > 0 &&
          {GOOGLE_SPEECH_METADATA_INDUSTRY_NAICS_CODE: rOpts.naicsCode}),
        ...{GOOGLE_SPEECH_MODEL: rOpts.model || model},
        ...(rOpts.naicsCode > 0 && {GOOGLE_SPEECH_METADATA_INDUSTRY_NAICS_CODE: rOpts.naicsCode}),
        GOOGLE_SPEECH_METADATA_RECORDING_DEVICE_TYPE: 'phone_line',
      };
    }
    else if (['aws', 'polly'].includes(rOpts.vendor)) {
    else if (['aws', 'polly'].includes(vendor)) {
      opts = {
        ...opts,
        ...(rOpts.vocabularyName && {AWS_VOCABULARY_NAME: rOpts.vocabularyName}),
@@ -321,7 +384,7 @@ module.exports = (logger) => {
        }),
      };
    }
    else if ('microsoft' === rOpts.vendor) {
    else if ('microsoft' === vendor) {
      opts = {
        ...opts,
        ...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'string' &&
@@ -329,7 +392,7 @@ module.exports = (logger) => {
        ...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'object' &&
          {AZURE_SPEECH_HINTS: rOpts.hints.map((h) => h.phrase).join(',')}),
        ...(rOpts.altLanguages && rOpts.altLanguages.length > 0 &&
          {AZURE_SERVICE_ENDPOINT_ID: rOpts.sttCredentials}),
          {AZURE_SPEECH_ALTERNATIVE_LANGUAGE_CODES: [...new Set(rOpts.altLanguages)].join(',')}),
        ...(rOpts.requestSnr && {AZURE_REQUEST_SNR: 1}),
        ...(rOpts.profanityOption && {AZURE_PROFANITY_OPTION: rOpts.profanityOption}),
        ...(rOpts.azureServiceEndpoint && {AZURE_SERVICE_ENDPOINT: rOpts.azureServiceEndpoint}),
@@ -346,7 +409,7 @@ module.exports = (logger) => {
          {AZURE_SERVICE_ENDPOINT_ID: sttCredentials.custom_stt_endpoint})
      };
    }
    else if ('nuance' === rOpts.vendor) {
    else if ('nuance' === vendor) {
      /**
       * Note: all nuance options are in recognizer.nuanceOptions, should migrate
       * other vendor settings to similar nested structure
@@ -354,12 +417,9 @@ module.exports = (logger) => {
      const {nuanceOptions = {}} = rOpts;
      opts = {
        ...opts,
        ...(sttCredentials.access_token) &&
          {NUANCE_ACCESS_TOKEN: sttCredentials.access_token},
        ...(sttCredentials.krypton_endpoint) &&
          {NUANCE_KRYPTON_ENDPOINT: sttCredentials.krypton_endpoint},
        ...(nuanceOptions.topic) &&
          {NUANCE_TOPIC: nuanceOptions.topic},
        ...(sttCredentials.access_token) && {NUANCE_ACCESS_TOKEN: sttCredentials.access_token},
        ...(sttCredentials.nuance_stt_uri) && {NUANCE_KRYPTON_ENDPOINT: sttCredentials.nuance_stt_uri},
        ...(nuanceOptions.topic) && {NUANCE_TOPIC: nuanceOptions.topic},
        ...(nuanceOptions.utteranceDetectionMode) &&
          {NUANCE_UTTERANCE_DETECTION_MODE: nuanceOptions.utteranceDetectionMode},
        ...(nuanceOptions.punctuation || rOpts.punctuation) && {NUANCE_PUNCTUATION: nuanceOptions.punctuation},
@@ -397,7 +457,7 @@ module.exports = (logger) => {
          {NUANCE_RESOURCES: JSON.stringify(nuanceOptions.resources)},
      };
    }
    else if ('deepgram' === rOpts.vendor) {
    else if ('deepgram' === vendor) {
      const {deepgramOptions = {}} = rOpts;
      opts = {
        ...opts,
@@ -441,7 +501,30 @@ module.exports = (logger) => {
          {DEEPGRAM_SPEECH_TAG: deepgramOptions.tag}
      };
    }
    else if ('ibm' === rOpts.vendor) {
    else if ('soniox' === vendor) {
      const {sonioxOptions = {}} = rOpts;
      const {storage = {}} = sonioxOptions;
      opts = {
        ...opts,
        ...(sttCredentials.api_key) &&
          {SONIOX_API_KEY: sttCredentials.api_key},
        ...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'string' &&
          {SONIOX_HINTS: rOpts.hints.join(',')}),
        ...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'object' &&
          {SONIOX_HINTS: JSON.stringify(rOpts.hints)}),
        ...(typeof rOpts.hintsBoost === 'number' &&
          {SONIOX_HINTS_BOOST: rOpts.hintsBoost}),
        ...(sonioxOptions.model) &&
          {SONIOX_MODEL: sonioxOptions.model},
        ...((sonioxOptions.profanityFilter || rOpts.profanityFilter) && {SONIOX_PROFANITY_FILTER: 1}),
        ...(storage?.id && {SONIOX_STORAGE_ID: storage.id}),
        ...(storage?.id && storage?.title && {SONIOX_STORAGE_TITLE: storage.title}),
        ...(storage?.id && storage?.disableStoreAudio && {SONIOX_STORAGE_DISABLE_AUDIO: 1}),
        ...(storage?.id && storage?.disableStoreTranscript && {SONIOX_STORAGE_DISABLE_TRANSCRIPT: 1}),
        ...(storage?.id && storage?.disableSearch && {SONIOX_STORAGE_DISABLE_SEARCH: 1})
      };
    }
    else if ('ibm' === vendor) {
      const {ibmOptions = {}} = rOpts;
      opts = {
        ...opts,
@@ -465,7 +548,7 @@ module.exports = (logger) => {
          {IBM_SPEECH_WATSON_LEARNING_OPT_OUT: ibmOptions.watsonLearningOptOut}
      };
    }
    else if ('nvidia' === rOpts.vendor) {
    else if ('nvidia' === vendor) {
      const {nvidiaOptions = {}} = rOpts;
      opts = {
        ...opts,
@@ -494,11 +577,29 @@ module.exports = (logger) => {
          {NVIDIA_CUSTOM_CONFIGURATION: JSON.stringify(nvidiaOptions.customConfiguration)}),
      };
    }
    else if (vendor.startsWith('custom:')) {
      let {options = {}} = rOpts;
      const {auth_token, custom_stt_url} = sttCredentials;
      options = {
        ...options,
        ...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'string' &&
          {hints: rOpts.hints}),
        ...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'object' &&
          {hints: JSON.stringify(rOpts.hints)}),
        ...(typeof rOpts.hintsBoost === 'number' && {hintsBoost: rOpts.hintsBoost})
      };

    stickyVars[rOpts.vendor].forEach((key) => {
      opts = {
        ...opts,
        JAMBONZ_STT_API_KEY: auth_token,
        JAMBONZ_STT_URL: custom_stt_url,
        ...(Object.keys(options).length > 0 && {JAMBONZ_STT_OPTIONS: JSON.stringify(options)}),
      };
    }

    (stickyVars[vendor] || []).forEach((key) => {
      if (!opts[key]) opts[key] = '';
    });
    logger.debug({opts}, 'recognizer channel vars');
    return opts;
  };

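As a hedged illustration of the mapping above (not part of the diff), a recognizer like the following would, per the soniox branch, yield channel variables roughly along these lines; the model name and storage id are invented:

// illustrative recognizer input
const recognizer = {
  vendor: 'soniox',
  language: 'en-US',
  hints: ['customer support', 'sales'],
  sonioxOptions: {model: 'precision_ivr', storage: {id: 'call-1234'}}
};
// ...and approximately the channel vars the soniox branch would emit for it
const expectedOpts = {
  SONIOX_API_KEY: '<api_key from the resolved speech credentials>',
  SONIOX_HINTS: 'customer support,sales',
  SONIOX_MODEL: 'precision_ivr',
  SONIOX_STORAGE_ID: 'call-1234'
};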
@@ -517,25 +618,32 @@ module.exports = (logger) => {
    ep.removeCustomEventListener(NuanceTranscriptionEvents.Transcription);
    ep.removeCustomEventListener(NuanceTranscriptionEvents.TranscriptionComplete);
    ep.removeCustomEventListener(NuanceTranscriptionEvents.StartOfSpeech);
    ep.removeCustomEventListener(NuanceTranscriptionEvents.Error);
    ep.removeCustomEventListener(NuanceTranscriptionEvents.VadDetected);

    ep.removeCustomEventListener(DeepgramTranscriptionEvents.Transcription);
    ep.removeCustomEventListener(DeepgramTranscriptionEvents.Connect);
    ep.removeCustomEventListener(DeepgramTranscriptionEvents.ConnectFailure);

    ep.removeCustomEventListener(SonioxTranscriptionEvents.Transcription);

    ep.removeCustomEventListener(NvidiaTranscriptionEvents.Transcription);
    ep.removeCustomEventListener(NvidiaTranscriptionEvents.TranscriptionComplete);
    ep.removeCustomEventListener(NvidiaTranscriptionEvents.StartOfSpeech);
    ep.removeCustomEventListener(NvidiaTranscriptionEvents.Error);
    ep.removeCustomEventListener(NvidiaTranscriptionEvents.VadDetected);

    ep.removeCustomEventListener(JambonzTranscriptionEvents.Transcription);
    ep.removeCustomEventListener(JambonzTranscriptionEvents.Connect);
    ep.removeCustomEventListener(JambonzTranscriptionEvents.ConnectFailure);

    ep.removeCustomEventListener(JambonzTranscriptionEvents.Error);
  };

  const setSpeechCredentialsAtRuntime = (recognizer) => {
    if (!recognizer) return;
    if (recognizer.vendor === 'nuance') {
      const {clientId, secret} = recognizer.nuanceOptions || {};
      const {clientId, secret, kryptonEndpoint} = recognizer.nuanceOptions || {};
      if (clientId && secret) return {client_id: clientId, secret};
      if (kryptonEndpoint) return {nuance_stt_uri: kryptonEndpoint};
    }
    else if (recognizer.vendor === 'nvidia') {
      const {rivaUri} = recognizer.nvidiaOptions || {};
@@ -545,6 +653,10 @@ module.exports = (logger) => {
      const {apiKey} = recognizer.deepgramOptions || {};
      if (apiKey) return {api_key: apiKey};
    }
    else if (recognizer.vendor === 'soniox') {
      const {apiKey} = recognizer.sonioxOptions || {};
      if (apiKey) return {api_key: apiKey};
    }
    else if (recognizer.vendor === 'ibm') {
      const {ttsApiKey, ttsRegion, sttApiKey, sttRegion, instanceId} = recognizer.ibmOptions || {};
      if (ttsApiKey || sttApiKey) return {
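For context (again illustrative, not part of the diff), the new soniox branch above lets a verb supply its API key inline and have it resolved into speech credentials at runtime:

// illustrative only: recognizer attached to a gather/transcribe verb
const recognizer = {
  vendor: 'soniox',
  language: 'en-US',
  sonioxOptions: {apiKey: process.env.SONIOX_API_KEY}
};
// setSpeechCredentialsAtRuntime(recognizer) would then return {api_key: recognizer.sonioxOptions.apiKey}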
@@ -561,6 +673,7 @@ module.exports = (logger) => {
    normalizeTranscription,
    setChannelVarsForStt,
    removeSpeechListeners,
    setSpeechCredentialsAtRuntime
    setSpeechCredentialsAtRuntime,
    compileSonioxTranscripts
  };
};

@@ -219,7 +219,6 @@ class WsRequestor extends BaseRequestor {
  }

  _setHandlers(ws) {
    this.logger.debug('WsRequestor:_setHandlers');
    ws
      .once('open', this._onOpen.bind(this, ws))
      .once('close', this._onClose.bind(this))
@@ -274,6 +273,7 @@ class WsRequestor extends BaseRequestor {
      }, 'WsRequestor - unexpected response');
      this.emit('connection-failure');
      this.emit('not-ready', new Error(`${res.statusCode} ${res.statusMessage}`));
      this.connections++;
    }

    _onSocketClosed() {
@@ -338,7 +338,7 @@ class WsRequestor extends BaseRequestor {
      this.logger.info({url: this.url}, `WsRequestor:_recvAck - ack to unknown msgid ${msgid}, discarding`);
      return;
    }
    this.logger.debug({url: this.url}, `WsRequestor:_recvAck - received response to ${msgid}`);
    //this.logger.debug({url: this.url}, `WsRequestor:_recvAck - received response to ${msgid}`);
    this.messagesInFlight.delete(msgid);
    const {success} = obj;
    success && success(data);

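The added emit above lets callers distinguish a websocket that answered the upgrade with a non-101 status; a minimal sketch of a consumer, assuming a WsRequestor instance (an EventEmitter, per the emit calls above) is at hand:

// sketch: observe the new 'not-ready' event alongside the existing 'connection-failure'
const watchRequestor = (requestor, logger) => {
  requestor.on('connection-failure', () => logger.info('application websocket connection failed'));
  requestor.on('not-ready', (err) => logger.info({err}, `application websocket not ready: ${err.message}`));
};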
package-lock.json (generated; 2409 lines changed; diff suppressed because it is too large)

package.json (13 lines changed)
@@ -1,6 +1,6 @@
 {
   "name": "jambonz-feature-server",
   "version": "v0.8.0",
   "version": "v0.8.2",
   "main": "app.js",
   "engines": {
     "node": ">= 10.16.0"
@@ -19,17 +19,18 @@
   "bugs": {},
   "scripts": {
     "start": "node app",
     "test": "NODE_ENV=test JAMBONES_HOSTING=1 HTTP_POOL=1 DRACHTIO_HOST=127.0.0.1 DRACHTIO_PORT=9060 DRACHTIO_SECRET=cymru JAMBONES_MYSQL_HOST=127.0.0.1 JAMBONES_MYSQL_PORT=3360 JAMBONES_MYSQL_USER=jambones_test JAMBONES_MYSQL_PASSWORD=jambones_test JAMBONES_MYSQL_DATABASE=jambones_test JAMBONES_REDIS_HOST=127.0.0.1 JAMBONES_REDIS_PORT=16379 JAMBONES_LOGLEVEL=error ENABLE_METRICS=0 HTTP_PORT=3000 JAMBONES_SBCS=172.38.0.10 JAMBONES_FREESWITCH=127.0.0.1:8022:JambonzR0ck$:docker-host JAMBONES_TIME_SERIES_HOST=127.0.0.1 JAMBONES_NETWORK_CIDR=172.38.0.0/16 node test/ ",
     "test": "NODE_ENV=test JAMBONES_HOSTING=1 HTTP_POOL=1 ENCRYPTION_SECRET=foobar DRACHTIO_HOST=127.0.0.1 DRACHTIO_PORT=9060 DRACHTIO_SECRET=cymru JAMBONES_MYSQL_HOST=127.0.0.1 JAMBONES_MYSQL_PORT=3360 JAMBONES_MYSQL_USER=jambones_test JAMBONES_MYSQL_PASSWORD=jambones_test JAMBONES_MYSQL_DATABASE=jambones_test JAMBONES_REDIS_HOST=127.0.0.1 JAMBONES_REDIS_PORT=16379 JAMBONES_LOGLEVEL=error ENABLE_METRICS=0 HTTP_PORT=3000 JAMBONES_SBCS=172.38.0.10 JAMBONES_FREESWITCH=127.0.0.1:8022:JambonzR0ck$:docker-host JAMBONES_TIME_SERIES_HOST=127.0.0.1 JAMBONES_NETWORK_CIDR=172.38.0.0/16 node test/ ",
     "coverage": "./node_modules/.bin/nyc --reporter html --report-dir ./coverage npm run test",
     "jslint": "eslint app.js lib"
   },
   "dependencies": {
     "@jambonz/db-helpers": "^0.7.4",
     "@jambonz/http-health-check": "^0.0.1",
     "@jambonz/realtimedb-helpers": "^0.6.5",
     "@jambonz/realtimedb-helpers": "^0.7.0",
     "@jambonz/speech-utils": "^0.0.12",
     "@jambonz/stats-collector": "^0.1.6",
     "@jambonz/time-series": "^0.2.5",
     "@jambonz/verb-specifications": "^0.0.3",
     "@jambonz/verb-specifications": "^0.0.11",
     "@opentelemetry/api": "^1.4.0",
     "@opentelemetry/exporter-jaeger": "^1.9.0",
     "@opentelemetry/exporter-trace-otlp-http": "^0.35.0",
@@ -43,7 +44,7 @@
     "bent": "^7.3.12",
     "debug": "^4.3.4",
     "deepcopy": "^2.1.0",
     "drachtio-fsmrf": "^3.0.18",
     "drachtio-fsmrf": "^3.0.20",
     "drachtio-srf": "^4.5.23",
     "express": "^4.18.2",
     "ip": "^1.1.8",
@@ -56,7 +57,7 @@
     "short-uuid": "^4.2.2",
     "sinon": "^15.0.1",
     "to-snake-case": "^1.0.0",
     "undici": "^5.16.0",
     "undici": "^5.19.1",
     "uuid-random": "^1.3.2",
     "verify-aws-sns-signature": "^0.1.0",
     "ws": "^8.9.0",

@@ -206,7 +206,49 @@ test('\'gather\' test - deepgram', async(t) => {
    let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
    //console.log(JSON.stringify(obj));
    t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
      'gather: succeeds when using deepgram credentials');
      'gather: succeeds when using deepgram credentials');
    disconnect();
  } catch (err) {
    console.log(`error received: ${err}`);
    disconnect();
    t.error(err);
  }
});

test('\'gather\' test - soniox', async(t) => {
  if (!process.env.SONIOX_API_KEY) {
    t.pass('skipping soniox tests');
    return t.end();
  }
  clearModule.all();
  const {srf, disconnect} = require('../app');

  try {
    await connect(srf);
    // GIVEN
    let verbs = [
      {
        "verb": "gather",
        "input": ["speech"],
        "recognizer": {
"vendor": "deepgram",
|
||||
"hints": ["customer support", "sales", "human resources", "HR"],
|
||||
"deepgramOptions": {
|
||||
"apiKey": process.env.SONIOX_API_KEY
|
||||
}
|
||||
},
|
||||
"timeout": 10,
|
||||
"actionHook": "/actionHook"
|
||||
}
|
||||
];
|
||||
let from = "gather_success";
|
||||
provisionCallHook(from, verbs);
|
||||
// THEN
|
||||
await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
|
||||
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
|
||||
console.log(JSON.stringify(obj));
|
||||
t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
|
||||
'gather: succeeds when using soniox credentials');
|
||||
|
||||
disconnect();
|
||||
} catch (err) {
|
||||
|
||||
@@ -143,7 +143,7 @@ test('\'transcribe\' test - deepgram', async(t) => {
      {
        "verb": "transcribe",
        "recognizer": {
          "vendor": "aws",
          "vendor": "deepgram",
          "hints": ["customer support", "sales", "human resources", "HR"],
          "deepgramOptions": {
            "apiKey": process.env.DEEPGRAM_API_KEY
@@ -160,6 +160,47 @@ test('\'transcribe\' test - deepgram', async(t) => {
    t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
      'transcribe: succeeds when using deepgram credentials');

    disconnect();
  } catch (err) {
    console.log(`error received: ${err}`);
    disconnect();
    t.error(err);
  }
});

test('\'transcribe\' test - soniox', async(t) => {
  if (!process.env.SONIOX_API_KEY) {
    t.pass('skipping soniox tests');
    return t.end();
  }
  clearModule.all();
  const {srf, disconnect} = require('../app');

  try {
    await connect(srf);
    // GIVEN
    let verbs = [
      {
        "verb": "transcribe",
        "recognizer": {
          "vendor": "soniox",
          "hints": ["customer support", "sales", "human resources", "HR"],
"deepgramOptions": {
|
||||
"apiKey": process.env.SONIOX_API_KEY
|
||||
}
|
||||
},
|
||||
"transcriptionHook": "/transcriptionHook"
|
||||
}
|
||||
];
|
||||
let from = "gather_success";
|
||||
provisionCallHook(from, verbs);
|
||||
// THEN
|
||||
await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
|
||||
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
|
||||
console.log(JSON.stringify(obj));
|
||||
t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
|
||||
'transcribe: succeeds when using soniox credentials');
|
||||
|
||||
disconnect();
|
||||
} catch (err) {
|
||||
console.log(`error received: ${err}`);
|
||||
|
||||