fix: Re-invite sip rec does not update media (#300)

* fix: Re-invite sip rec does not update media
* fix: Re-invite sip rec does not update media
response to siprec invite should have a=recvonly if offer had a=sendonly (#298)
2026-01-25 02:07:56 +00:00 · 2023-04-05 09:46:32 -04:00 · 2023-04-04 21:02:21 -04:00 · 2023-04-03 11:13:12 -04:00 · 2023-04-01 13:20:59 -04:00 · 2023-04-01 11:35:13 -04:00
26 changed files with 2787 additions and 469 deletions
--- a/2
+++ b/2
@@ -1,4 +1,4 @@
-FROM --platform=linux/amd64 node:18-alpine3.16 as base
+FROM --platform=linux/amd64 node:18.14.1-alpine3.16 as base

 RUN apk --update --no-cache add --virtual .builds-deps build-base python3

--- a/README.md
+++ b/README.md
@@ -18,8 +18,10 @@ Configuration is provided via environment variables:
 |DRACHTIO_PORT| listening port of drachtio server for control connections (typically 9022)|yes|
 |DRACHTIO_SECRET| shared secret|yes|
 |ENABLE_METRICS| if 1, metrics will be generated|no|
+|ENCRYPTION_SECRET| secret for credential encryption (JWT_SECRET is deprecated) |yes|
 |GOOGLE_APPLICATION_CREDENTIALS| path to gcp service key file|yes|
 |HTTP_PORT| tcp port to listen on for API requests from jambonz-api-server|yes|
+|JAMBONES_GATHER_EARLY_HINTS_MATCH| if true and hints are provided, gather will opportunistically review interim transcripts if possible to reduce ASR latency |no|
 |JAMBONES_FREESWITCH| IP:port:secret for Freeswitch server (e.g. '127.0.0.1:8021:JambonzR0ck$'|yes|
 |JAMBONES_LOGLEVEL| log level for application, 'info' or 'debug'|no|
 |JAMBONES_MYSQL_HOST| mysql host|yes|
--- a/app.js
+++ b/app.js
@@ -8,6 +8,7 @@ assert.ok(process.env.DRACHTIO_SECRET, 'missing DRACHTIO_SECRET env var');
 assert.ok(process.env.JAMBONES_FREESWITCH, 'missing JAMBONES_FREESWITCH env var');
 assert.ok(process.env.JAMBONES_REDIS_HOST, 'missing JAMBONES_REDIS_HOST env var');
 assert.ok(process.env.JAMBONES_NETWORK_CIDR || process.env.K8S, 'missing JAMBONES_SUBNET env var');
+assert.ok(process.env.ENCRYPTION_SECRET || process.env.JWT_SECRET, 'missing ENCRYPTION_SECRET env var');

 const Srf = require('drachtio-srf');
 const srf = new Srf();
--- a/lib/http-routes/api/create-call.js
+++ b/lib/http-routes/api/create-call.js
@@ -104,7 +104,7 @@ router.post('/', async(req, res) => {
      proxy: `sip:${sbcAddress}`,
      localSdp: ep.local.sdp
    });
-    if (target.auth) opts.auth = this.target.auth;
+    if (target.auth) opts.auth = target.auth;


    /**
--- a/lib/middleware.js
+++ b/lib/middleware.js
@@ -27,7 +27,11 @@ module.exports = function(srf, logger) {

  function initLocals(req, res, next) {
    const callId = req.get('Call-ID');
-    logger.info({callId}, 'new incoming call');
+    logger.info({
+      callId,
+      callingNumber: req.callingNumber,
+      calledNumber: req.calledNumber
+    }, 'new incoming call');
    if (!req.has('X-Account-Sid')) {
      logger.info('getAccountDetails - rejecting call due to missing X-Account-Sid header');
      return res.send(500);
--- a/lib/session/call-session.js
+++ b/lib/session/call-session.js
@@ -511,12 +511,24 @@ class CallSession extends Emitter {

  async enableBotMode(gather, autoEnable) {
    try {
-      if (this.backgroundGatherTask) {
-        this.logger.info('CallSession:enableBotMode - bot mode currently enabled, ignoring request to start again');
-        return;
-      }
      const t = normalizeJambones(this.logger, [gather]);
-      this.backgroundGatherTask = makeTask(this.logger, t[0]);
+      const task = makeTask(this.logger, t[0]);
+
+      if (this.isBotModeEnabled) {
+        const currInput = this.backgroundGatherTask.input;
+        const newInput = task.input;
+        if (JSON.stringify(currInput) === JSON.stringify(newInput)) {
+          this.logger.info('CallSession:enableBotMode - bot mode currently enabled, ignoring request to start again');
+          return;
+        }
+        else {
+          this.logger.info({currInput, newInput},
+            'CallSession:enableBotMode - restarting background gather to apply new input type');
+          this.backgroundGatherTask.sticky = false;
+          this.disableBotMode();
+        }
+      }
+      this.backgroundGatherTask = task;
      this._bargeInEnabled = true;
      this.backgroundGatherTask
        .once('dtmf', this._clearTasks.bind(this, this.backgroundGatherTask))
@@ -528,13 +540,15 @@ class CallSession extends Emitter {
      const {span, ctx} = this.rootSpan.startChildSpan(`background-gather:${this.backgroundGatherTask.summary}`);
      this.backgroundGatherTask.span = span;
      this.backgroundGatherTask.ctx = ctx;
+      this.backgroundGatherTask.sticky = autoEnable;
      this.backgroundGatherTask.exec(this, resources)
        .then(() => {
          this.logger.info('CallSession:enableBotMode: gather completed');
          this.backgroundGatherTask && this.backgroundGatherTask.removeAllListeners();
          this.backgroundGatherTask && this.backgroundGatherTask.span.end();
+          const sticky = this.backgroundGatherTask?.sticky;
          this.backgroundGatherTask = null;
-          if (autoEnable && !this.callGone && !this._stopping && this._bargeInEnabled) {
+          if (sticky && !this.callGone && !this._stopping && this._bargeInEnabled) {
            this.logger.info('CallSession:enableBotMode: restarting background gather');
            setImmediate(() => this.enableBotMode(gather, true));
          }
@@ -636,7 +650,9 @@ class CallSession extends Emitter {
          return {
            speech_credential_sid: credential.speech_credential_sid,
            client_id: credential.client_id,
-            secret: credential.secret
+            secret: credential.secret,
+            nuance_tts_uri: credential.nuance_tts_uri,
+            nuance_stt_uri: credential.nuance_stt_uri
          };
        }
        else if ('deepgram' === vendor) {
@@ -645,6 +661,12 @@ class CallSession extends Emitter {
            api_key: credential.api_key
          };
        }
+        else if ('soniox' === vendor) {
+          return {
+            speech_credential_sid: credential.speech_credential_sid,
+            api_key: credential.api_key
+          };
+        }
        else if ('ibm' === vendor) {
          return {
            speech_credential_sid: credential.speech_credential_sid,
@@ -654,6 +676,14 @@ class CallSession extends Emitter {
            stt_region: credential.stt_region
          };
        }
+        else if (vendor.startsWith('custom:')) {
+          return {
+            speech_credential_sid: credential.speech_credential_sid,
+            auth_token: credential.auth_token,
+            custom_stt_url: credential.custom_stt_url,
+            custom_tts_url: credential.custom_tts_url
+          };
+        }
      }
      else {
        writeAlerts({
@@ -684,7 +714,7 @@ class CallSession extends Emitter {
        let skip = false;
        this.currentTask = task;
        if (TaskName.Gather === task.name && this.isBotModeEnabled) {
-          if (this.backgroundGatherTask.updateTaskInProgress(task)) {
+          if (this.backgroundGatherTask.updateTaskInProgress(task) !== false) {
            this.logger.info(`CallSession:exec skipping #${stackNum}:${taskNum}: ${task.name}`);
            skip = true;
          }
@@ -748,7 +778,6 @@ class CallSession extends Emitter {

  trackTmpFile(path) {
    // TODO: don't add if its already in the list (should we make it a set?)
-    this.logger.debug(`adding tmp file to track ${path}`);
    this.tmpFiles.add(path);
  }

@@ -1123,14 +1152,14 @@ class CallSession extends Emitter {
  _injectTasks(newTasks) {
    const gatherPos = this.tasks.map((t) => t.name).indexOf(TaskName.Gather);
    const currentlyExecutingGather = this.currentTask?.name === TaskName.Gather;
-
+    /*
    this.logger.debug({
      currentTaskList: listTaskNames(this.tasks),
      newContent: listTaskNames(newTasks),
      currentlyExecutingGather,
      gatherPos
    }, 'CallSession:_injectTasks - starting');
-
+    */
    const killGather = () => {
      this.logger.debug('CallSession:_injectTasks - killing current gather because we have new content');
      this.currentTask.kill(this);
@@ -1139,10 +1168,11 @@ class CallSession extends Emitter {
    if (-1 === gatherPos) {
      /* no gather in the stack  simply append tasks */
      this.tasks.push(...newTasks);
+      /*
      this.logger.debug({
        updatedTaskList: listTaskNames(this.tasks)
      }, 'CallSession:_injectTasks - completed (simple append)');
-
+      */
      /* we do need to kill the current gather if we are executing one */
      if (currentlyExecutingGather) killGather();
      return;
@@ -1170,12 +1200,10 @@ class CallSession extends Emitter {
            this.replaceApplication(t);
          }
          else if (process.env.JAMBONES_INJECT_CONTENT) {
-            this.logger.debug({tasks: listTaskNames(t)}, 'CallSession:_onCommand - queueing tasks (injecting content)');
            this._injectTasks(t);
            this.logger.info({tasks: listTaskNames(this.tasks)}, 'CallSession:_onCommand - updated task list');
          }
          else {
-            this.logger.debug({tasks: listTaskNames(t)}, 'CallSession:_onCommand - queueing tasks');
            this.tasks.push(...t);
            this.logger.info({tasks: listTaskNames(this.tasks)}, 'CallSession:_onCommand - updated task list');
          }
@@ -1219,7 +1247,7 @@ class CallSession extends Emitter {
        this.logger.info(`CallSession:_onCommand - invalid command ${command}`);
    }
    if (this.wakeupResolver) {
-      this.logger.debug({resolution}, 'CallSession:_onCommand - got commands, waking up..');
+      //this.logger.debug({resolution}, 'CallSession:_onCommand - got commands, waking up..');
      this.wakeupResolver(resolution);
      this.wakeupResolver = null;
    }
--- a/lib/session/siprec-call-session.js
+++ b/lib/session/siprec-call-session.js
@@ -1,6 +1,7 @@
 const InboundCallSession = require('./inbound-call-session');
 const {createSipRecPayload} = require('../utils/siprec-utils');
 const {CallStatus} = require('../utils/constants');
+const {parseSiprecPayload} = require('../utils/siprec-utils');
 /**
 * @classdesc Subclass of InboundCallSession.  This represents a CallSession that is
 * established for an inbound SIPREC call.
@@ -16,6 +17,32 @@ class SipRecCallSession extends InboundCallSession {
    this.metadata = metadata;
  }

+  async _onReinvite(req, res) {
+    try {
+      this.logger.info(req.payload, 'SipRec Re-INVITE payload');
+      const {sdp1: reSdp1, sdp2: reSdp2, metadata: reMetadata} = await parseSiprecPayload(req, this.logger);
+      this.sdp1 = reSdp1;
+      this.sdp2 = reSdp2;
+      this.metadata = reMetadata;
+
+      if (this.ep && this.ep2) {
+        let remoteSdp = this.sdp1.replace(/sendonly/, 'sendrecv');
+        const newSdp1 = await this.ep.modify(remoteSdp);
+        remoteSdp = this.sdp2.replace(/sendonly/, 'sendrecv');
+        const newSdp2 = await this.ep2.modify(remoteSdp);
+        const combinedSdp = await createSipRecPayload(newSdp1, newSdp2, this.logger);
+        res.send(200, {body: combinedSdp});
+        this.logger.info({offer: req.body, answer: combinedSdp}, 'SipRec handling reINVITE');
+      }
+      else {
+        this.logger.info('got SipRec reINVITE but no endpoint and media has not been released');
+        res.send(488);
+      }
+    } catch (err) {
+      this.logger.error(err, 'Error handling reinvite');
+    }
+  }
+
  async answerSipRecCall() {
    try {
      this.ms = this.getMS();
--- a/lib/tasks/dial.js
+++ b/lib/tasks/dial.js
@@ -400,15 +400,19 @@ class TaskDial extends Task {
    let fqdn;

    if (!sbcAddress) throw new Error('no SBC found for outbound call');
+    this.headers = {
+      'X-Account-Sid': cs.accountSid,
+      ...(req && req.has('X-CID') && {'X-CID': req.get('X-CID')}),
+      ...(req && req.has('P-Asserted-Identity') && {'P-Asserted-Identity': req.get('P-Asserted-Identity')}),
+      // Spread the task's own headers last so that they override all of the defaults above.
+      ...this.headers
+    };
+
    const opts = {
-      headers: req && req.has('X-CID') ? Object.assign(this.headers, {'X-CID': req.get('X-CID')}) : this.headers,
+      headers: this.headers,
      proxy: `sip:${sbcAddress}`,
      callingNumber: this.callerId || req.callingNumber
    };
-    opts.headers = {
-      ...opts.headers,
-      'X-Account-Sid': cs.accountSid
-    };

    const t = this.target.find((t) => t.type === 'teams');
    if (t) {
--- a/lib/tasks/gather.js
+++ b/lib/tasks/gather.js
@@ -7,8 +7,10 @@ const {
  AwsTranscriptionEvents,
  AzureTranscriptionEvents,
  DeepgramTranscriptionEvents,
+  SonioxTranscriptionEvents,
  IbmTranscriptionEvents,
-  NvidiaTranscriptionEvents
+  NvidiaTranscriptionEvents,
+  JambonzTranscriptionEvents
 } = require('../utils/constants');

 const makeTask = require('./make_task');
@@ -33,11 +35,13 @@ class TaskGather extends Task {
      setChannelVarsForStt,
      normalizeTranscription,
      removeSpeechListeners,
-      setSpeechCredentialsAtRuntime
+      setSpeechCredentialsAtRuntime,
+      compileSonioxTranscripts
    } = require('../utils/transcription-utils')(logger);
    this.setChannelVarsForStt = setChannelVarsForStt;
    this.normalizeTranscription = normalizeTranscription;
    this.removeSpeechListeners = removeSpeechListeners;
+    this.compileSonioxTranscripts = compileSonioxTranscripts;

    [
      'finishOnKey', 'input', 'numDigits', 'minDigits', 'maxDigits',
@@ -50,9 +54,9 @@ class TaskGather extends Task {

    /* timeout of zero means no timeout */
    this.timeout = this.timeout === 0 ? 0 : (this.timeout || 15) * 1000;
-    this.interim = !!this.partialResultHook || this.bargein;
+    this.interim = !!this.partialResultHook || this.bargein || (this.timeout > 0);
    this.listenDuringPrompt = this.data.listenDuringPrompt === false ? false : true;
-    this.minBargeinWordCount = this.data.minBargeinWordCount || 0;
+    this.minBargeinWordCount = this.data.minBargeinWordCount || 1;
    if (this.data.recognizer) {
      const recognizer = this.data.recognizer;
      this.vendor = recognizer.vendor;
@@ -66,6 +70,11 @@ class TaskGather extends Task {
      if (this.asrTimeout > 0) this.asrDtmfTerminationDigit = recognizer.asrDtmfTerminationDigit;
      this.isContinuousAsr = this.asrTimeout > 0;

+      if (Array.isArray(this.data.recognizer.hints) &&
+        0 == this.data.recognizer.hints.length && process.env.JAMBONES_GATHER_CLEAR_GLOBAL_HINTS_ON_EMPTY_HINTS) {
+        logger.debug('Gather: an empty hints array was supplied, so we will mask global hints');
+        this.maskGlobalSttHints = true;
+      }
      this.data.recognizer.hints = this.data.recognizer.hints || [];
      this.data.recognizer.altLanguages = this.data.recognizer.altLanguages || [];
    }
@@ -85,13 +94,21 @@ class TaskGather extends Task {
    /* buffer speech for continuous asr */
    this._bufferedTranscripts = [];

+    /* buffer for soniox transcripts */
+    this._sonioxTranscripts = [];
+
    this.parentTask = parentTask;
+    this.partialTranscriptsCount = 0;
  }

  get name() { return TaskName.Gather; }

  get needsStt() { return this.input.includes('speech'); }

+  get wantsSingleUtterance() {
+    return this.data.recognizer?.singleUtterance === true;
+  }
+
  get earlyMedia() {
    return (this.sayTask && this.sayTask.earlyMedia) ||
      (this.playTask && this.playTask.earlyMedia);
@@ -113,14 +130,17 @@ class TaskGather extends Task {
  }

  async exec(cs, {ep}) {
-    this.logger.debug('Gather:exec');
+    this.logger.debug({options: this.data}, 'Gather:exec');
    await super.exec(cs);
    const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
    const {getNuanceAccessToken, getIbmAccessToken} = cs.srf.locals.dbHelpers;

-    if (cs.hasGlobalSttHints) {
+    if (cs.hasGlobalSttHints && !this.maskGlobalSttHints) {
      const {hints, hintsBoost} = cs.globalSttHints;
-      this.data.recognizer.hints = this.data.recognizer.hints.concat(hints);
+      const setOfHints = new Set(this.data.recognizer.hints
+        .concat(hints)
+        .filter((h) => typeof h === 'string' && h.length > 0));
+      this.data.recognizer.hints = [...setOfHints];
      if (!this.data.recognizer.hintsBoost && hintsBoost) this.data.recognizer.hintsBoost = hintsBoost;
      this.logger.debug({hints: this.data.recognizer.hints, hintsBoost: this.data.recognizer.hintsBoost},
        'Gather:exec - applying global sttHints');
@@ -142,7 +162,8 @@ class TaskGather extends Task {
        asrDtmfTerminationDigit: this.asrDtmfTerminationDigit
      }, 'Gather:exec - enabling continuous ASR since it is turned on for the session');
    }
-    if (process.env.JAMBONZ_GATHER_EARLY_HINTS_MATCH && this.needsStt &&
+    const {JAMBONZ_GATHER_EARLY_HINTS_MATCH, JAMBONES_GATHER_EARLY_HINTS_MATCH} = process.env;
+    if ((JAMBONZ_GATHER_EARLY_HINTS_MATCH || JAMBONES_GATHER_EARLY_HINTS_MATCH) && this.needsStt &&
      !this.isContinuousAsr &&
      this.data.recognizer?.hints?.length > 0 && this.data.recognizer?.hints?.length <= 10) {
      this.earlyHintsMatch = true;
@@ -180,7 +201,6 @@ class TaskGather extends Task {
      throw new Error(`No speech-to-text service credentials for ${this.vendor} have been configured`);
    }

-    this.logger.info({sttCredentials: this.sttCredentials}, 'Gather:exec - sttCredentials');
    if (this.vendor === 'nuance' && this.sttCredentials.client_id) {
      /* get nuance access token */
      const {client_id, secret} = this.sttCredentials;
@@ -199,7 +219,6 @@ class TaskGather extends Task {
      this._startTimer();
      if (this.isContinuousAsr && 0 === this.timeout) this._startAsrTimer();
      if (this.input.includes('speech') && !this.listenDuringPrompt) {
-        this.logger.debug('Gather:exec - calling _initSpeech');
        this._initSpeech(cs, ep)
          .then(() => {
            if (this.killed) {
@@ -287,6 +306,7 @@ class TaskGather extends Task {
    this._killAudio(cs);
    this.ep.removeAllListeners('dtmf');
    clearTimeout(this.interDigitTimer);
+    this._clearAsrTimer();
    this.playTask?.span.end();
    this.sayTask?.span.end();
    this._resolve('killed');
@@ -300,6 +320,7 @@ class TaskGather extends Task {
    const {timeout} = opts;
    this.timeout = timeout;
    this._startTimer();
+    return true;
  }

  _onDtmf(cs, ep, evt) {
@@ -339,7 +360,6 @@ class TaskGather extends Task {

  async _initSpeech(cs, ep) {
    const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
-    this.logger.debug(opts, 'TaskGather:_initSpeech - channel vars');
    switch (this.vendor) {
      case 'google':
        this.bugname = 'google_transcribe';
@@ -371,8 +391,6 @@ class TaskGather extends Task {
          this._onTranscriptionComplete.bind(this, cs, ep));
        ep.addCustomEventListener(NuanceTranscriptionEvents.VadDetected,
          this._onVadDetected.bind(this, cs, ep));
-        ep.addCustomEventListener(NuanceTranscriptionEvents.Error,
-          this._onNuanceError.bind(this, cs, ep));

        /* stall timers until prompt finishes playing */
        if ((this.sayTask || this.playTask) && this.listenDuringPrompt) {
@@ -388,14 +406,17 @@ class TaskGather extends Task {
          this._onDeepGramConnectFailure.bind(this, cs, ep));
        break;

+      case 'soniox':
+        this.bugname = 'soniox_transcribe';
+        ep.addCustomEventListener(SonioxTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
+        break;
+
      case 'ibm':
        this.bugname = 'ibm_transcribe';
        ep.addCustomEventListener(IbmTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
        ep.addCustomEventListener(IbmTranscriptionEvents.Connect, this._onIbmConnect.bind(this, cs, ep));
        ep.addCustomEventListener(IbmTranscriptionEvents.ConnectFailure,
          this._onIbmConnectFailure.bind(this, cs, ep));
-        ep.addCustomEventListener(IbmTranscriptionEvents.Error,
-          this._onIbmError.bind(this, cs, ep));
        break;

      case 'nvidia':
@@ -408,8 +429,6 @@ class TaskGather extends Task {
          this._onTranscriptionComplete.bind(this, cs, ep));
        ep.addCustomEventListener(NvidiaTranscriptionEvents.VadDetected,
          this._onVadDetected.bind(this, cs, ep));
-        ep.addCustomEventListener(NvidiaTranscriptionEvents.Error,
-          this._onNvidiaError.bind(this, cs, ep));

        /* I think nvidia has this (??) - stall timers until prompt finishes playing */
        if ((this.sayTask || this.playTask) && this.listenDuringPrompt) {
@@ -418,11 +437,23 @@ class TaskGather extends Task {
        break;

      default:
-        this.notifyError({ msg: 'ASR error', details:`Invalid vendor ${this.vendor}`});
-        this.notifyTaskDone();
-        throw new Error(`Invalid vendor ${this.vendor}`);
+        if (this.vendor.startsWith('custom:')) {
+          this.bugname = `${this.vendor}_transcribe`;
+          ep.addCustomEventListener(JambonzTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
+          ep.addCustomEventListener(JambonzTranscriptionEvents.Connect, this._onJambonzConnect.bind(this, cs, ep));
+          ep.addCustomEventListener(JambonzTranscriptionEvents.ConnectFailure,
+            this._onJambonzConnectFailure.bind(this, cs, ep));
+          break;
+        }
+        else {
+          this.notifyError({ msg: 'ASR error', details:`Invalid vendor ${this.vendor}`});
+          this.notifyTaskDone();
+          throw new Error(`Invalid vendor ${this.vendor}`);
+        }
    }

+    /* common handler for all stt engine errors */
+    ep.addCustomEventListener(JambonzTranscriptionEvents.Error, this._onJambonzError.bind(this, cs, ep));
    await ep.set(opts)
      .catch((err) => this.logger.info(err, 'Error setting channel variables'));
  }
@@ -522,7 +553,7 @@ class TaskGather extends Task {
    // make sure this is not a transcript from answering machine detection
    const bugname = fsEvent.getHeader('media-bugname');
    const finished = fsEvent.getHeader('transcription-session-finished');
-    this.logger.debug({evt, bugname, finished}, 'Gather:_onTranscription');
+    this.logger.debug({evt, bugname, finished}, `Gather:_onTranscription for vendor ${this.vendor}`);
    if (bugname && this.bugname !== bugname) return;

    if (this.vendor === 'ibm') {
@@ -530,12 +561,26 @@ class TaskGather extends Task {
    }

    evt = this.normalizeTranscription(evt, this.vendor, 1, this.language);
+    if (evt.alternatives.length === 0) {
+      this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, continue listening');
+      return;
+    }
+
+    /* fast path: our first partial transcript exactly matches an early hint */
+    if (this.earlyHintsMatch && evt.is_final === false && this.partialTranscriptsCount++ === 0) {
+      const transcript = evt.alternatives[0].transcript?.toLowerCase();
+      const hints = this.data.recognizer?.hints || [];
+      if (hints.find((h) => h.toLowerCase() === transcript)) {
+        this.logger.debug({evt}, 'Gather:_onTranscription: early hint match');
+        this._resolve('speech', evt);
+        return;
+      }
+    }

    /* count words for bargein feature */
    const words = evt.alternatives[0]?.transcript.split(' ').length;
-    const bufferedWords = this._bufferedTranscripts.reduce((count, e) => {
-      return count + e.alternatives[0]?.transcript.split(' ').length;
-    }, 0);
+    const bufferedWords = this._sonioxTranscripts.length +
+      this._bufferedTranscripts.reduce((count, e) => count + e.alternatives[0]?.transcript.split(' ').length, 0);

    if (evt.is_final) {
      if (evt.alternatives[0].transcript === '' && !this.callSession.callGone && !this.killed) {
@@ -544,7 +589,6 @@ class TaskGather extends Task {
        }
        else {
          this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, continue listening');
-          //this._startTranscribing(ep);
        }
        return;
      }
@@ -568,7 +612,9 @@ class TaskGather extends Task {
          return this._resolve(this._bufferedTranscripts.length > 0 ? 'speech' : 'timeout');
        }
        this._startAsrTimer();
-        return this._startTranscribing(ep);
+
+        /* some STT engines will keep listening after a final response, so no need to restart */
+        if (!['soniox', 'aws', 'microsoft', 'deepgram'].includes(this.vendor)) this._startTranscribing(ep);
      }
      else {
        if (this.bargein && (words + bufferedWords) < this.minBargeinWordCount) {
@@ -579,6 +625,12 @@ class TaskGather extends Task {
          return;
        }
        else {
+          if (this.vendor === 'soniox') {
+            /* compile transcripts into one */
+            this._sonioxTranscripts.push(evt.vendor.finalWords);
+            evt = this.compileSonioxTranscripts(this._sonioxTranscripts, 1, this.language);
+            this._sonioxTranscripts = [];
+          }
          this._resolve('speech', evt);
        }
      }
@@ -589,6 +641,8 @@ class TaskGather extends Task {
        others do not.
      */
      //const isStableEnough = typeof evt.stability === 'undefined' || evt.stability > GATHER_STABILITY_THRESHOLD;
+      this._clearTimer();
+      this._startTimer();
      if (this.bargein && (words + bufferedWords) >= this.minBargeinWordCount) {
        if (!this.playComplete) {
          this.logger.debug({transcript: evt.alternatives[0].transcript}, 'killing audio due to speech');
@@ -602,6 +656,13 @@ class TaskGather extends Task {
        this.cs.requestor.request('verb:hook', this.partialResultHook,  Object.assign({speech: evt},
          this.cs.callInfo, httpHeaders));
      }
+      if (this.vendor === 'soniox') {
+        this._clearTimer();
+        if (evt.vendor.finalWords.length) {
+          this.logger.debug({evt}, 'TaskGather:_onTranscription - buffering soniox transcript');
+          this._sonioxTranscripts.push(evt.vendor.finalWords);
+        }
+      }
    }
  }
  _onEndOfUtterance(cs, ep) {
@@ -610,34 +671,53 @@ class TaskGather extends Task {
      this._killAudio(cs);
    }

-    if (!this.resolved && !this.killed && !this._bufferedTranscripts.length) {
+    /**
+     * By default, Gather asks google for multiple utterances.
+     * The reason is that we can sometimes get an 'end_of_utterance' event without
+     * getting a transcription.  This can happen if someone coughs or mumbles.
+     * For that reason we don't ask for a single utterance, and we terminate the transcribe operation
+     * once we get a final transcript.
+     * However, if the user has specified singleUtterance, then we need to restart here
+     * since we don't have a final transcript yet.
+     */
+    if (!this.resolved && !this.killed && !this._bufferedTranscripts.length && this.wantsSingleUtterance) {
      this._startTranscribing(ep);
    }
  }

  _onStartOfSpeech(cs, ep) {
    this.logger.debug('TaskGather:_onStartOfSpeech');
+    if (this.bargein) {
+      this._killAudio(cs);
+    }
  }
  _onTranscriptionComplete(cs, ep) {
    this.logger.debug('TaskGather:_onTranscriptionComplete');
  }
-  _onNuanceError(cs, ep, evt) {
-    const {code, error, details} = evt;
-    if (code === 404 && error === 'No speech') {
-      this.logger.debug({code, error, details}, 'TaskGather:_onNuanceError');
-      return this._resolve('timeout');
-    }
-    this.logger.info({code, error, details}, 'TaskGather:_onNuanceError');
-    if (code === 413 && error === 'Too much speech') {
-      return this._resolve('timeout');
-    }
-  }
-  _onNvidiaError(cs, ep, evt) {
-    this.logger.info({evt}, 'TaskGather:_onNvidiaError');
-  }
  _onDeepgramConnect(_cs, _ep) {
    this.logger.debug('TaskGather:_onDeepgramConnect');
  }
+  _onJambonzConnect(_cs, _ep) {
+    this.logger.debug('TaskGather:_onJambonzConnect');
+  }
+  _onJambonzError(cs, _ep, evt) {
+    this.logger.info({evt}, 'TaskGather:_onJambonzError');
+    const {writeAlerts, AlertType} = cs.srf.locals;
+
+    if (this.vendor === 'nuance') {
+      const {code, error} = evt;
+      if (code === 404 && error === 'No speech') return this._resolve('timeout');
+      if (code === 413 && error === 'Too much speech') return this._resolve('timeout');
+    }
+    this.logger.info({evt}, 'TaskGather:_onJambonzError');
+    writeAlerts({
+      account_sid: cs.accountSid,
+      alert_type: AlertType.STT_FAILURE,
+      message: `Custom speech vendor ${this.vendor} error: ${evt.error}`,
+      vendor: this.vendor,
+    }).catch((err) => this.logger.info({err}, 'Error generating alert for jambonz custom connection failure'));
+    this.notifyError({msg: 'ASR error', details:`Custom speech vendor ${this.vendor} error: ${evt.error}`});
+  }

  _onDeepGramConnectFailure(cs, _ep, evt) {
    const {reason} = evt;
@@ -652,6 +732,19 @@ class TaskGather extends Task {
    this.notifyError({msg: 'ASR error', details:`Failed connecting to speech vendor deepgram: ${reason}`});
    this.notifyTaskDone();
  }
+  _onJambonzConnectFailure(cs, _ep, evt) {
+    const {reason} = evt;
+    const {writeAlerts, AlertType} = cs.srf.locals;
+    this.logger.info({evt}, 'TaskGather:_onJambonzConnectFailure');
+    writeAlerts({
+      account_sid: cs.accountSid,
+      alert_type: AlertType.STT_FAILURE,
+      message: `Failed connecting to ${this.vendor} speech recognizer: ${reason}`,
+      vendor: this.vendor,
+    }).catch((err) => this.logger.info({err}, 'Error generating alert for jambonz custom connection failure'));
+    this.notifyError({msg: 'ASR error', details:`Failed connecting to speech vendor ${this.vendor}: ${reason}`});
+    this.notifyTaskDone();
+  }

  _onIbmConnect(_cs, _ep) {
    this.logger.debug('TaskGather:_onIbmConnect');
@@ -701,6 +794,10 @@ class TaskGather extends Task {
    if (this.resolved) return;

    this.resolved = true;
+    // Clear dtmf event
+    if (this.dtmfBargein) {
+      this.ep.removeAllListeners('dtmf');
+    }
    clearTimeout(this.interDigitTimer);
    this._clearTimer();

--- a/lib/tasks/listen.js
+++ b/lib/tasks/listen.js
@@ -2,6 +2,7 @@ const Task = require('./task');
 const {TaskName, TaskPreconditions, ListenEvents, ListenStatus} = require('../utils/constants');
 const makeTask = require('./make_task');
 const moment = require('moment');
+const MAX_PLAY_AUDIO_QUEUE_SIZE = 10;

 class TaskListen extends Task {
  constructor(logger, opts, parentTask) {
@@ -20,6 +21,8 @@ class TaskListen extends Task {
    this.nested = parentTask instanceof Task;

    this.results = {};
+    this.playAudioQueue = [];
+    this.isPlayingAudioFromQueue = false;

    if (this.transcribe) this.transcribeTask = makeTask(logger, {'transcribe': opts.transcribe}, this);
  }
@@ -58,6 +61,7 @@ class TaskListen extends Task {
    super.kill(cs);
    this.logger.debug(`TaskListen:kill endpoint connected? ${this.ep && this.ep.connected}`);
    this._clearTimer();
+    this.playAudioQueue = [];
    if (this.ep && this.ep.connected) {
      this.logger.debug('TaskListen:kill closing websocket');
      try {
@@ -184,16 +188,36 @@ class TaskListen extends Task {
    this.notifyTaskDone();
  }

-  async _onPlayAudio(ep, evt) {
-    this.logger.info(`received play_audio event: ${JSON.stringify(evt)}`);
+  async _playAudio(ep, evt, logger) {
    try {
      const results = await ep.play(evt.file);
-      this.logger.debug(`Finished playing file, result: ${JSON.stringify(results)}`);
+      logger.debug(`Finished playing file, result: ${JSON.stringify(results)}`);
      ep.forkAudioSendText({type: 'playDone', data: Object.assign({id: evt.id}, results)});
+    } catch (err) {
+      logger.error({err}, 'Error playing file');
    }
-    catch (err) {
-      this.logger.error({err}, 'Error playing file');
+  }
+
+  /* Handle a play_audio event: play at once, or queue behind earlier files when evt.queuePlay is set */
+  async _onPlayAudio(ep, evt) {
+    this.logger.info(`received play_audio event: ${JSON.stringify(evt)}`);
+    if (!evt.queuePlay) {
+      /* immediate play: discard anything still queued; the play itself is not awaited */
+      this.playAudioQueue = [];
+      this._playAudio(ep, evt, this.logger);
+      /* NOTE(review): also clears the flag while a drain loop may still be awaiting a play - confirm intended */
+      this.isPlayingAudioFromQueue = false;
+      return;
    }
+
+    /* cap pending events at MAX_PLAY_AUDIO_QUEUE_SIZE (`<=` admitted one extra); overflow is dropped */
+    if (this.playAudioQueue.length < MAX_PLAY_AUDIO_QUEUE_SIZE) this.playAudioQueue.push(evt);
+    if (this.isPlayingAudioFromQueue) return;
+    this.isPlayingAudioFromQueue = true;
+    while (this.playAudioQueue.length > 0) {
+      await this._playAudio(ep, this.playAudioQueue.shift(), this.logger);
+    }
+    this.isPlayingAudioFromQueue = false;
  }

  _onKillAudio(ep) {
--- a/lib/tasks/play.js
+++ b/lib/tasks/play.js
@@ -37,6 +37,7 @@ class TaskPlay extends Task {
      }, this.timeoutSecs * 1000);
    }
    try {
+      this.notifyStatus({event: 'start-playback'});
      while (!this.killed && (this.loop === 'forever' || this.loop--) && this.ep.connected) {
        if (cs.isInConference) {
          const {memberId, confName, confUuid} = cs;
@@ -80,7 +81,8 @@ class TaskPlay extends Task {
        this.killPlayToConfMember(this.ep, memberId, confName);
      }
      else {
-        await this.ep.api('uuid_break', this.ep.uuid).catch((err) => this.logger.info(err, 'Error killing audio'));
+        this.notifyStatus({event: 'kill-playback'});
+        this.ep.api('uuid_break', this.ep.uuid).catch((err) => this.logger.info(err, 'Error killing audio'));
      }
    }
  }
--- a/lib/tasks/rest_dial.js
+++ b/lib/tasks/rest_dial.js
@@ -27,7 +27,7 @@ class TaskRestDial extends Task {
  */
  async exec(cs) {
    await super.exec(cs);
-    this.req = cs.req;
+    this.canCancel = true;

    this._setCallTimer();
    await this.awaitTaskDone();
@@ -36,15 +36,15 @@ class TaskRestDial extends Task {
  kill(cs) {
    super.kill(cs);
    this._clearCallTimer();
-    if (this.req) {
-      this.req.cancel();
-      this.req = null;
+    if (this.canCancel && cs?.req) {
+      this.canCancel = false;
+      cs.req.cancel();
    }
    this.notifyTaskDone();
  }

  async _onConnect(dlg) {
-    this.req = null;
+    this.canCancel = false;
    const cs = this.callSession;
    cs.setDialog(dlg);

@@ -79,7 +79,7 @@ class TaskRestDial extends Task {
  _onCallStatus(status) {
    this.logger.debug(`CallStatus: ${status}`);
    if (status >= 200) {
-      this.req = null;
+      this.canCancel = false;
      this._clearCallTimer();
      if (status !== 200) this.notifyTaskDone();
    }
--- a/lib/tasks/say.js
+++ b/lib/tasks/say.js
@@ -143,7 +143,7 @@ class TaskSay extends Task {
          span.end();
          writeAlerts({
            account_sid: cs.accountSid,
-            alert_type: AlertType.TTS_NOT_PROVISIONED,
+            alert_type: AlertType.TTS_FAILURE,
            vendor,
            detail: err.message
          }).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure'));
@@ -154,7 +154,6 @@ class TaskSay extends Task {

      const arr = this.text.map((t) => generateAudio(t));
      const filepath = (await Promise.all(arr)).filter((fp) => fp && fp.length);
-      this.logger.debug({filepath}, 'synthesized files for tts');
      this.notifyStatus({event: 'start-playback'});

      while (!this.killed && (this.loop === 'forever' || this.loop--) && this.ep?.connected) {
--- a/lib/tasks/transcribe.js
+++ b/lib/tasks/transcribe.js
@@ -3,12 +3,14 @@ const {
  TaskName,
  TaskPreconditions,
  GoogleTranscriptionEvents,
-  AzureTranscriptionEvents,
-  AwsTranscriptionEvents,
  NuanceTranscriptionEvents,
+  AwsTranscriptionEvents,
+  AzureTranscriptionEvents,
  DeepgramTranscriptionEvents,
+  SonioxTranscriptionEvents,
  IbmTranscriptionEvents,
-  NvidiaTranscriptionEvents
+  NvidiaTranscriptionEvents,
+  JambonzTranscriptionEvents
 } = require('../utils/constants');
 const { normalizeJambones } = require('@jambonz/verb-specifications');

@@ -22,11 +24,13 @@ class TaskTranscribe extends Task {
      setChannelVarsForStt,
      normalizeTranscription,
      removeSpeechListeners,
-      setSpeechCredentialsAtRuntime
+      setSpeechCredentialsAtRuntime,
+      compileSonioxTranscripts
    } = require('../utils/transcription-utils')(logger);
    this.setChannelVarsForStt = setChannelVarsForStt;
    this.normalizeTranscription = normalizeTranscription;
    this.removeSpeechListeners = removeSpeechListeners;
+    this.compileSonioxTranscripts = compileSonioxTranscripts;

    this.transcriptionHook = this.data.transcriptionHook;
    this.earlyMedia = this.data.earlyMedia === true || (parentTask && parentTask.earlyMedia);
@@ -40,6 +44,9 @@ class TaskTranscribe extends Task {
    /* let credentials be supplied in the recognizer object at runtime */
    this.sttCredentials = setSpeechCredentialsAtRuntime(recognizer);

+    /* buffer for soniox transcripts */
+    this._sonioxTranscripts = [];
+
    recognizer.hints = recognizer.hints || [];
    recognizer.altLanguages = recognizer.altLanguages || [];
  }
@@ -183,8 +190,6 @@ class TaskTranscribe extends Task {
          this._onStartOfSpeech.bind(this, cs, ep, channel));
        ep.addCustomEventListener(NuanceTranscriptionEvents.TranscriptionComplete,
          this._onTranscriptionComplete.bind(this, cs, ep, channel));
-        ep.addCustomEventListener(AzureTranscriptionEvents.Error,
-          this._onNuanceError.bind(this, cs, ep, channel));
        break;
      case 'deepgram':
        this.bugname = 'deepgram_transcribe';
@@ -195,7 +200,11 @@ class TaskTranscribe extends Task {
        ep.addCustomEventListener(DeepgramTranscriptionEvents.ConnectFailure,
          this._onDeepGramConnectFailure.bind(this, cs, ep, channel));
        break;
-
+      case 'soniox':
+        this.bugname = 'soniox_transcribe';
+        ep.addCustomEventListener(SonioxTranscriptionEvents.Transcription,
+          this._onTranscription.bind(this, cs, ep, channel));
+        break;
      case 'ibm':
        this.bugname = 'ibm_transcribe';
        ep.addCustomEventListener(IbmTranscriptionEvents.Transcription,
@@ -204,8 +213,6 @@ class TaskTranscribe extends Task {
          this._onIbmConnect.bind(this, cs, ep, channel));
        ep.addCustomEventListener(IbmTranscriptionEvents.ConnectFailure,
          this._onIbmConnectFailure.bind(this, cs, ep, channel));
-        ep.addCustomEventListener(IbmTranscriptionEvents.Error,
-          this._onIbmError.bind(this, cs, ep, channel));
        break;

      case 'nvidia':
@@ -218,14 +225,13 @@ class TaskTranscribe extends Task {
          this._onTranscriptionComplete.bind(this, cs, ep));
        ep.addCustomEventListener(NvidiaTranscriptionEvents.VadDetected,
          this._onVadDetected.bind(this, cs, ep));
-        ep.addCustomEventListener(NvidiaTranscriptionEvents.Error,
-          this._onNvidiaError.bind(this, cs, ep));
        break;
-
      default:
        throw new Error(`Invalid vendor ${this.vendor}`);
    }

+    /* common handler for all stt engine errors */
+    ep.addCustomEventListener(JambonzTranscriptionEvents.Error, this._onJambonzError.bind(this, cs, ep));
    await ep.set(opts)
      .catch((err) => this.logger.info(err, 'Error setting channel variables'));

@@ -253,8 +259,11 @@ class TaskTranscribe extends Task {
    this.logger.debug({evt}, 'TaskTranscribe:_onTranscription - before normalization');

    evt = this.normalizeTranscription(evt, this.vendor, channel, this.language);
-
    this.logger.debug({evt}, 'TaskTranscribe:_onTranscription');
+    if (evt.alternatives.length === 0) {
+      this.logger.info({evt}, 'TaskTranscribe:_onTranscription - got empty transcript, continue listening');
+      return;
+    }

    if (evt.alternatives[0]?.transcript === '' && !cs.callGone && !this.killed) {
      if (['microsoft', 'deepgram'].includes(this.vendor)) {
@@ -267,6 +276,15 @@ class TaskTranscribe extends Task {
      return;
    }

+    if (this.vendor === 'soniox') {
+      /* compile transcripts into one */
+      this._sonioxTranscripts.push(evt.vendor.finalWords);
+      if (evt.is_final) {
+        evt = this.compileSonioxTranscripts(this._sonioxTranscripts, 1, this.language);
+        this._sonioxTranscripts = [];
+      }
+    }
+
    if (this.transcriptionHook) {
      const b3 = this.getTracingPropagation();
      const httpHeaders = b3 && {b3};
@@ -315,20 +333,6 @@ class TaskTranscribe extends Task {
      this._timer = null;
    }
  }
-  _onNuanceError(_cs, _ep, _channel, evt) {
-    const {code, error, details} = evt;
-    if (code === 404 && error === 'No speech') {
-      this.logger.debug({code, error, details}, 'TaskTranscribe:_onNuanceError');
-      return this._resolve('timeout');
-    }
-    this.logger.info({code, error, details}, 'TaskTranscribe:_onNuanceError');
-    if (code === 413 && error === 'Too much speech') {
-      return this._resolve('timeout');
-    }
-  }
-  _onNvidiaError(cs, ep, evt) {
-    this.logger.info({evt}, 'TaskGather:_onNvidiaError');
-  }
  _onDeepgramConnect(_cs, _ep) {
    this.logger.debug('TaskTranscribe:_onDeepgramConnect');
  }
@@ -365,7 +369,25 @@ class TaskTranscribe extends Task {
    this.notifyTaskDone();
  }
  _onIbmError(cs, _ep, _channel, evt) {
-    this.logger.info({evt}, 'TaskGather:_onIbmError');
+    this.logger.info({evt}, 'TaskTranscribe:_onIbmError');
+  }
+  /* Common handler for stt engine errors: nuance speech timeouts call _resolve, anything else raises an alert */
+  _onJambonzError(cs, _ep, evt) {
+    this.logger.info({evt}, 'TaskTranscribe:_onJambonzError');
+    const {writeAlerts, AlertType} = cs.srf.locals;
+    if (this.vendor === 'nuance') {
+      const {code, error} = evt;
+      if (code === 404 && error === 'No speech') return this._resolve('timeout');
+      if (code === 413 && error === 'Too much speech') return this._resolve('timeout');
+    }
+    /* evt was already logged on entry; do not log it a second time before alerting */
+    writeAlerts({
+      account_sid: cs.accountSid,
+      alert_type: AlertType.STT_FAILURE,
+      message: `Custom speech vendor ${this.vendor} error: ${evt.error}`,
+      vendor: this.vendor,
+    }).catch((err) => this.logger.info({err}, 'Error generating alert for jambonz custom connection failure'));
+    this.notifyError({msg: 'ASR error', details:`Custom speech vendor ${this.vendor} error: ${evt.error}`});
  }


--- a/lib/utils/amd-utils.js
+++ b/lib/utils/amd-utils.js
@@ -267,7 +267,6 @@ module.exports = (logger) => {
      ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, onTranscription.bind(null, cs, ep, task));
      ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, onNoSpeechDetected.bind(null, cs, ep, task));
    }
-    logger.debug({sttOpts}, 'startAmd: setting channel vars');
    await ep.set(sttOpts).catch((err) => logger.info(err, 'Error setting channel variables'));

    amd
--- a/lib/utils/constants.json
+++ b/lib/utils/constants.json
@@ -86,6 +86,10 @@
    "ConnectFailure": "deepgram_transcribe::connect_failed",
    "Connect": "deepgram_transcribe::connect"
  },
+  "SonioxTranscriptionEvents": {
+    "Transcription": "soniox_transcribe::transcription",
+    "Error": "soniox_transcribe::error"
+  },
  "IbmTranscriptionEvents": {
    "Transcription": "ibm_transcribe::transcription",
    "ConnectFailure": "ibm_transcribe::connect_failed",
@@ -106,6 +110,12 @@
    "NoSpeechDetected": "azure_transcribe::no_speech_detected",
    "VadDetected": "azure_transcribe::vad_detected"
  },
+  "JambonzTranscriptionEvents": {
+    "Transcription": "jambonz_transcribe::transcription",
+    "ConnectFailure": "jambonz_transcribe::connect_failed",
+    "Connect": "jambonz_transcribe::connect",
+    "Error": "jambonz_transcribe::error"
+  },
  "ListenEvents": {
    "Connect": "mod_audio_fork::connect",
    "ConnectFailure": "mod_audio_fork::connect_failed",
@@ -147,6 +157,7 @@
    "queue:status",
    "dial:confirm",
    "verb:hook",
+    "verb:status",
    "jambonz:error"
  ],
  "RecordState": {
--- a/lib/utils/db-utils.js
+++ b/lib/utils/db-utils.js
@@ -50,6 +50,8 @@ const speechMapper = (cred) => {
      const o = JSON.parse(decrypt(credential));
      obj.client_id = o.client_id;
      obj.secret = o.secret;
+      obj.nuance_tts_uri = o.nuance_tts_uri;
+      obj.nuance_stt_uri = o.nuance_stt_uri;
    }
    else if ('ibm' === obj.vendor) {
      const o = JSON.parse(decrypt(credential));
@@ -62,6 +64,16 @@ const speechMapper = (cred) => {
      const o = JSON.parse(decrypt(credential));
      obj.api_key = o.api_key;
    }
+    else if ('soniox' === obj.vendor) {
+      const o = JSON.parse(decrypt(credential));
+      obj.api_key = o.api_key;
+    }
+    else if (obj.vendor.startsWith('custom:')) {
+      const o = JSON.parse(decrypt(credential));
+      obj.auth_token = o.auth_token;
+      obj.custom_stt_url = o.custom_stt_url;
+      obj.custom_tts_url = o.custom_tts_url;
+    }
  } catch (err) {
    console.log(err);
  }
@@ -79,47 +91,13 @@ module.exports = (logger, srf) => {
    const [r2] = await pp.query(sqlSpeechCredentials, account_sid);
    const speech = r2.map(speechMapper);

-    /* search at the service provider level if we don't find it at the account level */
-    const haveGoogle = speech.find((s) => s.vendor === 'google');
-    const haveAws = speech.find((s) => s.vendor === 'aws');
-    const haveMicrosoft = speech.find((s) => s.vendor === 'microsoft');
-    const haveWellsaid = speech.find((s) => s.vendor === 'wellsaid');
-    const haveNuance = speech.find((s) => s.vendor === 'nuance');
-    const haveDeepgram = speech.find((s) => s.vendor === 'deepgram');
-    const haveIbm = speech.find((s) => s.vendor === 'ibm');
-    if (!haveGoogle || !haveAws || !haveMicrosoft || !haveWellsaid || !haveNuance || !haveIbm || !haveDeepgram) {
-      const [r3] = await pp.query(sqlSpeechCredentialsForSP, account_sid);
-      if (r3.length) {
-        if (!haveGoogle) {
-          const google = r3.find((s) => s.vendor === 'google');
-          if (google) speech.push(speechMapper(google));
-        }
-        if (!haveAws) {
-          const aws = r3.find((s) => s.vendor === 'aws');
-          if (aws) speech.push(speechMapper(aws));
-        }
-        if (!haveMicrosoft) {
-          const ms = r3.find((s) => s.vendor === 'microsoft');
-          if (ms) speech.push(speechMapper(ms));
-        }
-        if (!haveWellsaid) {
-          const wellsaid = r3.find((s) => s.vendor === 'wellsaid');
-          if (wellsaid) speech.push(speechMapper(wellsaid));
-        }
-        if (!haveNuance) {
-          const nuance = r3.find((s) => s.vendor === 'nuance');
-          if (nuance) speech.push(speechMapper(nuance));
-        }
-        if (!haveDeepgram) {
-          const deepgram = r3.find((s) => s.vendor === 'deepgram');
-          if (deepgram) speech.push(speechMapper(deepgram));
-        }
-        if (!haveIbm) {
-          const ibm = r3.find((s) => s.vendor === 'ibm');
-          if (ibm) speech.push(speechMapper(ibm));
-        }
+    /* add service provider creds unless we have that vendor at the account level */
+    const [r3] = await pp.query(sqlSpeechCredentialsForSP, account_sid);
+    r3.forEach((s) => {
+      if (!speech.find((s2) => s2.vendor === s.vendor)) {
+        speech.push(speechMapper(s));
      }
-    }
+    });

    return {
      ...r[0],
--- a/lib/utils/encrypt-decrypt.js
+++ b/lib/utils/encrypt-decrypt.js
@@ -2,9 +2,9 @@ const crypto = require('crypto');
 const algorithm = process.env.LEGACY_CRYPTO ? 'aes-256-ctr' : 'aes-256-cbc';
 const iv = crypto.randomBytes(16);
 const secretKey = crypto.createHash('sha256')
-  .update(String(process.env.JWT_SECRET))
+  .update(process.env.ENCRYPTION_SECRET || process.env.JWT_SECRET)
  .digest('base64')
-  .substr(0, 32);
+  .substring(0, 32);

 const encrypt = (text) => {
  const cipher = crypto.createCipheriv(algorithm, secretKey, iv);
@@ -25,8 +25,8 @@ const decrypt = (data) => {
    throw err;
  }
  const decipher = crypto.createDecipheriv(algorithm, secretKey, Buffer.from(hash.iv, 'hex'));
-  const decrpyted = Buffer.concat([decipher.update(Buffer.from(hash.content, 'hex')), decipher.final()]);
-  return decrpyted.toString();
+  const decrypted = Buffer.concat([decipher.update(Buffer.from(hash.content, 'hex')), decipher.final()]);
+  return decrypted.toString();
 };

 module.exports = {
--- a/lib/utils/install-srf-locals.js
+++ b/lib/utils/install-srf-locals.js
@@ -138,7 +138,6 @@ function installSrfLocals(srf, logger) {
    retrieveCall,
    listCalls,
    deleteCall,
-    synthAudio,
    createHash,
    retrieveHash,
    deleteKey,
@@ -151,11 +150,17 @@ function installSrfLocals(srf, logger) {
    pushBack,
    popFront,
    removeFromList,
-    lengthOfList,
    getListPosition,
+    lengthOfList,
+  } = require('@jambonz/realtimedb-helpers')({
+    host: process.env.JAMBONES_REDIS_HOST,
+    port: process.env.JAMBONES_REDIS_PORT || 6379
+  }, logger, tracer);
+  const {
+    synthAudio,
    getNuanceAccessToken,
    getIbmAccessToken,
-  } = require('@jambonz/realtimedb-helpers')({
+  } = require('@jambonz/speech-utils')({
    host: process.env.JAMBONES_REDIS_HOST,
    port: process.env.JAMBONES_REDIS_PORT || 6379
  }, logger, tracer);
--- a/lib/utils/siprec-utils.js
+++ b/lib/utils/siprec-utils.js
@@ -242,7 +242,8 @@ const createSipRecPayload = (sdp1, sdp2, logger) => {
    .replace(/a=sendonly\r\n/g, '')
    .replace(/a=direction:both\r\n/g, '');
  */
-  return combinedSdp;
+
+  return combinedSdp.replace(/sendrecv/g, 'recvonly');
 };

 module.exports = { parseSiprecPayload, createSipRecPayload } ;
--- a/lib/utils/transcription-utils.js
+++ b/lib/utils/transcription-utils.js
@@ -5,7 +5,9 @@ const {
  AwsTranscriptionEvents,
  NuanceTranscriptionEvents,
  DeepgramTranscriptionEvents,
-  NvidiaTranscriptionEvents
+  SonioxTranscriptionEvents,
+  NvidiaTranscriptionEvents,
+  JambonzTranscriptionEvents
 } = require('./constants');

 const stickyVars = {
@@ -27,6 +29,7 @@ const stickyVars = {
    'AZURE_SERVICE_ENDPOINT_ID',
    'AZURE_REQUEST_SNR',
    'AZURE_PROFANITY_OPTION',
+    'AZURE_SPEECH_ALTERNATIVE_LANGUAGE_CODES',
    'AZURE_SERVICE_ENDPOINT',
    'AZURE_INITIAL_SPEECH_TIMEOUT_MS',
    'AZURE_USE_OUTPUT_FORMAT_DETAILED',
@@ -88,9 +91,70 @@ const stickyVars = {
  ],
  nvidia: [
    'NVIDIA_HINTS'
+  ],
+  soniox: [
+    'SONIOX_PROFANITY_FILTER',
+    'SONIOX_MODEL'
  ]
 };

+/* Merge the final-word chunks buffered for one soniox utterance into a single final transcript event */
+const compileSonioxTranscripts = (finalWordChunks, channel, language) => {
+  const words = finalWordChunks.flat();
+  const transcript = words.reduce((acc, word) => {
+    if (word.text === '<end>') return acc;
+    if ([',', '.', '?', '!'].includes(word.text)) return `${acc}${word.text}`;
+    return `${acc} ${word.text}`;
+  }, '').trim();
+  /* confidence averages spoken words only: each punctuation token is listed separately so includes() matches */
+  const realWords = words.filter((word) => ![',', '.', '!', '?', ';', '<end>'].includes(word.text));
+  const total = realWords.reduce((acc, word) => acc + word.confidence, 0);
+  /* no real words -> report 0 instead of dividing by zero (NaN) */
+  const confidence = realWords.length > 0 ? total / realWords.length : 0;
+  return {
+    language_code: language,
+    channel_tag: channel,
+    is_final: true,
+    alternatives: [{transcript, confidence}],
+    vendor: {name: 'soniox', evt: words}
+  };
+};
+
+/* Convert one raw soniox event into the common transcription shape; final only once an <end> token is seen */
+const normalizeSoniox = (evt, channel, language) => {
+  const copy = JSON.parse(JSON.stringify(evt));
+  /* an <end> token indicates the end of an utterance */
+  const endTokenPos = evt.words.map((w) => w.text).indexOf('<end>');
+  const endpointReached = endTokenPos !== -1;
+  const words = endpointReached ? evt.words.slice(0, endTokenPos) : evt.words;
+
+  /* note: we can safely ignore words after the <end> token as they will be returned again */
+  const finalWords = words.filter((word) => word.is_final);
+  const nonFinalWords = words.filter((word) => !word.is_final);
+  const is_final = endpointReached && finalWords.length > 0;
+  const transcript = words.reduce((acc, word) => {
+    if ([',', '.', '?', '!'].includes(word.text)) return `${acc}${word.text}`;
+    return `${acc} ${word.text}`;
+  }, '').trim();
+  /* average confidence over spoken words only; punctuation tokens are listed one by one so includes() matches */
+  const realWords = words.filter((word) => ![',', '.', '!', '?', ';', '<end>'].includes(word.text));
+  const total = realWords.reduce((acc, word) => acc + word.confidence, 0);
+  const confidence = realWords.length > 0 ? total / realWords.length : 0;
+  return {
+    language_code: language,
+    channel_tag: channel,
+    is_final,
+    alternatives: [{transcript, confidence}],
+    vendor: {
+      name: 'soniox',
+      endpointReached,
+      evt: copy,
+      finalWords,
+      nonFinalWords
+    }
+  };
+};
+
 const normalizeDeepgram = (evt, channel, language) => {
  const copy = JSON.parse(JSON.stringify(evt));
  const alternatives = (evt.channel?.alternatives || [])
@@ -161,6 +225,15 @@ const normalizeGoogle = (evt, channel, language) => {
  };
 };

+/* Map a custom-vendor transcript event to the common shape, keeping only the first alternative */
+const normalizeCustom = (evt, channel, language) => ({
+  language_code: language,
+  channel_tag: channel,
+  is_final: evt.is_final,
+  /* slice yields [] rather than [undefined] (or a throw) when the vendor sends no alternatives */
+  alternatives: (evt.alternatives || []).slice(0, 1)
+});
+
 const normalizeNuance = (evt, channel, language) => {
  const copy = JSON.parse(JSON.stringify(evt));
  return {
@@ -221,7 +294,7 @@ const normalizeAws = (evt, channel, language) => {
 module.exports = (logger) => {
  const normalizeTranscription = (evt, vendor, channel, language) => {

-    logger.debug({ evt, vendor, channel, language }, 'normalizeTranscription');
+    //logger.debug({ evt, vendor, channel, language }, 'normalizeTranscription');
    switch (vendor) {
      case 'deepgram':
        return normalizeDeepgram(evt, channel, language);
@@ -237,7 +310,12 @@ module.exports = (logger) => {
        return normalizeIbm(evt, channel, language);
      case 'nvidia':
        return normalizeNvidia(evt, channel, language);
+      case 'soniox':
+        return normalizeSoniox(evt, channel, language);
      default:
+        if (vendor.startsWith('custom:')) {
+          return normalizeCustom(evt, channel, language);
+        }
        logger.error(`Unknown vendor ${vendor}`);
        return evt;
    }
@@ -247,6 +325,7 @@ module.exports = (logger) => {
    let opts = {};
    const {enable, voiceMs = 0, mode = -1} = rOpts.vad || {};
    const vad = {enable, voiceMs, mode};
+    const vendor = rOpts.vendor;

    /* voice activity detection works across vendors */
    opts = {
@@ -256,59 +335,43 @@ module.exports = (logger) => {
      ...(vad.enable && typeof vad.mode === 'number' && {RECOGNIZER_VAD_MODE: vad.mode}),
    };

-    if ('google' === rOpts.vendor) {
+    if ('google' === vendor) {
+      let model = 'phone_call';
+      if (rOpts.altLanguages.length > 0) model = task.name === TaskName.Gather ? 'command_and_search' : 'latest_long';
      opts = {
        ...opts,
-        ...(sttCredentials &&
-          {GOOGLE_APPLICATION_CREDENTIALS: JSON.stringify(sttCredentials.credentials)}),
-        ...(rOpts.enhancedModel &&
-            {GOOGLE_SPEECH_USE_ENHANCED: 1}),
-        ...(rOpts.separateRecognitionPerChannel &&
-          {GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL: 1}),
-        ...(rOpts.profanityFilter &&
-          {GOOGLE_SPEECH_PROFANITY_FILTER: 1}),
-        ...(rOpts.punctuation &&
-          {GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 1}),
-        ...(rOpts.words &&
-          {GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 1}),
-        ...((rOpts.singleUtterance ||  task.name === TaskName.Gather) &&
-          {GOOGLE_SPEECH_SINGLE_UTTERANCE: 1}),
-        ...(rOpts.diarization &&
-          {GOOGLE_SPEECH_SPEAKER_DIARIZATION: 1}),
+        ...(sttCredentials && {GOOGLE_APPLICATION_CREDENTIALS: JSON.stringify(sttCredentials.credentials)}),
+        ...(rOpts.separateRecognitionPerChannel && {GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL: 1}),
+        ...(rOpts.separateRecognitionPerChannel === false && {GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL: 0}),
+        ...(rOpts.profanityFilter && {GOOGLE_SPEECH_PROFANITY_FILTER: 1}),
+        ...(rOpts.punctuation && {GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 1}),
+        ...(rOpts.words && {GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 1}),
+        ...(rOpts.singleUtterance && {GOOGLE_SPEECH_SINGLE_UTTERANCE: 1}),
+        ...(rOpts.diarization && {GOOGLE_SPEECH_SPEAKER_DIARIZATION: 1}),
        ...(rOpts.diarization && rOpts.diarizationMinSpeakers > 0 &&
          {GOOGLE_SPEECH_SPEAKER_DIARIZATION_MIN_SPEAKER_COUNT: rOpts.diarizationMinSpeakers}),
        ...(rOpts.diarization && rOpts.diarizationMaxSpeakers > 0 &&
          {GOOGLE_SPEECH_SPEAKER_DIARIZATION_MAX_SPEAKER_COUNT: rOpts.diarizationMaxSpeakers}),
-        ...(rOpts.enhancedModel === false &&
-          {GOOGLE_SPEECH_USE_ENHANCED: 0}),
-        ...(rOpts.separateRecognitionPerChannel === false &&
-          {GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL: 0}),
-        ...(rOpts.profanityFilter === false &&
-          {GOOGLE_SPEECH_PROFANITY_FILTER: 0}),
-        ...(rOpts.punctuation === false &&
-          {GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 0}),
-        ...(rOpts.words  == false &&
-          {GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 0}),
-        ...((rOpts.singleUtterance === false || task.name === TaskName.Transcribe) &&
-          {GOOGLE_SPEECH_SINGLE_UTTERANCE: 0}),
-        ...(rOpts.diarization === false &&
-          {GOOGLE_SPEECH_SPEAKER_DIARIZATION: 0}),
+        ...(rOpts.enhancedModel && {GOOGLE_SPEECH_USE_ENHANCED: 1}),
+        ...(rOpts.profanityFilter === false && {GOOGLE_SPEECH_PROFANITY_FILTER: 0}),
+        ...(rOpts.punctuation === false && {GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 0}),
+        ...(rOpts.words  == false && {GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 0}),
+        ...(rOpts.diarization === false && {GOOGLE_SPEECH_SPEAKER_DIARIZATION: 0}),
        ...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'string' &&
          {GOOGLE_SPEECH_HINTS: rOpts.hints.join(',')}),
        ...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'object' &&
          {GOOGLE_SPEECH_HINTS: JSON.stringify(rOpts.hints)}),
-        ...(typeof rOpts.hintsBoost === 'number' &&
-          {GOOGLE_SPEECH_HINTS_BOOST: rOpts.hintsBoost}),
+        ...(typeof rOpts.hintsBoost === 'number' && {GOOGLE_SPEECH_HINTS_BOOST: rOpts.hintsBoost}),
        ...(rOpts.altLanguages.length > 0 &&
-          {GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES: rOpts.altLanguages.join(',')}),
+          {GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES: [...new Set(rOpts.altLanguages)].join(',')}),
        ...(rOpts.interactionType &&
          {GOOGLE_SPEECH_METADATA_INTERACTION_TYPE: rOpts.interactionType}),
-        ...{GOOGLE_SPEECH_MODEL: rOpts.model || (task.name === TaskName.Gather ? 'latest_short' : 'phone_call')},
-        ...(rOpts.naicsCode > 0 &&
-          {GOOGLE_SPEECH_METADATA_INDUSTRY_NAICS_CODE: rOpts.naicsCode}),
+        ...{GOOGLE_SPEECH_MODEL: rOpts.model || model},
+        ...(rOpts.naicsCode > 0 && {GOOGLE_SPEECH_METADATA_INDUSTRY_NAICS_CODE: rOpts.naicsCode}),
+        GOOGLE_SPEECH_METADATA_RECORDING_DEVICE_TYPE: 'phone_line',
      };
    }
-    else if (['aws', 'polly'].includes(rOpts.vendor)) {
+    else if (['aws', 'polly'].includes(vendor)) {
      opts = {
        ...opts,
        ...(rOpts.vocabularyName && {AWS_VOCABULARY_NAME: rOpts.vocabularyName}),
@@ -321,7 +384,7 @@ module.exports = (logger) => {
        }),
      };
    }
-    else if ('microsoft' === rOpts.vendor) {
+    else if ('microsoft' === vendor) {
      opts = {
        ...opts,
        ...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'string' &&
@@ -329,7 +392,7 @@ module.exports = (logger) => {
        ...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'object' &&
          {AZURE_SPEECH_HINTS: rOpts.hints.map((h) => h.phrase).join(',')}),
        ...(rOpts.altLanguages && rOpts.altLanguages.length > 0 &&
-          {AZURE_SERVICE_ENDPOINT_ID: rOpts.sttCredentials}),
+          {AZURE_SPEECH_ALTERNATIVE_LANGUAGE_CODES: [...new Set(rOpts.altLanguages)].join(',')}),
        ...(rOpts.requestSnr && {AZURE_REQUEST_SNR: 1}),
        ...(rOpts.profanityOption && {AZURE_PROFANITY_OPTION: rOpts.profanityOption}),
        ...(rOpts.azureServiceEndpoint && {AZURE_SERVICE_ENDPOINT: rOpts.azureServiceEndpoint}),
@@ -346,7 +409,7 @@ module.exports = (logger) => {
          {AZURE_SERVICE_ENDPOINT_ID: sttCredentials.custom_stt_endpoint})
      };
    }
-    else if ('nuance' === rOpts.vendor) {
+    else if ('nuance' === vendor) {
      /**
       * Note: all nuance options are in recognizer.nuanceOptions, should migrate
       * other vendor settings to similar nested structure
@@ -354,12 +417,9 @@ module.exports = (logger) => {
      const {nuanceOptions = {}} = rOpts;
      opts = {
        ...opts,
-        ...(sttCredentials.access_token) &&
-          {NUANCE_ACCESS_TOKEN: sttCredentials.access_token},
-        ...(sttCredentials.krypton_endpoint) &&
-          {NUANCE_KRYPTON_ENDPOINT: sttCredentials.krypton_endpoint},
-        ...(nuanceOptions.topic) &&
-          {NUANCE_TOPIC: nuanceOptions.topic},
+        ...(sttCredentials.access_token) && {NUANCE_ACCESS_TOKEN: sttCredentials.access_token},
+        ...(sttCredentials.nuance_stt_uri) && {NUANCE_KRYPTON_ENDPOINT: sttCredentials.nuance_stt_uri},
+        ...(nuanceOptions.topic) && {NUANCE_TOPIC: nuanceOptions.topic},
        ...(nuanceOptions.utteranceDetectionMode) &&
          {NUANCE_UTTERANCE_DETECTION_MODE: nuanceOptions.utteranceDetectionMode},
        ...(nuanceOptions.punctuation || rOpts.punctuation) && {NUANCE_PUNCTUATION: nuanceOptions.punctuation},
@@ -397,7 +457,7 @@ module.exports = (logger) => {
          {NUANCE_RESOURCES: JSON.stringify(nuanceOptions.resources)},
      };
    }
-    else if ('deepgram' === rOpts.vendor) {
+    else if ('deepgram' === vendor) {
      const {deepgramOptions = {}} = rOpts;
      opts = {
        ...opts,
@@ -441,7 +501,30 @@ module.exports = (logger) => {
          {DEEPGRAM_SPEECH_TAG: deepgramOptions.tag}
      };
    }
-    else if ('ibm' === rOpts.vendor) {
+    else if ('soniox' === vendor) {
+      const {sonioxOptions = {}} = rOpts;
+      const {storage = {}} = sonioxOptions;
+      opts = {
+        ...opts,
+        ...(sttCredentials.api_key) &&
+          {SONIOX_API_KEY: sttCredentials.api_key},
+        ...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'string' &&
+          {SONIOX_HINTS: rOpts.hints.join(',')}),
+        ...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'object' &&
+          {SONIOX_HINTS: JSON.stringify(rOpts.hints)}),
+        ...(typeof rOpts.hintsBoost === 'number' &&
+          {SONIOX_HINTS_BOOST: rOpts.hintsBoost}),
+        ...(sonioxOptions.model) &&
+          {SONIOX_MODEL: sonioxOptions.model},
+        ...((sonioxOptions.profanityFilter || rOpts.profanityFilter) && {SONIOX_PROFANITY_FILTER: 1}),
+        ...(storage?.id && {SONIOX_STORAGE_ID: storage.id}),
+        ...(storage?.id && storage?.title && {SONIOX_STORAGE_TITLE: storage.title}),
+        ...(storage?.id && storage?.disableStoreAudio && {SONIOX_STORAGE_DISABLE_AUDIO: 1}),
+        ...(storage?.id && storage?.disableStoreTranscript && {SONIOX_STORAGE_DISABLE_TRANSCRIPT: 1}),
+        ...(storage?.id && storage?.disableSearch && {SONIOX_STORAGE_DISABLE_SEARCH: 1})
+      };
+    }
+    else if ('ibm' === vendor) {
      const {ibmOptions = {}} = rOpts;
      opts = {
        ...opts,
@@ -465,7 +548,7 @@ module.exports = (logger) => {
          {IBM_SPEECH_WATSON_LEARNING_OPT_OUT: ibmOptions.watsonLearningOptOut}
      };
    }
-    else if ('nvidia' === rOpts.vendor) {
+    else if ('nvidia' === vendor) {
      const {nvidiaOptions = {}} = rOpts;
      opts = {
        ...opts,
@@ -494,11 +577,29 @@ module.exports = (logger) => {
          {NVIDIA_CUSTOM_CONFIGURATION: JSON.stringify(nvidiaOptions.customConfiguration)}),
      };
    }
+    else if (vendor.startsWith('custom:')) {
+      let {options = {}} = rOpts;
+      const {auth_token, custom_stt_url} = sttCredentials;
+      options = {
+        ...options,
+        ...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'string' &&
+        {hints: rOpts.hints}),
+        ...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'object' &&
+        {hints: JSON.stringify(rOpts.hints)}),
+        ...(typeof rOpts.hintsBoost === 'number' && {hintsBoost: rOpts.hintsBoost})
+      };

-    stickyVars[rOpts.vendor].forEach((key) => {
+      opts = {
+        ...opts,
+        JAMBONZ_STT_API_KEY: auth_token,
+        JAMBONZ_STT_URL: custom_stt_url,
+        ...(Object.keys(options).length > 0 && {JAMBONZ_STT_OPTIONS: JSON.stringify(options)}),
+      };
+    }
+
+    (stickyVars[vendor] || []).forEach((key) => {
      if (!opts[key]) opts[key] = '';
    });
-    logger.debug({opts}, 'recognizer channel vars');
    return opts;
  };

@@ -517,25 +618,32 @@ module.exports = (logger) => {
    ep.removeCustomEventListener(NuanceTranscriptionEvents.Transcription);
    ep.removeCustomEventListener(NuanceTranscriptionEvents.TranscriptionComplete);
    ep.removeCustomEventListener(NuanceTranscriptionEvents.StartOfSpeech);
-    ep.removeCustomEventListener(NuanceTranscriptionEvents.Error);
    ep.removeCustomEventListener(NuanceTranscriptionEvents.VadDetected);

    ep.removeCustomEventListener(DeepgramTranscriptionEvents.Transcription);
    ep.removeCustomEventListener(DeepgramTranscriptionEvents.Connect);
    ep.removeCustomEventListener(DeepgramTranscriptionEvents.ConnectFailure);

+    ep.removeCustomEventListener(SonioxTranscriptionEvents.Transcription);
+
    ep.removeCustomEventListener(NvidiaTranscriptionEvents.Transcription);
    ep.removeCustomEventListener(NvidiaTranscriptionEvents.TranscriptionComplete);
    ep.removeCustomEventListener(NvidiaTranscriptionEvents.StartOfSpeech);
-    ep.removeCustomEventListener(NvidiaTranscriptionEvents.Error);
    ep.removeCustomEventListener(NvidiaTranscriptionEvents.VadDetected);
+
+    ep.removeCustomEventListener(JambonzTranscriptionEvents.Transcription);
+    ep.removeCustomEventListener(JambonzTranscriptionEvents.Connect);
+    ep.removeCustomEventListener(JambonzTranscriptionEvents.ConnectFailure);
+
+    ep.removeCustomEventListener(JambonzTranscriptionEvents.Error);
  };

  const setSpeechCredentialsAtRuntime = (recognizer) => {
    if (!recognizer) return;
    if (recognizer.vendor === 'nuance') {
-      const {clientId, secret} = recognizer.nuanceOptions || {};
+      const {clientId, secret, kryptonEndpoint} = recognizer.nuanceOptions || {};
      if (clientId && secret) return {client_id: clientId, secret};
+      if (kryptonEndpoint) return {nuance_stt_uri: kryptonEndpoint};
    }
    else if (recognizer.vendor === 'nvidia') {
      const {rivaUri} = recognizer.nvidiaOptions || {};
@@ -545,6 +653,10 @@ module.exports = (logger) => {
      const {apiKey} = recognizer.deepgramOptions || {};
      if (apiKey) return {api_key: apiKey};
    }
+    else if (recognizer.vendor === 'soniox') {
+      const {apiKey} = recognizer.sonioxOptions || {};
+      if (apiKey) return {api_key: apiKey};
+    }
    else if (recognizer.vendor === 'ibm') {
      const {ttsApiKey, ttsRegion, sttApiKey, sttRegion, instanceId} = recognizer.ibmOptions || {};
      if (ttsApiKey || sttApiKey) return {
@@ -561,6 +673,7 @@ module.exports = (logger) => {
    normalizeTranscription,
    setChannelVarsForStt,
    removeSpeechListeners,
-    setSpeechCredentialsAtRuntime
+    setSpeechCredentialsAtRuntime,
+    compileSonioxTranscripts
  };
 };
--- a/lib/utils/ws-requestor.js
+++ b/lib/utils/ws-requestor.js
@@ -219,7 +219,6 @@ class WsRequestor extends BaseRequestor {
  }

  _setHandlers(ws) {
-    this.logger.debug('WsRequestor:_setHandlers');
    ws
      .once('open', this._onOpen.bind(this, ws))
      .once('close', this._onClose.bind(this))
@@ -274,6 +273,7 @@ class WsRequestor extends BaseRequestor {
    }, 'WsRequestor - unexpected response');
    this.emit('connection-failure');
    this.emit('not-ready', new Error(`${res.statusCode} ${res.statusMessage}`));
+    this.connections++;
  }

  _onSocketClosed() {
@@ -338,7 +338,7 @@ class WsRequestor extends BaseRequestor {
      this.logger.info({url: this.url}, `WsRequestor:_recvAck - ack to unknown msgid ${msgid}, discarding`);
      return;
    }
-    this.logger.debug({url: this.url}, `WsRequestor:_recvAck - received response to ${msgid}`);
+    //this.logger.debug({url: this.url}, `WsRequestor:_recvAck - received response to ${msgid}`);
    this.messagesInFlight.delete(msgid);
    const {success} = obj;
    success && success(data);
--- a/package-lock.json
+++ b/package-lock.json
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "jambonz-feature-server",
-  "version": "v0.8.0",
+  "version": "v0.8.2",
  "main": "app.js",
  "engines": {
    "node": ">= 10.16.0"
@@ -19,17 +19,18 @@
  "bugs": {},
  "scripts": {
    "start": "node app",
-    "test": "NODE_ENV=test JAMBONES_HOSTING=1 HTTP_POOL=1 DRACHTIO_HOST=127.0.0.1 DRACHTIO_PORT=9060 DRACHTIO_SECRET=cymru JAMBONES_MYSQL_HOST=127.0.0.1 JAMBONES_MYSQL_PORT=3360 JAMBONES_MYSQL_USER=jambones_test JAMBONES_MYSQL_PASSWORD=jambones_test JAMBONES_MYSQL_DATABASE=jambones_test JAMBONES_REDIS_HOST=127.0.0.1 JAMBONES_REDIS_PORT=16379 JAMBONES_LOGLEVEL=error ENABLE_METRICS=0 HTTP_PORT=3000 JAMBONES_SBCS=172.38.0.10 JAMBONES_FREESWITCH=127.0.0.1:8022:JambonzR0ck$:docker-host JAMBONES_TIME_SERIES_HOST=127.0.0.1 JAMBONES_NETWORK_CIDR=172.38.0.0/16 node test/ ",
+    "test": "NODE_ENV=test JAMBONES_HOSTING=1 HTTP_POOL=1 ENCRYPTION_SECRET=foobar DRACHTIO_HOST=127.0.0.1 DRACHTIO_PORT=9060 DRACHTIO_SECRET=cymru JAMBONES_MYSQL_HOST=127.0.0.1 JAMBONES_MYSQL_PORT=3360 JAMBONES_MYSQL_USER=jambones_test JAMBONES_MYSQL_PASSWORD=jambones_test JAMBONES_MYSQL_DATABASE=jambones_test JAMBONES_REDIS_HOST=127.0.0.1 JAMBONES_REDIS_PORT=16379 JAMBONES_LOGLEVEL=error ENABLE_METRICS=0 HTTP_PORT=3000 JAMBONES_SBCS=172.38.0.10 JAMBONES_FREESWITCH=127.0.0.1:8022:JambonzR0ck$:docker-host JAMBONES_TIME_SERIES_HOST=127.0.0.1 JAMBONES_NETWORK_CIDR=172.38.0.0/16 node test/ ",
    "coverage": "./node_modules/.bin/nyc --reporter html --report-dir ./coverage npm run test",
    "jslint": "eslint app.js lib"
  },
  "dependencies": {
    "@jambonz/db-helpers": "^0.7.4",
    "@jambonz/http-health-check": "^0.0.1",
-    "@jambonz/realtimedb-helpers": "^0.6.5",
+    "@jambonz/realtimedb-helpers": "^0.7.0",
+    "@jambonz/speech-utils": "^0.0.12",
    "@jambonz/stats-collector": "^0.1.6",
    "@jambonz/time-series": "^0.2.5",
-    "@jambonz/verb-specifications": "^0.0.3",
+    "@jambonz/verb-specifications": "^0.0.11",
    "@opentelemetry/api": "^1.4.0",
    "@opentelemetry/exporter-jaeger": "^1.9.0",
    "@opentelemetry/exporter-trace-otlp-http": "^0.35.0",
@@ -43,7 +44,7 @@
    "bent": "^7.3.12",
    "debug": "^4.3.4",
    "deepcopy": "^2.1.0",
-    "drachtio-fsmrf": "^3.0.18",
+    "drachtio-fsmrf": "^3.0.20",
    "drachtio-srf": "^4.5.23",
    "express": "^4.18.2",
    "ip": "^1.1.8",
@@ -56,7 +57,7 @@
    "short-uuid": "^4.2.2",
    "sinon": "^15.0.1",
    "to-snake-case": "^1.0.0",
-    "undici": "^5.16.0",
+    "undici": "^5.19.1",
    "uuid-random": "^1.3.2",
    "verify-aws-sns-signature": "^0.1.0",
    "ws": "^8.9.0",
--- a/test/gather-tests.js
+++ b/test/gather-tests.js
@@ -206,7 +206,49 @@ test('\'gather\' test - deepgram', async(t) => {
    let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
    //console.log(JSON.stringify(obj));
    t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
-      'gather: succeeds when using  deepgram credentials');
+      'gather: succeeds when using deepgram credentials');
+    disconnect();
+  } catch (err) {
+    console.log(`error received: ${err}`);
+    disconnect();
+    t.error(err);
+  }
+});
+
+test('\'gather\' test - soniox', async(t) => {
+  if (!process.env.SONIOX_API_KEY ) {
+    t.pass('skipping soniox tests');
+    return t.end();
+  }
+  clearModule.all();
+  const {srf, disconnect} = require('../app');
+
+  try {
+    await connect(srf);
+    // GIVEN
+    let verbs = [
+      {
+        "verb": "gather",
+        "input": ["speech"],
+        "recognizer": {
+          "vendor": "deepgram",
+          "hints": ["customer support", "sales", "human resources", "HR"],
+          "deepgramOptions": {
+            "apiKey": process.env.SONIOX_API_KEY
+          }
+        },
+        "timeout": 10,
+        "actionHook": "/actionHook"
+      }
+    ];
+    let from = "gather_success";
+    provisionCallHook(from, verbs);
+    // THEN
+    await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
+    let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
+    console.log(JSON.stringify(obj));
+    t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
+      'gather: succeeds when using soniox credentials');

    disconnect();
  } catch (err) {
--- a/test/transcribe-tests.js
+++ b/test/transcribe-tests.js
@@ -143,7 +143,7 @@ test('\'transcribe\' test - deepgram', async(t) => {
      {
        "verb": "transcribe",
        "recognizer": {
-          "vendor": "aws",
+          "vendor": "deepgram",
          "hints": ["customer support", "sales", "human resources", "HR"],
          "deepgramOptions": {
            "apiKey": process.env.DEEPGRAM_API_KEY
@@ -160,6 +160,47 @@ test('\'transcribe\' test - deepgram', async(t) => {
    t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
      'transcribe: succeeds when using deepgram credentials');

+    disconnect();
+  } catch (err) {
+    console.log(`error received: ${err}`);
+    disconnect();
+    t.error(err);
+  }
+});
+
+test('\'transcribe\' test - soniox', async(t) => {
+  if (!process.env.SONIOX_API_KEY ) {
+    t.pass('skipping soniox tests');
+    return t.end();
+  }
+  clearModule.all();
+  const {srf, disconnect} = require('../app');
+
+  try {
+    await connect(srf);
+    // GIVEN
+    let verbs = [
+      {
+        "verb": "transcribe",
+        "recognizer": {
+          "vendor": "soniox",
+          "hints": ["customer support", "sales", "human resources", "HR"],
+          "deepgramOptions": {
+            "apiKey": process.env.SONIOX_API_KEY
+          }
+        },
+        "transcriptionHook": "/transcriptionHook"
+      }
+    ];
+    let from = "gather_success";
+    provisionCallHook(from, verbs);
+    // THEN
+    await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
+    let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
+    console.log(JSON.stringify(obj));
+    t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
+      'transcribe: succeeds when using soniox credentials');
+
    disconnect();
  } catch (err) {
    console.log(`error received: ${err}`);
Author	SHA1	Message	Date
Hoan Luu Huu	11c5047465	fix: Re-invite sip rec does not update media (#300 ) * fix: Re-invite sip rec does not update media * fix: Re-invite sip rec does not update media	2023-04-05 09:46:32 -04:00
Dave Horton	e19ea629f0	response to siprec invite should have a:recvonly if offer had a:sendonly (#298 )	2023-04-04 21:02:21 -04:00
Antony Jukes	fe529c6bfb	removed incorrect "this" from this.target.auth as it actually a local const. (#296 )	2023-04-03 11:13:12 -04:00
Dave Horton	e980b82ec4	update to speech utils with improved microsoft tts	2023-04-01 13:20:59 -04:00
Hoan Luu Huu	318ca19791	fix: update speech utils version (#295 ) * fix: update speech utils version * update package-lock.json --------- Co-authored-by: Dave Horton <daveh@beachdognet.com>	2023-04-01 11:35:13 -04:00
Dave Horton	e2bd211346	update to latest speech-utils	2023-03-31 16:50:46 -04:00
Dave Horton	410c07fae6	further fix for google model	2023-03-31 12:37:04 -04:00
Dave Horton	2ebfbfb3d8	google STT: when altLanguges are used default to a model that supports it	2023-03-31 12:31:14 -04:00
Dave Horton	a29795839d	Bugfix/bot mode restart (#292 ) * restart background gather if we get a new config with bargein=enable and changes to input types * stop background gather properly before restarting * fix: sticky background gather tasks must not be restarted if we have a new background gather * fix undefined reference * safety	2023-03-31 09:35:23 -04:00
Hoan Luu Huu	28088a4cdd	feat: queue play audio (#290 ) * feat: queue play audio * fix: typo * fix: typo * fix: typo * fix: typo * fix: typo * fix: typo * fix: typo * fix: typo * fix: typo * fix: typo	2023-03-30 15:31:54 -04:00
Dave Horton	afb381eec9	bugfix: setting altLanguages on Azure once left it turned on	2023-03-29 08:49:34 -04:00
Dave Horton	ed00ccb681	bump version	2023-03-28 14:14:25 -04:00
Dave Horton	6e945dde9a	google stt fixes, including defaulting to phone_call model based on c… (#288 ) * google stt fixes, including defaulting to phone_call model based on comparative model testing * lint error	2023-03-28 10:02:03 -04:00
Dave Horton	efdea3e514	gather defaults to multiple utterances	2023-03-27 15:53:01 -04:00
Dave Horton	5131d524ce	bugfix: allow for empty transcripts that nuance returns	2023-03-27 14:13:50 -04:00
Anton Voylenko	c0114015ea	check encryption env on start (#286 )	2023-03-26 15:45:20 -04:00
Anton Voylenko	a293ec09d0	add ENCRYPTION_SECRET variable (#283 ) * add ENCRYPTION_SECRET variable * add env for tests	2023-03-26 14:52:58 -04:00
Dave Horton	f71ae83ce4	bugfix: nuance on-prem stt	2023-03-26 14:26:36 -04:00
Hoan Luu Huu	0dd161913c	fix: gather task should clear dtmf event before resolve (#284 ) Co-authored-by: Quan HL <quanluuhoang8@gmail.com>	2023-03-26 12:32:51 -04:00
Dave Horton	63ab554908	google STT: default to command_and_search for Gather, as latest_short seems to have issues, various other fixes (#285 )	2023-03-26 12:20:03 -04:00
Dave Horton	e1bd075ebc	support for nuance on-prem stt/tts	2023-03-25 12:08:54 -04:00
Dave Horton	9de89258a1	update speech-utils@0.0.8	2023-03-24 14:50:08 -04:00
Dave Horton	145ed488db	make the feature committed in `dd4d9aa` enabled only if JAMBONES_GATHER_CLEAR_GLOBAL_HINTS_ON_EMPTY_HINTS is set, as it is a behavior change	2023-03-23 07:54:39 -04:00
Dave Horton	c06a43adfa	Gather: bugfix for alternate languages with Azure	2023-03-22 14:32:25 -04:00
Dave Horton	bebc82d194	bugfix: gather with google STT does not need to restart transcribing after end of utterance	2023-03-21 15:46:00 -04:00
Dave Horton	cdc82e99ff	add minor logging	2023-03-21 12:35:02 -04:00
Dave Horton	dd4d9aa261	Gather: if an empty array of hints are supplied, this signals we should mask global hints for this collection	2023-03-21 12:16:12 -04:00
Dave Horton	1dcf9ee5a2	update to speech-utils@0.0.6	2023-03-21 08:27:25 -04:00
Dave Horton	4b28db0946	update to speech-utils@.0.0.5	2023-03-21 08:00:52 -04:00
Dave Horton	e7ff76b938	update to speech-utils with AWS tts bugfix	2023-03-20 15:35:20 -04:00
Dave Horton	f245275983	gather: remove duplicate and null hints, restart timeout on interim transcripts	2023-03-20 15:34:55 -04:00
Dave Horton	690deed89d	prune unused logging	2023-03-19 12:04:02 -04:00
Dave Horton	26053ec709	update speech-utils with support for more audio formats for custom tts	2023-03-15 09:14:41 -04:00
Dave Horton	34e8203338	update to realtime-dbhelpers that factored out speech-utils	2023-03-14 10:07:29 -04:00
Hoan Luu Huu	7be3c64116	feat: update speech-ultil version 1.0.1 (#275 ) * feat: update speech-ultil version 1.0.1 * feat: update speech-ultil version 1.0.1 * more fixes for custom stt * more fixes * fixes * update drachtio-fsmrf * pass url to mod_jambonz_transcribe * transcription utils: handle custom results * handle custom speech vendor errors * add support for hints to custom speech * change to custom speech options * send hints as an array for custom speech * update latest speech-utils * transcribe: changes to support soniox * bugfix: soniox transcribe --------- Co-authored-by: Quan HL <quanluuhoang8@gmail.com> Co-authored-by: Dave Horton <daveh@beachdognet.com>	2023-03-12 19:38:36 -04:00
Hoan Luu Huu	f71d3aed8b	feat: forward PAI from inbound call to dial outbound call (#280 ) * feat: forward PAI from inbound call to dial outbound call * fix: review comment --------- Co-authored-by: Quan HL <quanluuhoang8@gmail.com>	2023-03-09 08:58:19 -05:00
Hoan Luu Huu	5ab24337b2	fix: use TTS_FAILURE alert type for synthAudio (#278 ) Co-authored-by: Quan HL <quanluuhoang8@gmail.com>	2023-03-08 07:42:06 -05:00
Dave Horton	2af76d94a6	bugfix: repeated ws failures should stop eventually	2023-03-07 16:29:00 -05:00
Dave Horton	4919c05181	add verb:status for play events (#274 )	2023-03-03 15:56:50 -05:00
Dave Horton	3084a9d6ba	#241 - gather bargein on Nuance has to be based on start of speech event (#246 )	2023-03-03 13:39:23 -05:00
Dave Horton	1c683f1142	initial changes for soniox (#270 ) * initial changes for soniox * changes to gather for soniox * parse soniox stt results * handle <end> token for soniox * soniox: handle empty array of words * support for soniox hints * add soniox storage options * update to verb specs * add support for transcribe * compile soniox transcripts * gather: kill no input timer for soniox when we get interim results * fix buffering of soniox transcripts * fix for compiling soniox transcript * another fix for compiling soniox transcript * another fix * handling of <end> token * fix soniox bug * gather: fixes for soniox continous asr * fix undefined variable reference * fix prev commit * bugfix: allow verb_status requests * gather: for soniox no need to restart transcription after final transcription received * update verb specs * update verb specs, fixes for continuous asr:	2023-03-03 13:37:55 -05:00
Dave Horton	ab1947e23e	bugfix: gather minBargeinWordCount defaults to 1	2023-02-24 10:27:05 -05:00
Dave Horton	5527abff09	bump version	2023-02-24 10:04:25 -05:00
Dave Horton	68827112fc	further fix for early hints match in gather	2023-02-23 13:10:46 -05:00
Dave Horton	8a9a2df128	early hints fix that was not merged	2023-02-23 12:54:21 -05:00
Dave Horton	3a3544a5e8	remove some wordy logging	2023-02-23 12:32:41 -05:00
Dave Horton	cbeb706946	update to latest @jambonz/verb-specifications with less verbose logging	2023-02-23 12:16:14 -05:00
Dave Horton	f005262615	docs	2023-02-23 10:48:09 -05:00
Snyk bot	67ec28484c	fix: package.json & package-lock.json to reduce vulnerabilities (#265 ) The following vulnerabilities are fixed with an upgrade: - https://snyk.io/vuln/SNYK-JS-UNDICI-3323844 - https://snyk.io/vuln/SNYK-JS-UNDICI-3323845	2023-02-23 10:26:06 -05:00
two56	803a944240	Use the request from CallSession for cancel (#268 ) * Use the req from CallSession for cancel * Check cs is set --------- Co-authored-by: Matt Preskett <matt.preskett@netcall.com>	2023-02-23 09:13:44 -05:00
Snyk bot	a5cd342e46	fix: Dockerfile to reduce vulnerabilities (#269 )	2023-02-22 14:04:39 -05:00
EgleH	e91feb64f5	Update node base image to node:18.14.0-alpine3.16 (#267 )	2023-02-21 07:54:00 -05:00