Compare commits

7 Commits

Author SHA1 Message Date
Hoan Luu Huu
edb4d21ce1 fixed undefined issue when setting tts streaming channel vars (#1456) 2025-12-02 19:46:28 -05:00
Dave Horton
8048e9cf88 when dialing the B leg we check whether we are using opus on the A leg, and if so we outdial B with opus first; however we were incorrectly checking the SDP on the A-leg INVITE, not the 200 OK we send back (#1455) 2025-12-02 19:22:20 -05:00
Sam Machin
451feafed4 use timeout on HTTP requests (#1453) 2025-12-02 07:41:47 -05:00
Ed Robbins
7f1543a0f3 Add ability to enable/disable Azure audio logging via azureOptions (#1432) 2025-11-30 11:56:56 -05:00
Hoan Luu Huu
83955ba972 SoundHound support audio endpoint from speech credential (#1446)
* SoundHound support audio endpoint from speech credential

* add requestInfo and sampleRate to houndify channel variable

* wip
2025-11-30 11:55:20 -05:00
Hoan Luu Huu
a5fa5fce5b Fixed: transcribe with 2 legs cannot fall back (#1451)
* fixed transcribe cannot fall back for a specific endpoint

* wip
2025-11-28 21:43:05 -05:00
Dave Horton
cc1751f500 fix race condition where gather resolves with speech transcript but t… (#1449)
* fix race condition where gather resolves with speech transcript but the timeout timer gets set after the resolve and is left running after gather completes

* remove unneeded line of code
2025-11-27 11:44:49 -06:00
12 changed files with 85 additions and 25 deletions

View File

@@ -119,7 +119,7 @@ const ENCRYPTION_SECRET = process.env.ENCRYPTION_SECRET;
 const HTTP_POOL = process.env.HTTP_POOL && parseInt(process.env.HTTP_POOL);
 const HTTP_POOLSIZE = parseInt(process.env.HTTP_POOLSIZE, 10) || 10;
 const HTTP_PIPELINING = parseInt(process.env.HTTP_PIPELINING, 10) || 1;
-const HTTP_TIMEOUT = 10000;
+const HTTP_TIMEOUT = parseInt(process.env.JAMBONES_HTTP_TIMEOUT, 10) || 10000;
 const HTTP_PROXY_IP = process.env.JAMBONES_HTTP_PROXY_IP;
 const HTTP_PROXY_PORT = process.env.JAMBONES_HTTP_PROXY_PORT;
 const HTTP_PROXY_PROTOCOL = process.env.JAMBONES_HTTP_PROXY_PROTOCOL || 'http';
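
One detail of the new default worth calling out: parseInt returns NaN for an unset variable, and NaN is falsy, so the || 10000 fallback covers unset and malformed values alike. A minimal standalone sketch of the pattern (not the file above):

// parseInt(undefined, 10) is NaN, and NaN || 10000 evaluates to 10000,
// so unset or garbage values fall back to the default. Note that an
// explicit "0" also falls back to 10000, since 0 is falsy too.
const HTTP_TIMEOUT = parseInt(process.env.JAMBONES_HTTP_TIMEOUT, 10) || 10000;
console.log(HTTP_TIMEOUT); // 10000 unless JAMBONES_HTTP_TIMEOUT is a non-zero number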

View File

@@ -1195,7 +1195,8 @@ class CallSession extends Emitter {
         speech_credential_sid: credential.speech_credential_sid,
         client_id: credential.client_id,
         client_key: credential.client_key,
-        user_id: credential.user_id
+        user_id: credential.user_id,
+        houndify_server_uri: credential.houndify_server_uri
       };
     }
     else if ('deepgramflux' === vendor) {

View File

@@ -576,7 +576,7 @@ class TaskDial extends Task {
       proxy: `sip:${sbcAddress}`,
       callingNumber: this.callerId || fromUri.user,
       ...(this.callerName && {callingName: this.callerName}),
-      opusFirst: isOpusFirst(this.cs.ep.remote.sdp),
+      opusFirst: isOpusFirst(this.cs.ep.local.sdp),
       isVideoCall: this.cs.ep.remote.sdp.includes('m=video')
     };
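
Per commit 8048e9cf88, the point of this one-line change is that the remote SDP on the A leg is the caller's INVITE (which may offer many codecs), while the local SDP is the 200 OK we sent back, i.e. the negotiated answer; opus-first outdial on the B leg should key off the latter. isOpusFirst itself is not part of this diff; a purely hypothetical sketch of such a helper, assuming it checks whether the first payload type in the audio m-line maps to opus:

// Hypothetical isOpusFirst sketch (the real helper is not shown in this
// diff): take the first payload type on the m=audio line and test whether
// its rtpmap entry names opus.
const isOpusFirst = (sdp) => {
  const mline = sdp.split('\n').find((l) => l.startsWith('m=audio'));
  if (!mline) return false;
  const firstPayload = mline.trim().split(' ')[3]; // m=audio <port> <proto> <pt> ...
  return new RegExp(`a=rtpmap:${firstPayload} opus/`, 'i').test(sdp);
};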

View File

@@ -258,7 +258,7 @@ class TaskGather extends SttTask {
           startDtmfListener();
         }
         this._stopVad();
-        if (!this.killed) {
+        if (!this.killed && !this.resolved) {
          startListening(cs, ep);
          if (this.input.includes('speech') && this.vendor === 'nuance' && this.listenDuringPrompt) {
            this.logger.debug('Gather:exec - starting transcription timers after say completes');
@@ -296,7 +296,7 @@ class TaskGather extends SttTask {
           startDtmfListener();
         }
         this._stopVad();
-        if (!this.killed) {
+        if (!this.killed && !this.resolved) {
          startListening(cs, ep);
          if (this.input.includes('speech') && this.vendor === 'nuance' && this.listenDuringPrompt) {
            this.logger.debug('Gather:exec - starting transcription timers after play completes');
@@ -1161,7 +1161,7 @@ class TaskGather extends SttTask {
   }
   async _startFallback(cs, ep, evt) {
-    if (this.canFallback) {
+    if (this.canFallback()) {
       this._stopTranscribing(ep);
       try {
         this.logger.debug('gather:_startFallback');
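
The two !this.resolved guards close the race from cc1751f500: a speech transcript can resolve the gather while the prompt is still playing, and the prompt-completed handler would then restart listening and arm a timeout timer that nothing ever clears. A hypothetical reduction of the hazard (names invented for illustration):

// Reduced sketch of the race: resolve() may run before the prompt-finished
// callback. Without checking this.resolved, the callback would arm a timer
// that outlives the completed gather.
class GatherLike {
  constructor() { this.resolved = false; this.killed = false; }
  resolve(reason) {
    if (this.resolved) return;
    this.resolved = true;
    if (this._timeoutTimer) clearTimeout(this._timeoutTimer);
  }
  onPromptCompleted() {
    if (!this.killed && !this.resolved) { // the added guard
      this._timeoutTimer = setTimeout(() => this.resolve('timeout'), 5000);
    }
  }
}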

View File

@@ -171,7 +171,7 @@ class SttTask extends Task {
     try {
       this.sttCredentials = await this._initSpeechCredentials(this.cs, this.vendor, this.label);
     } catch (error) {
-      if (this.canFallback) {
+      if (this.canFallback()) {
         this.notifyError(
           {
             msg: 'ASR error', details:`Invalid vendor ${this.vendor}, Error: ${error}`,
@@ -260,8 +260,19 @@
     ep.addCustomEventListener(event, handler);
   }
-  removeCustomEventListeners() {
-    this.eventHandlers.forEach((h) => h.ep.removeCustomEventListener(h.event, h.handler));
+  removeCustomEventListeners(ep) {
+    if (ep) {
+      // for a specific endpoint
+      this.eventHandlers.filter((h) => h.ep === ep).forEach((h) => {
+        h.ep.removeCustomEventListener(h.event, h.handler);
+      });
+      this.eventHandlers = this.eventHandlers.filter((h) => h.ep !== ep);
+      return;
+    } else {
+      // for all endpoints
+      this.eventHandlers.forEach((h) => h.ep.removeCustomEventListener(h.event, h.handler));
+      this.eventHandlers = [];
+    }
   }
   async _initSpeechCredentials(cs, vendor, label) {
@@ -329,11 +340,13 @@
     return credentials;
   }
-  get canFallback() {
+  canFallback() {
     return this.fallbackVendor && this.isHandledByPrimaryProvider && !this.cs.hasFallbackAsr;
   }
-  async _initFallback() {
+  // ep is optional for gather or any verb that has a single ep,
+  // but transcribe needs it since it may have 2 eps
+  async _initFallback(ep) {
     assert(this.fallbackVendor, 'fallback failed without fallbackVendor configuration');
     this.logger.info(`Failed to use primary STT provider, fallback to ${this.fallbackVendor}`);
     this.isHandledByPrimaryProvider = false;
@@ -346,7 +359,7 @@
     this.data.recognizer.label = this.label;
     this.sttCredentials = await this._initSpeechCredentials(this.cs, this.vendor, this.label);
     // cleanup previous listeners from the previous vendor
-    this.removeCustomEventListeners();
+    this.removeCustomEventListeners(ep);
   }
   async compileHintsForCobalt(ep, hostport, model, token, hints) {
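
The reworked removeCustomEventListeners keeps the old remove-everything behavior when called with no argument, and detaches only one endpoint's handlers when given an ep. A hypothetical usage sketch, assuming the task-level registration helper takes (ep, event, handler), as the stored {ep, event, handler} tuples suggest:

// During a per-endpoint fallback, only the failing endpoint's listeners
// are detached; final teardown still clears everything. (Illustrative
// names: ep1/ep2 and the handlers are invented.)
task.addCustomEventListener(ep1, 'transcript', onTranscriptEp1);
task.addCustomEventListener(ep2, 'transcript', onTranscriptEp2);

task.removeCustomEventListeners(ep2); // ep2 falls back: only its handlers go
// ...re-register ep2 handlers against the fallback vendor...
task.removeCustomEventListeners();    // task teardown: all remaining handlers go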

View File

@@ -70,6 +70,9 @@ class TaskTranscribe extends SttTask {
     this._bufferedTranscripts = [ [], [] ]; // for channel 1 and 2
     this.bugname_prefix = 'transcribe_';
     this.paused = false;
+    // fallback flags
+    this.isHandledByPrimaryProviderForEp1 = true;
+    this.isHandledByPrimaryProviderForEp2 = true;
   }
   get name() { return TaskName.Transcribe; }
@@ -776,7 +779,7 @@
   }
   async _startFallback(cs, _ep, evt) {
-    if (this.canFallback) {
+    if (this.canFallback(_ep)) {
       _ep.stopTranscription({
         vendor: this.vendor,
         bugname: this.bugname,
@@ -786,7 +789,7 @@
       try {
         this.notifyError({ msg: 'ASR error',
           details:`STT Vendor ${this.vendor} error: ${evt.error || evt.reason}`, failover: 'in progress'});
-        await this._initFallback();
+        await this._initFallback(_ep);
         let channel = 1;
         if (this.ep !== _ep) {
           channel = 2;
@@ -895,6 +898,41 @@
     if (this._asrTimer) clearTimeout(this._asrTimer);
     this._asrTimer = null;
   }
+  // We need to track whether a fallback has happened for each endpoint,
+  // so we override the canFallback and _initFallback methods to make sure
+  // we only fall back once per endpoint. We track this at the task level
+  // rather than the endpoint level because the endpoint instance is used
+  // across multiple tasks.
+  canFallback(ep) {
+    let isHandledByPrimaryProvider = this.isHandledByPrimaryProvider;
+    if (ep === this.ep) {
+      isHandledByPrimaryProvider = this.isHandledByPrimaryProviderForEp1;
+    } else if (ep === this.ep2) {
+      isHandledByPrimaryProvider = this.isHandledByPrimaryProviderForEp2;
+    }
+    const isOneOfEndpointAlreadyFallenBack = !!this.ep && !!this.ep2 &&
+      this.isHandledByPrimaryProviderForEp1 !== this.isHandledByPrimaryProviderForEp2;
+    // fallback is configured
+    return this.fallbackVendor &&
+      // and this endpoint has not already fallen back
+      isHandledByPrimaryProvider &&
+      // and either no fallback has happened globally, or one endpoint has
+      // already fallen back (setting cs.hasFallbackAsr to true), in which
+      // case the other endpoint is still allowed to fall back.
+      (isOneOfEndpointAlreadyFallenBack || !this.cs.hasFallbackAsr);
+  }
+  _initFallback(ep) {
+    if (ep === this.ep) {
+      this.isHandledByPrimaryProviderForEp1 = false;
+    } else if (ep === this.ep2) {
+      this.isHandledByPrimaryProviderForEp2 = false;
+    }
+    return super._initFallback(ep);
+  }
 }
 module.exports = TaskTranscribe;
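
Since transcribe can run a recognizer on each leg, the overridden canFallback has to let the second leg fall back even after the first leg has set cs.hasFallbackAsr. A standalone simulation of the bookkeeping above (a hypothetical reduction, not the task class):

// Two endpoints, fallback configured. Each endpoint may fall back exactly
// once; the flags-differ test lets the second leg proceed even though the
// first leg already set hasFallbackAsr.
const state = {ep1Primary: true, ep2Primary: true, hasFallbackAsr: false, fallbackVendor: 'x'};

const canFallback = (which) => {
  const primary = which === 1 ? state.ep1Primary : state.ep2Primary;
  const oneAlreadyFell = state.ep1Primary !== state.ep2Primary;
  return !!state.fallbackVendor && primary && (oneAlreadyFell || !state.hasFallbackAsr);
};
const fallBack = (which) => {
  if (which === 1) state.ep1Primary = false;
  else state.ep2Primary = false;
  state.hasFallbackAsr = true;
};

console.log(canFallback(1)); // true  - nothing has fallen back yet
fallBack(1);
console.log(canFallback(2)); // true  - flags differ, so leg 2 may still fall back
fallBack(2);
console.log(canFallback(1)); // false - each endpoint falls back at most once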

View File

@@ -89,8 +89,9 @@ class TtsTask extends Task {
     // api_key, model_id, api_uri, custom_tts_streaming_url, and auth_token are encoded in the credentials
     // allow them to be overridden via config, using options
     // give preference to options passed in via config
-    const local_options = {...JSON.parse(options), ...this.options};
-    const local_voice_settings = {...JSON.parse(options).voice_settings, ...this.options.voice_settings};
+    const parsed_options = options ? JSON.parse(options) : {};
+    const local_options = {...parsed_options, ...this.options};
+    const local_voice_settings = {...(parsed_options.voice_settings || {}), ...(this.options.voice_settings || {})};
     const local_api_key = local_options.api_key ?? api_key;
     const local_model_id = local_options.model_id ?? model_id;
     const local_api_uri = local_options.api_uri ?? api_uri;
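
This hunk is the fix from edb4d21ce1: JSON.parse(undefined) coerces its argument to the string "undefined" and throws a SyntaxError, so when no options were encoded in the credential the old code crashed before any channel vars were set. A reduced sketch of the guarded merge (mergeTtsOptions is an invented helper name):

// The merge mirrors the hunk above: parse only when the encoded options
// are present, and default both voice_settings operands before spreading.
const mergeTtsOptions = (options, overrides = {}) => {
  const parsed = options ? JSON.parse(options) : {};
  return {
    ...parsed,
    ...overrides,
    voice_settings: {...(parsed.voice_settings || {}), ...(overrides.voice_settings || {})},
  };
};

console.log(mergeTtsOptions(undefined, {model_id: 'm1'}));
// -> { model_id: 'm1', voice_settings: {} }  (no throw)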

View File

@@ -152,6 +152,7 @@ const speechMapper = (cred) => {
     obj.client_id = o.client_id;
     obj.client_key = o.client_key;
     obj.user_id = o.user_id;
+    obj.houndify_server_uri = o.houndify_server_uri;
   }
   else if ('voxist' === obj.vendor) {
     const o = JSON.parse(decrypt(credential));

View File

@@ -191,7 +191,7 @@ class HttpRequestor extends BaseRequestor {
       method,
       headers: hdrs,
       ...('POST' === method && {body: JSON.stringify(payload)}),
-      timeout: HTTP_TIMEOUT,
+      headersTimeout: HTTP_TIMEOUT,
       followRedirects: false
     };
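
The rename from timeout to headersTimeout matters if the underlying client is undici, as the option name suggests: undici's request options have no generic timeout key (an unrecognized key is silently ignored), while headersTimeout and bodyTimeout are the supported per-request knobs. A hedged sketch assuming undici and an invented URL:

// Assumes undici: headersTimeout bounds the wait for response headers,
// bodyTimeout bounds inactivity while reading the body. Both in ms.
const {request} = require('undici');

(async() => {
  const res = await request('http://example.com/hook', {
    method: 'POST',
    headers: {'content-type': 'application/json'},
    body: JSON.stringify({event: 'test'}),
    headersTimeout: 10000,
    bodyTimeout: 10000,
  });
  console.log(res.statusCode);
})();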

View File

@@ -920,7 +920,7 @@ module.exports = (logger) => {
       ...(rOpts.initialSpeechTimeoutMs > 0 &&
         {AZURE_INITIAL_SPEECH_TIMEOUT_MS: rOpts.initialSpeechTimeoutMs}),
       ...(rOpts.requestSnr && {AZURE_REQUEST_SNR: 1}),
-      ...(rOpts.audioLogging && {AZURE_AUDIO_LOGGING: 1}),
+      ...(azureOptions.audioLogging && {AZURE_AUDIO_LOGGING: 1}),
       ...{AZURE_USE_OUTPUT_FORMAT_DETAILED: 1},
       ...(azureOptions.speechSegmentationSilenceTimeoutMs &&
         {AZURE_SPEECH_SEGMENTATION_SILENCE_TIMEOUT_MS: azureOptions.speechSegmentationSilenceTimeoutMs}),
@@ -1226,8 +1226,10 @@
       audioFormat, enableNoiseReduction, enableProfanityFilter, enablePunctuation,
       enableCapitalization, confidenceThreshold, enableDisfluencyFilter,
       maxResults, enableWordTimestamps, maxAlternatives, partialTranscriptInterval,
-      sessionTimeout, connectionTimeout, customVocabulary, languageModel
+      sessionTimeout, connectionTimeout, customVocabulary, languageModel,
+      requestInfo, sampleRate
     } = rOpts.houndifyOptions || {};
+    const audioEndpointUri = audioEndpoint || sttCredentials.houndify_server_uri;
     opts = {
       ...opts,
@@ -1263,10 +1265,12 @@
       ...(country && {HOUNDIFY_COUNTRY: country}),
       ...(timeZone && {HOUNDIFY_TIMEZONE: timeZone}),
       ...(domain && {HOUNDIFY_DOMAIN: domain}),
-      ...(audioEndpoint && {HOUNDIFY_AUDIO_ENDPOINT: audioEndpoint}),
+      ...(audioEndpointUri && {HOUNDIFY_AUDIO_ENDPOINT: audioEndpointUri}),
       ...(customVocabulary && {HOUNDIFY_CUSTOM_VOCABULARY:
         Array.isArray(customVocabulary) ? customVocabulary.join(',') : customVocabulary}),
       ...(languageModel && {HOUNDIFY_LANGUAGE_MODEL: languageModel}),
+      ...(requestInfo && {HOUNDIFY_REQUEST_INFO: JSON.stringify(requestInfo)}),
+      ...(sampleRate && {HOUNDIFY_SAMPLING_RATE: sampleRate}),
     };
   }
   else if ('voxist' === vendor) {
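
All three Houndify additions lean on the same conditional-spread idiom used throughout this file: a channel variable is emitted only when its value is truthy, and requestInfo is serialized, presumably because channel variables carry strings and scalars. A small sketch with invented option values:

// Invented values for illustration; falsy options contribute no key.
const requestInfo = {PartialTranscriptsDesired: true};
const sampleRate = 8000;
const audioEndpointUri = undefined; // neither houndifyOptions nor the credential set it

const opts = {
  ...(audioEndpointUri && {HOUNDIFY_AUDIO_ENDPOINT: audioEndpointUri}),
  ...(requestInfo && {HOUNDIFY_REQUEST_INFO: JSON.stringify(requestInfo)}),
  ...(sampleRate && {HOUNDIFY_SAMPLING_RATE: sampleRate}),
};
// -> { HOUNDIFY_REQUEST_INFO: '{"PartialTranscriptsDesired":true}',
//      HOUNDIFY_SAMPLING_RATE: 8000 }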

package-lock.json (generated)
View File

@@ -18,7 +18,7 @@
     "@jambonz/speech-utils": "^0.2.26",
     "@jambonz/stats-collector": "^0.1.10",
     "@jambonz/time-series": "^0.2.14",
-    "@jambonz/verb-specifications": "^0.0.119",
+    "@jambonz/verb-specifications": "^0.0.122",
     "@modelcontextprotocol/sdk": "^1.9.0",
     "@opentelemetry/api": "^1.8.0",
     "@opentelemetry/exporter-jaeger": "^1.23.0",
@@ -1533,9 +1533,9 @@
       }
     },
     "node_modules/@jambonz/verb-specifications": {
-      "version": "0.0.119",
-      "resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.119.tgz",
-      "integrity": "sha512-AQsZ7EY2bBOjdhufKtZpdrhFobo6LBMLkuDJY058Q3qxGBtlZr3Wx9zrlVjJuNOGCEJSmMP7Gr/EtjtFFX2iTw==",
+      "version": "0.0.122",
+      "resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.122.tgz",
+      "integrity": "sha512-7xqaULhKFywJ2ZuyiYt77iiJwJ+8b98Zt1X4+OqZ7Cdjhfo7S6KnR66XRVJHnekXbmfVv58kB0KWUux5TG//Sw==",
       "license": "MIT",
       "dependencies": {
         "debug": "^4.3.4",
@@ -6543,6 +6543,8 @@
     },
     "node_modules/microsoft-cognitiveservices-speech-sdk/node_modules/utf-8-validate": {
       "version": "5.0.10",
+      "resolved": "https://registry.npmjs.org/utf-8-validate/-/utf-8-validate-5.0.10.tgz",
+      "integrity": "sha512-Z6czzLq4u8fPOyx7TU6X3dvUZVvoJmxSQ+IcrlmagKhilxlhZgxPK6C5Jqbkw1IDUmFTM+cz9QDnnLTwDz/2gQ==",
       "hasInstallScript": true,
       "license": "MIT",
       "optional": true,

View File

@@ -34,7 +34,7 @@
     "@jambonz/speech-utils": "^0.2.26",
     "@jambonz/stats-collector": "^0.1.10",
     "@jambonz/time-series": "^0.2.14",
-    "@jambonz/verb-specifications": "^0.0.119",
+    "@jambonz/verb-specifications": "^0.0.122",
     "@modelcontextprotocol/sdk": "^1.9.0",
     "@opentelemetry/api": "^1.8.0",
     "@opentelemetry/exporter-jaeger": "^1.23.0",