Mirror of https://github.com/jambonz/jambonz-feature-server.git (synced 2026-01-25 02:07:56 +00:00)

Compare commits

12 Commits
| Author | SHA1 | Date |
|---|---|---|
| | e396b6aa98 | |
| | 9104ebb603 | |
| | 1ad0261336 | |
| | 7802822773 | |
| | edb4d21ce1 | |
| | 8048e9cf88 | |
| | 451feafed4 | |
| | 7f1543a0f3 | |
| | 83955ba972 | |
| | a5fa5fce5b | |
| | cc1751f500 | |
| | 1a1f53aede | |
@@ -119,7 +119,7 @@ const ENCRYPTION_SECRET = process.env.ENCRYPTION_SECRET;
const HTTP_POOL = process.env.HTTP_POOL && parseInt(process.env.HTTP_POOL);
const HTTP_POOLSIZE = parseInt(process.env.HTTP_POOLSIZE, 10) || 10;
const HTTP_PIPELINING = parseInt(process.env.HTTP_PIPELINING, 10) || 1;
-const HTTP_TIMEOUT = 10000;
+const HTTP_TIMEOUT = parseInt(process.env.JAMBONES_HTTP_TIMEOUT, 10) || 10000;
const HTTP_PROXY_IP = process.env.JAMBONES_HTTP_PROXY_IP;
const HTTP_PROXY_PORT = process.env.JAMBONES_HTTP_PROXY_PORT;
const HTTP_PROXY_PROTOCOL = process.env.JAMBONES_HTTP_PROXY_PROTOCOL || 'http';

@@ -139,6 +139,10 @@ const JAMBONES_USE_FREESWITCH_TIMER_FD = process.env.JAMBONES_USE_FREESWITCH_TIM
const JAMBONES_DIAL_SBC_FOR_REGISTERED_USER = process.env.JAMBONES_DIAL_SBC_FOR_REGISTERED_USER || false;
const JAMBONES_MEDIA_TIMEOUT_MS = process.env.JAMBONES_MEDIA_TIMEOUT_MS || 0;
const JAMBONES_MEDIA_HOLD_TIMEOUT_MS = process.env.JAMBONES_MEDIA_HOLD_TIMEOUT_MS || 0;
+
+/* say / tts */
+const JAMBONES_SAY_CHUNK_SIZE = parseInt(process.env.JAMBONES_SAY_CHUNK_SIZE, 10) || 900;
+
// jambonz
const JAMBONES_TRANSCRIBE_EP_DESTROY_DELAY_MS =
  process.env.JAMBONES_TRANSCRIBE_EP_DESTROY_DELAY_MS;

@@ -231,5 +235,6 @@ module.exports = {
  JAMBONES_DIAL_SBC_FOR_REGISTERED_USER,
  JAMBONES_MEDIA_TIMEOUT_MS,
  JAMBONES_MEDIA_HOLD_TIMEOUT_MS,
+  JAMBONES_SAY_CHUNK_SIZE,
  JAMBONES_TRANSCRIBE_EP_DESTROY_DELAY_MS,
};

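A minimal sketch of how a deployment might exercise the two new knobs above; the override values are illustrative assumptions, not defaults taken from the commit:

// hypothetical overrides (assumption): both settings are plain env vars parsed with a fallback
process.env.JAMBONES_HTTP_TIMEOUT = '30000';   // was hard-coded to 10000 before this change
process.env.JAMBONES_SAY_CHUNK_SIZE = '600';   // chunk size used when splitting long say/tts text
const HTTP_TIMEOUT = parseInt(process.env.JAMBONES_HTTP_TIMEOUT, 10) || 10000;             // -> 30000
const JAMBONES_SAY_CHUNK_SIZE = parseInt(process.env.JAMBONES_SAY_CHUNK_SIZE, 10) || 900;  // -> 600
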
@@ -291,7 +291,7 @@ router.post('/',
}, {
  ...(account.enable_debug_log && {level: 'debug'})
});
-app.requestor.logger = app.notifier.logger = sipLogger;
+app.requestor.logger = app.notifier.logger = restDial.logger = sipLogger;
const callInfo = new CallInfo({
  direction: CallDirection.Outbound,
  req: inviteReq,

@@ -927,7 +927,7 @@ class CallSession extends Emitter {
  this.logger.debug('CallSession:enableBackgroundTtsStream - ttsStream enabled');
} else {
  this.logger.debug(
-    'CallSession:enableBackgroundTtsStream - ignoring request as call does not have required conditions');
+    'CallSession:enableBackgroundTtsStream - ignoring request; conditions not met (probably not using ws api)');
}
} catch (err) {
  this.logger.info({err, say}, 'CallSession:enableBackgroundTtsStream - Error creating background tts stream task');

@@ -941,9 +941,11 @@ class CallSession extends Emitter {
  }
}
clearTtsStream() {
-  this.requestor?.request('tts:streaming-event', '/streaming-event', {event_type: 'user_interruption'})
-    .catch((err) => this.logger.info({err}, 'CallSession:clearTtsStream - Error sending user_interruption'));
-  this.ttsStreamingBuffer?.clear();
+  if (this.isTtsStreamEnabled) {
+    this.requestor?.request('tts:streaming-event', '/streaming-event', {event_type: 'user_interruption'})
+      .catch((err) => this.logger.info({err}, 'CallSession:clearTtsStream - Error sending user_interruption'));
+    this.ttsStreamingBuffer?.clear();
+  }
}

startTtsStream() {

@@ -951,9 +953,11 @@ class CallSession extends Emitter {
}

stopTtsStream() {
-  this.requestor?.request('tts:streaming-event', '/streaming-event', {event_type: 'stream_closed'})
-    .catch((err) => this.logger.info({err}, 'CallSession:clearTtsStream - Error sending user_interruption'));
-  this.ttsStreamingBuffer?.stop();
+  if (this.isTtsStreamEnabled) {
+    this.requestor?.request('tts:streaming-event', '/streaming-event', {event_type: 'stream_closed'})
+      .catch((err) => this.logger.info({err}, 'CallSession:clearTtsStream - Error sending user_interruption'));
+    this.ttsStreamingBuffer?.stop();
+  }
}

async enableBotMode(gather, autoEnable) {

@@ -979,7 +983,7 @@ class CallSession extends Emitter {
  task.sticky = autoEnable;
  // listen to the bargein-done from background manager
  this.backgroundTaskManager.on('bargeIn-done', () => {
-    if (this.requestor instanceof WsRequestor) {
+    if (this.appIsUsingWebsockets) {
      try {
        this.kill(true);
      } catch (err) {}

@@ -1193,7 +1197,8 @@ class CallSession extends Emitter {
  speech_credential_sid: credential.speech_credential_sid,
  client_id: credential.client_id,
  client_key: credential.client_key,
-  user_id: credential.user_id
+  user_id: credential.user_id,
+  houndify_server_uri: credential.houndify_server_uri
};
}
else if ('deepgramflux' === vendor) {

@@ -1337,7 +1342,7 @@ class CallSession extends Emitter {
}

if (0 === this.tasks.length &&
-  this.requestor instanceof WsRequestor &&
+  this.appIsUsingWebsockets &&
  !this.requestor.closedGracefully &&
  !this.callGone &&
  !this.isConfirmCallSession

@@ -3023,14 +3028,14 @@ Duration=${duration} `
*/

_notifyTaskError(obj) {
-  if (this.requestor instanceof WsRequestor) {
+  if (this.appIsUsingWebsockets) {
    this.requestor.request('jambonz:error', '/error', obj)
      .catch((err) => this.logger.debug({err}, 'CallSession:_notifyTaskError - Error sending'));
  }
}

_notifyTaskStatus(task, evt) {
-  if (this.notifyEvents && this.requestor instanceof WsRequestor) {
+  if (this.notifyEvents && this.appIsUsingWebsockets) {
    const obj = {...evt, id: task.id, name: task.name};
    this.requestor.request('verb:status', '/status', obj)
      .catch((err) => this.logger.debug({err}, 'CallSession:_notifyTaskStatus - Error sending'));

@@ -3082,7 +3087,7 @@ Duration=${duration} `
}

_clearTasks(backgroundGather, evt) {
-  if (this.requestor instanceof WsRequestor && !backgroundGather.cleared) {
+  if (this.appIsUsingWebsockets && !backgroundGather.cleared) {
    this.logger.debug({evt}, 'CallSession:_clearTasks on event from background gather');
    try {
      backgroundGather.cleared = true;

@@ -21,7 +21,7 @@ const {parseUri} = require('drachtio-srf');
const {ANCHOR_MEDIA_ALWAYS,
  JAMBONZ_DIAL_PAI_HEADER,
  JAMBONES_DIAL_SBC_FOR_REGISTERED_USER} = require('../config');
-const { isOnhold, isOpusFirst } = require('../utils/sdp-utils');
+const { isOnhold, isOpusFirst, getLeadingCodec } = require('../utils/sdp-utils');
const { normalizeJambones } = require('@jambonz/verb-specifications');
const { selectHostPort } = require('../utils/network');
const { sleepFor } = require('../utils/helpers');

@@ -158,6 +158,7 @@ class TaskDial extends Task {

get canReleaseMedia() {
  const keepAnchor = this.data.anchorMedia ||
+    this.isTranscoding ||
    this.cs.isBackGroundListen ||
    this.cs.onHoldMusic ||
    ANCHOR_MEDIA_ALWAYS ||

@@ -575,7 +576,7 @@ class TaskDial extends Task {
  proxy: `sip:${sbcAddress}`,
  callingNumber: this.callerId || fromUri.user,
  ...(this.callerName && {callingName: this.callerName}),
-  opusFirst: isOpusFirst(this.cs.ep.remote.sdp),
+  opusFirst: isOpusFirst(this.cs.ep.local.sdp),
  isVideoCall: this.cs.ep.remote.sdp.includes('m=video')
};

@@ -772,6 +773,15 @@ class TaskDial extends Task {
}

async _connectSingleDial(cs, sd) {
+  // start connect with dialed leg, this is the soonest we can identify transcoding
+  if (this.epOther && sd.ep) {
+    const codecA = getLeadingCodec(this.epOther.local.sdp);
+    const codecB = getLeadingCodec(sd.ep.remote.sdp);
+    this.isTranscoding = (codecA !== codecB);
+    if (this.isTranscoding) {
+      this.logger.info(`Dial:_connectSingleDial - transcoding from ${codecA} (A leg) to ${codecB} (B leg)`);
+    }
+  }
  if (!this.bridged && !this.canReleaseMedia) {
    this.logger.debug('Dial:_connectSingleDial bridging endpoints');
    if (this.epOther) {

@@ -929,7 +939,6 @@ class TaskDial extends Task {
  this.logger.info({err}, 'Dial:_selectSingleDial - Error boosting audio signal');
}
}
-
/* if we can release the media back to the SBC, do so now */
if (this.canReleaseMedia || this.shouldExitMediaPathEntirely) {
  setTimeout(this._releaseMedia.bind(this, cs, sd, this.shouldExitMediaPathEntirely), 200);

@@ -258,7 +258,7 @@ class TaskGather extends SttTask {
  startDtmfListener();
}
this._stopVad();
-if (!this.killed) {
+if (!this.killed && !this.resolved) {
  startListening(cs, ep);
  if (this.input.includes('speech') && this.vendor === 'nuance' && this.listenDuringPrompt) {
    this.logger.debug('Gather:exec - starting transcription timers after say completes');

@@ -296,7 +296,7 @@ class TaskGather extends SttTask {
  startDtmfListener();
}
this._stopVad();
-if (!this.killed) {
+if (!this.killed && !this.resolved) {
  startListening(cs, ep);
  if (this.input.includes('speech') && this.vendor === 'nuance' && this.listenDuringPrompt) {
    this.logger.debug('Gather:exec - starting transcription timers after play completes');

@@ -881,7 +881,7 @@ class TaskGather extends SttTask {
  this._fillerNoiseOn = false; // in a race, if we just started audio it may sneak through here
  this.ep.api('uuid_break', this.ep.uuid)
    .catch((err) => this.logger.info(err, 'Error killing audio'));
-  cs.clearTtsStream();
+  if (cs.isTtsStreamEnabled) cs.clearTtsStream();
}
return;
}

@@ -1161,7 +1161,7 @@ class TaskGather extends SttTask {
}

async _startFallback(cs, ep, evt) {
-  if (this.canFallback) {
+  if (this.canFallback()) {
    this._stopTranscribing(ep);
    try {
      this.logger.debug('gather:_startFallback');

@@ -1,6 +1,7 @@
const assert = require('assert');
const TtsTask = require('./tts-task');
const {TaskName, TaskPreconditions} = require('../utils/constants');
+const {JAMBONES_SAY_CHUNK_SIZE} = require('../config');
const pollySSMLSplit = require('polly-ssml-split');
const { SpeechCredentialError, NonFatalTaskError } = require('../utils/error');
const { sleepFor } = require('../utils/helpers');

@@ -31,7 +32,7 @@ const isMatchingEvent = (logger, filename, playbackId, evt) => {
const breakLengthyTextIfNeeded = (logger, text) => {
  // As The text can be used for tts streaming, we need to break lengthy text into smaller chunks
  // HIGH_WATER_BUFFER_SIZE defined in tts-streaming-buffer.js
-  const chunkSize = 900;
+  const chunkSize = JAMBONES_SAY_CHUNK_SIZE;
  const isSSML = text.startsWith('<speak>');
  const options = {
    softLimit: 100,

@@ -171,7 +171,7 @@ class SttTask extends Task {
try {
  this.sttCredentials = await this._initSpeechCredentials(this.cs, this.vendor, this.label);
} catch (error) {
-  if (this.canFallback) {
+  if (this.canFallback()) {
    this.notifyError(
      {
        msg: 'ASR error', details:`Invalid vendor ${this.vendor}, Error: ${error}`,

@@ -260,8 +260,19 @@ class SttTask extends Task {
  ep.addCustomEventListener(event, handler);
}

-removeCustomEventListeners() {
-  this.eventHandlers.forEach((h) => h.ep.removeCustomEventListener(h.event, h.handler));
+removeCustomEventListeners(ep) {
+  if (ep) {
+    // for specific endpoint
+    this.eventHandlers.filter((h) => h.ep === ep).forEach((h) => {
+      h.ep.removeCustomEventListener(h.event, h.handler);
+    });
+    this.eventHandlers = this.eventHandlers.filter((h) => h.ep !== ep);
+    return;
+  } else {
+    // for all endpoints
+    this.eventHandlers.forEach((h) => h.ep.removeCustomEventListener(h.event, h.handler));
+    this.eventHandlers = [];
+  }
}

async _initSpeechCredentials(cs, vendor, label) {

@@ -329,11 +340,13 @@ class SttTask extends Task {
  return credentials;
}

-get canFallback() {
+canFallback() {
  return this.fallbackVendor && this.isHandledByPrimaryProvider && !this.cs.hasFallbackAsr;
}

-async _initFallback() {
+// ep is optional for gather or any verb that have single ep,
+// but transcribe does need as it might has 2 eps
+async _initFallback(ep) {
  assert(this.fallbackVendor, 'fallback failed without fallbackVendor configuration');
  this.logger.info(`Failed to use primary STT provider, fallback to ${this.fallbackVendor}`);
  this.isHandledByPrimaryProvider = false;

@@ -346,7 +359,7 @@ class SttTask extends Task {
  this.data.recognizer.label = this.label;
  this.sttCredentials = await this._initSpeechCredentials(this.cs, this.vendor, this.label);
  // cleanup previous listener from previous vendor
-  this.removeCustomEventListeners();
+  this.removeCustomEventListeners(ep);
}

async compileHintsForCobalt(ep, hostport, model, token, hints) {

@@ -70,6 +70,9 @@ class TaskTranscribe extends SttTask {
  this._bufferedTranscripts = [ [], [] ]; // for channel 1 and 2
  this.bugname_prefix = 'transcribe_';
  this.paused = false;
+  // fallback flags
+  this.isHandledByPrimaryProviderForEp1 = true;
+  this.isHandledByPrimaryProviderForEp2 = true;
}

get name() { return TaskName.Transcribe; }

@@ -776,7 +779,7 @@ class TaskTranscribe extends SttTask {
}

async _startFallback(cs, _ep, evt) {
-  if (this.canFallback) {
+  if (this.canFallback(_ep)) {
    _ep.stopTranscription({
      vendor: this.vendor,
      bugname: this.bugname,

@@ -786,7 +789,7 @@ class TaskTranscribe extends SttTask {
try {
  this.notifyError({ msg: 'ASR error',
    details:`STT Vendor ${this.vendor} error: ${evt.error || evt.reason}`, failover: 'in progress'});
-  await this._initFallback();
+  await this._initFallback(_ep);
  let channel = 1;
  if (this.ep !== _ep) {
    channel = 2;

@@ -895,6 +898,41 @@ class TaskTranscribe extends SttTask {
  if (this._asrTimer) clearTimeout(this._asrTimer);
  this._asrTimer = null;
}
+
+// We need to keep track the fallback is happened for each endpoint
+// override the canFallback and _initFallback methods to make sure that
+// we only fallback once per endpoint
+// we want to keep track this on task level instead of endpoint level
+// because the endpoint instance is used across multiple tasks.
+canFallback(ep) {
+  let isHandledByPrimaryProvider = this.isHandledByPrimaryProvider;
+  if (ep === this.ep) {
+    isHandledByPrimaryProvider = this.isHandledByPrimaryProviderForEp1;
+  } else if (ep === this.ep2) {
+    isHandledByPrimaryProvider = this.isHandledByPrimaryProviderForEp2;
+  }
+
+  const isOneOfEndpointAlreadyFallenBack = !!this.ep && !!this.ep2 &&
+    this.isHandledByPrimaryProviderForEp1 !== this.isHandledByPrimaryProviderForEp2;
+
+  // fallback is configured
+  return this.fallbackVendor &&
+    // has this endpoint already fallen back
+    isHandledByPrimaryProvider &&
+    // in global level, is there any fallback is already happened
+    // one fallen endpoint will mark cs.hasFallbackAsr to true,
+    // so if one endpoint was fallen, the other endpoint would be able to fallback.
+    (isOneOfEndpointAlreadyFallenBack || !this.cs.hasFallbackAsr);
+}
+
+_initFallback(ep) {
+  if (ep === this.ep) {
+    this.isHandledByPrimaryProviderForEp1 = false;
+  } else if (ep === this.ep2) {
+    this.isHandledByPrimaryProviderForEp2 = false;
+  }
+  return super._initFallback(ep);
+}
}

module.exports = TaskTranscribe;

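The per-endpoint bookkeeping above reduces to a small model. The class below is an illustrative assumption, not code from the repo; it only shows why each transcription channel may fall back at most once while the other channel stays eligible:

// illustrative model only (assumption): per-channel fallback flags kept on the task, not the endpoint
class FallbackTracker {
  constructor(fallbackConfigured = true) {
    this.fallbackConfigured = fallbackConfigured;
    this.onPrimary = {1: true, 2: true};   // mirrors isHandledByPrimaryProviderForEp1/2
  }
  canFallback(channel) {
    return this.fallbackConfigured && this.onPrimary[channel];
  }
  markFallback(channel) {
    this.onPrimary[channel] = false;
  }
}

const t = new FallbackTracker();
console.log(t.canFallback(1)); // true
t.markFallback(1);             // channel 1 moves to the fallback vendor
console.log(t.canFallback(1)); // false - a channel only falls back once
console.log(t.canFallback(2)); // true  - channel 2 is still on the primary vendor
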
@@ -89,8 +89,9 @@ class TtsTask extends Task {
// api_key, model_id, api_uri, custom_tts_streaming_url, and auth_token are encoded in the credentials
// allow them to be overriden via config, using options
// give preference to options passed in via config
-const local_options = {...JSON.parse(options), ...this.options};
-const local_voice_settings = {...JSON.parse(options).voice_settings, ...this.options.voice_settings};
+const parsed_options = options ? JSON.parse(options) : {};
+const local_options = {...parsed_options, ...this.options};
+const local_voice_settings = {...(parsed_options.voice_settings || {}), ...(this.options.voice_settings || {})};
const local_api_key = local_options.api_key ?? api_key;
const local_model_id = local_options.model_id ?? model_id;
const local_api_uri = local_options.api_uri ?? api_uri;

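A quick sketch of the failure mode the new guard avoids; the values below are hypothetical, not taken from the codebase:

// assumption: `options` is a JSON string stored with the speech credential and may be absent
const options = undefined;
// previous behaviour: JSON.parse(options) throws a SyntaxError when options is undefined
const parsed_options = options ? JSON.parse(options) : {};                  // -> {}
const overrides = {voice_settings: {stability: 0.5}};                       // hypothetical config overrides
const local_voice_settings = {...(parsed_options.voice_settings || {}), ...(overrides.voice_settings || {})};
console.log(local_voice_settings);                                          // { stability: 0.5 }
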
@@ -152,6 +152,7 @@ const speechMapper = (cred) => {
  obj.client_id = o.client_id;
  obj.client_key = o.client_key;
  obj.user_id = o.user_id;
+  obj.houndify_server_uri = o.houndify_server_uri;
}
else if ('voxist' === obj.vendor) {
  const o = JSON.parse(decrypt(credential));

@@ -191,7 +191,7 @@ class HttpRequestor extends BaseRequestor {
method,
headers: hdrs,
...('POST' === method && {body: JSON.stringify(payload)}),
timeout: HTTP_TIMEOUT,
headersTimeout: HTTP_TIMEOUT,
followRedirects: false
};

@@ -55,11 +55,28 @@ const extractSdpMedia = (sdp) => {
  }
};

+const getLeadingCodec = (sdp) => {
+  if (!sdp) {
+    return null;
+  }
+
+  const parsed = sdpTransform.parse(sdp);
+  const audio = parsed.media?.find((m) => m.type === 'audio');
+
+  if (!audio) {
+    return null;
+  }
+
+  return audio.rtp?.[0]?.codec || null;
+};
+
module.exports = {
  isOnhold,
  mergeSdpMedia,
  extractSdpMedia,
  isOpusFirst,
  makeOpusFirst,
-  removeVideoSdp
+  removeVideoSdp,
+  getLeadingCodec
};

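As a usage illustration (assumptions: the sdp-transform package and hand-written SDP bodies; this snippet is not part of the commit), comparing the leading audio codec of two legs is enough to flag transcoding, which is how the Dial task above decides to keep media anchored:

// illustrative sketch, not repo code; assumes the sdp-transform npm package
const sdpTransform = require('sdp-transform');

const getLeadingCodec = (sdp) => {
  if (!sdp) return null;
  const audio = sdpTransform.parse(sdp).media?.find((m) => m.type === 'audio');
  return audio?.rtp?.[0]?.codec || null;
};

// hypothetical SDP fragments for the A and B legs
const aLeg = ['v=0', 'o=- 1 1 IN IP4 10.0.0.1', 's=-', 'c=IN IP4 10.0.0.1', 't=0 0',
  'm=audio 4000 RTP/AVP 9 0', 'a=rtpmap:9 G722/8000', 'a=rtpmap:0 PCMU/8000'].join('\r\n');
const bLeg = ['v=0', 'o=- 1 1 IN IP4 10.0.0.2', 's=-', 'c=IN IP4 10.0.0.2', 't=0 0',
  'm=audio 4002 RTP/AVP 0', 'a=rtpmap:0 PCMU/8000'].join('\r\n');

const isTranscoding = getLeadingCodec(aLeg) !== getLeadingCodec(bLeg);
console.log(getLeadingCodec(aLeg), getLeadingCodec(bLeg), isTranscoding); // G722 PCMU true
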
@@ -127,7 +127,6 @@ class SttLatencyCalculator extends Emitter {

calculateLatency() {
  if (!this.isRunning) {
-    this.logger.debug('Latency calculator is not running, cannot calculate latency, returning default values');
    return null;
  }

@@ -920,7 +920,7 @@ module.exports = (logger) => {
  ...(rOpts.initialSpeechTimeoutMs > 0 &&
    {AZURE_INITIAL_SPEECH_TIMEOUT_MS: rOpts.initialSpeechTimeoutMs}),
  ...(rOpts.requestSnr && {AZURE_REQUEST_SNR: 1}),
-  ...(rOpts.audioLogging && {AZURE_AUDIO_LOGGING: 1}),
+  ...(azureOptions.audioLogging && {AZURE_AUDIO_LOGGING: 1}),
  ...{AZURE_USE_OUTPUT_FORMAT_DETAILED: 1},
  ...(azureOptions.speechSegmentationSilenceTimeoutMs &&
    {AZURE_SPEECH_SEGMENTATION_SILENCE_TIMEOUT_MS: azureOptions.speechSegmentationSilenceTimeoutMs}),

@@ -1226,8 +1226,10 @@ module.exports = (logger) => {
  audioFormat, enableNoiseReduction, enableProfanityFilter, enablePunctuation,
  enableCapitalization, confidenceThreshold, enableDisfluencyFilter,
  maxResults, enableWordTimestamps, maxAlternatives, partialTranscriptInterval,
-  sessionTimeout, connectionTimeout, customVocabulary, languageModel
+  sessionTimeout, connectionTimeout, customVocabulary, languageModel,
+  requestInfo, sampleRate
} = rOpts.houndifyOptions || {};
+const audioEndpointUri = audioEndpoint || sttCredentials.houndify_server_uri;

opts = {
  ...opts,

@@ -1263,10 +1265,12 @@ module.exports = (logger) => {
  ...(country && {HOUNDIFY_COUNTRY: country}),
  ...(timeZone && {HOUNDIFY_TIMEZONE: timeZone}),
  ...(domain && {HOUNDIFY_DOMAIN: domain}),
-  ...(audioEndpoint && {HOUNDIFY_AUDIO_ENDPOINT: audioEndpoint}),
+  ...(audioEndpointUri && {HOUNDIFY_AUDIO_ENDPOINT: audioEndpointUri}),
  ...(customVocabulary && {HOUNDIFY_CUSTOM_VOCABULARY:
    Array.isArray(customVocabulary) ? customVocabulary.join(',') : customVocabulary}),
  ...(languageModel && {HOUNDIFY_LANGUAGE_MODEL: languageModel}),
+  ...(requestInfo && {HOUNDIFY_REQUEST_INFO: JSON.stringify(requestInfo)}),
+  ...(sampleRate && {HOUNDIFY_SAMPLING_RATE: sampleRate}),
};
}
else if ('voxist' === vendor) {

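A hedged example of recognizer options that would flow through the destructuring above; the property names follow that destructuring, while the values and the RequestInfo contents are assumptions:

// hypothetical verb payload fragment, not taken from the repo
const recognizer = {
  vendor: 'houndify',
  houndifyOptions: {
    requestInfo: {/* vendor-specific RequestInfo fields */},   // serialized into HOUNDIFY_REQUEST_INFO
    sampleRate: 8000                                           // mapped to HOUNDIFY_SAMPLING_RATE
  }
};
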
@@ -163,7 +163,6 @@ class TtsStreamingBuffer extends Emitter {
}

clear() {
  this.logger.debug('TtsStreamingBuffer:clear');
-  if (this._connectionStatus !== TtsStreamingConnectionStatus.Connected) return;
  clearTimeout(this.timer);
  this._api(this.ep, [this.ep.uuid, 'clear']).catch((err) =>

@@ -437,7 +436,15 @@ class TtsStreamingBuffer extends Emitter {

const findSentenceBoundary = (text, limit) => {
-  // Look for punctuation or double newline that signals sentence end.
-  const sentenceEndRegex = /[.!?](?=\s|$)|\n\n/g;
+  // Includes:
+  // - ASCII: . ! ?
+  // - Arabic: ؟ (question mark), ۔ (full stop)
+  // - Japanese: 。 (full stop), !, ? (full-width exclamation/question)
+  //
+  // For languages that use spaces between sentences, we still require
+  // whitespace or end-of-string after the mark. For Japanese (no spaces),
+  // we treat the punctuation itself as a boundary regardless of following char.
+  const sentenceEndRegex = /[.!?؟۔](?=\s|$)|[。!?]|\n\n/g;
  let lastSentenceBoundary = -1;
  let match;
  while ((match = sentenceEndRegex.exec(text)) && match.index < limit) {

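A small illustration of the widened boundary matching; the sample strings and outputs are assumptions, not taken from the repo:

// illustrative only: the extended regex also recognizes Arabic and Japanese sentence marks
const sentenceEndRegex = /[.!?؟۔](?=\s|$)|[。!?]|\n\n/g;
console.log('Hello there. How are you?'.match(sentenceEndRegex)); // [ '.', '?' ]
console.log('こんにちは。元気ですか?'.match(sentenceEndRegex));   // [ '。', '?' ]
console.log('كيف حالك؟'.match(sentenceEndRegex));                  // [ '؟' ]
// the previous /[.!?](?=\s|$)|\n\n/g pattern returns null for the Japanese and Arabic lines
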
package-lock.json (generated, 15 changes)
@@ -18,7 +18,7 @@
  "@jambonz/speech-utils": "^0.2.26",
  "@jambonz/stats-collector": "^0.1.10",
  "@jambonz/time-series": "^0.2.14",
-  "@jambonz/verb-specifications": "^0.0.119",
+  "@jambonz/verb-specifications": "^0.0.122",
  "@modelcontextprotocol/sdk": "^1.9.0",
  "@opentelemetry/api": "^1.8.0",
  "@opentelemetry/exporter-jaeger": "^1.23.0",

@@ -1533,9 +1533,9 @@
  }
},
"node_modules/@jambonz/verb-specifications": {
-  "version": "0.0.119",
-  "resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.119.tgz",
-  "integrity": "sha512-AQsZ7EY2bBOjdhufKtZpdrhFobo6LBMLkuDJY058Q3qxGBtlZr3Wx9zrlVjJuNOGCEJSmMP7Gr/EtjtFFX2iTw==",
+  "version": "0.0.122",
+  "resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.122.tgz",
+  "integrity": "sha512-7xqaULhKFywJ2ZuyiYt77iiJwJ+8b98Zt1X4+OqZ7Cdjhfo7S6KnR66XRVJHnekXbmfVv58kB0KWUux5TG//Sw==",
  "license": "MIT",
  "dependencies": {
    "debug": "^4.3.4",

@@ -6183,9 +6183,9 @@
  "license": "MIT"
},
"node_modules/js-yaml": {
-  "version": "3.14.1",
+  "version": "3.14.2",
+  "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.2.tgz",
+  "integrity": "sha512-PMSmkqxr106Xa156c2M265Z+FTrPl+oxd/rgOQy2tijQeK5TxQ43psO1ZCwhVOSdnn+RzkzlRz/eY4BgJBYVpg==",
  "dev": true,
  "license": "MIT",
  "dependencies": {
    "argparse": "^1.0.7",
    "esprima": "^4.0.0"

@@ -6542,6 +6543,8 @@
},
"node_modules/microsoft-cognitiveservices-speech-sdk/node_modules/utf-8-validate": {
  "version": "5.0.10",
  "resolved": "https://registry.npmjs.org/utf-8-validate/-/utf-8-validate-5.0.10.tgz",
  "integrity": "sha512-Z6czzLq4u8fPOyx7TU6X3dvUZVvoJmxSQ+IcrlmagKhilxlhZgxPK6C5Jqbkw1IDUmFTM+cz9QDnnLTwDz/2gQ==",
+  "hasInstallScript": true,
+  "license": "MIT",
  "optional": true,

@@ -34,7 +34,7 @@
  "@jambonz/speech-utils": "^0.2.26",
  "@jambonz/stats-collector": "^0.1.10",
  "@jambonz/time-series": "^0.2.14",
-  "@jambonz/verb-specifications": "^0.0.119",
+  "@jambonz/verb-specifications": "^0.0.122",
  "@modelcontextprotocol/sdk": "^1.9.0",
  "@opentelemetry/api": "^1.8.0",
  "@opentelemetry/exporter-jaeger": "^1.23.0",