fix #1466: do not send tts streaming events when tts streaming is not enabled (#1467)

* fix #1466: do not send tts streaming events when we are not doing tts streaming
Add configurable say chunk size (#1461)
2026-02-12 01:10:30 +00:00 · 2025-12-09 09:43:53 -05:00 · 2025-12-08 10:54:27 -05:00 · 2025-12-08 10:44:20 -05:00 · 2025-12-03 07:16:25 -05:00
8 changed files with 35 additions and 19 deletions
--- a/lib/config.js
+++ b/lib/config.js
@@ -139,6 +139,10 @@ const JAMBONES_USE_FREESWITCH_TIMER_FD = process.env.JAMBONES_USE_FREESWITCH_TIM
 const JAMBONES_DIAL_SBC_FOR_REGISTERED_USER = process.env.JAMBONES_DIAL_SBC_FOR_REGISTERED_USER || false;
 const JAMBONES_MEDIA_TIMEOUT_MS = process.env.JAMBONES_MEDIA_TIMEOUT_MS || 0;
 const JAMBONES_MEDIA_HOLD_TIMEOUT_MS = process.env.JAMBONES_MEDIA_HOLD_TIMEOUT_MS || 0;
+
+/* say / tts */
+const JAMBONES_SAY_CHUNK_SIZE = parseInt(process.env.JAMBONES_SAY_CHUNK_SIZE, 10) || 900;
+
 // jambonz
 const JAMBONES_TRANSCRIBE_EP_DESTROY_DELAY_MS =
  process.env.JAMBONES_TRANSCRIBE_EP_DESTROY_DELAY_MS;
@@ -231,5 +235,6 @@ module.exports = {
  JAMBONES_DIAL_SBC_FOR_REGISTERED_USER,
  JAMBONES_MEDIA_TIMEOUT_MS,
  JAMBONES_MEDIA_HOLD_TIMEOUT_MS,
+  JAMBONES_SAY_CHUNK_SIZE,
  JAMBONES_TRANSCRIBE_EP_DESTROY_DELAY_MS,
 };
--- a/lib/http-routes/api/create-call.js
+++ b/lib/http-routes/api/create-call.js
@@ -291,7 +291,7 @@ router.post('/',
            }, {
              ...(account.enable_debug_log && {level: 'debug'})
            });
-            app.requestor.logger = app.notifier.logger = sipLogger;
+            app.requestor.logger = app.notifier.logger = restDial.logger = sipLogger;
            const callInfo = new CallInfo({
              direction: CallDirection.Outbound,
              req: inviteReq,
--- a/lib/session/call-session.js
+++ b/lib/session/call-session.js
@@ -927,7 +927,7 @@ class CallSession extends Emitter {
        this.logger.debug('CallSession:enableBackgroundTtsStream - ttsStream enabled');
      } else {
        this.logger.debug(
-          'CallSession:enableBackgroundTtsStream - ignoring request as call does not have required conditions');
+          'CallSession:enableBackgroundTtsStream - ignoring request; conditions not met (probably not using ws api)');
      }
    }  catch (err) {
      this.logger.info({err, say}, 'CallSession:enableBackgroundTtsStream - Error creating background tts stream task');
@@ -941,9 +941,11 @@ class CallSession extends Emitter {
    }
  }
  clearTtsStream() {
-    this.requestor?.request('tts:streaming-event', '/streaming-event', {event_type: 'user_interruption'})
-      .catch((err) => this.logger.info({err}, 'CallSession:clearTtsStream - Error sending user_interruption'));
-    this.ttsStreamingBuffer?.clear();
+    if (this.isTtsStreamEnabled) {
+      this.requestor?.request('tts:streaming-event', '/streaming-event', {event_type: 'user_interruption'})
+        .catch((err) => this.logger.info({err}, 'CallSession:clearTtsStream - Error sending user_interruption'));
+      this.ttsStreamingBuffer?.clear();
+    }
  }

  startTtsStream() {
@@ -951,7 +953,7 @@ class CallSession extends Emitter {
  }

  stopTtsStream() {
-    if (this.appIsUsingWebsockets) {
+    if (this.isTtsStreamEnabled) {
      this.requestor?.request('tts:streaming-event', '/streaming-event', {event_type: 'stream_closed'})
        .catch((err) => this.logger.info({err}, 'CallSession:clearTtsStream - Error sending user_interruption'));
      this.ttsStreamingBuffer?.stop();
--- a/lib/tasks/dial.js
+++ b/lib/tasks/dial.js
@@ -158,7 +158,7 @@ class TaskDial extends Task {

  get canReleaseMedia() {
    const keepAnchor = this.data.anchorMedia ||
-      this.weAreTranscoding ||
+      this.isTranscoding ||
      this.cs.isBackGroundListen ||
      this.cs.onHoldMusic ||
      ANCHOR_MEDIA_ALWAYS ||
@@ -773,6 +773,15 @@ class TaskDial extends Task {
  }

  async _connectSingleDial(cs, sd) {
+    // start connect with dialed leg, this is the soonest we can identify transcoding
+    if (this.epOther && sd.ep) {
+      const codecA = getLeadingCodec(this.epOther.local.sdp);
+      const codecB = getLeadingCodec(sd.ep.remote.sdp);
+      this.isTranscoding = (codecA !== codecB);
+      if (this.isTranscoding) {
+        this.logger.info(`Dial:_connectSingleDial - transcoding from ${codecA} (A leg) to ${codecB} (B leg)`);
+      }
+    }
    if (!this.bridged && !this.canReleaseMedia) {
      this.logger.debug('Dial:_connectSingleDial bridging endpoints');
      if (this.epOther) {
@@ -930,13 +939,6 @@ class TaskDial extends Task {
        this.logger.info({err}, 'Dial:_selectSingleDial - Error boosting audio signal');
      }
    }
-    /* basic determination to see if call is being transcoded */
-    const codecA = getLeadingCodec(this.epOther.local.sdp);
-    const codecB = getLeadingCodec(this.ep.remote.sdp);
-    this.weAreTranscoding = (codecA !== codecB);
-    if (this.weAreTranscoding) {
-      this.logger.info(`Dial:_selectSingleDial - transcoding from ${codecA} (A leg) to ${codecB} (B leg)`);
-    }
    /* if we can release the media back to the SBC, do so now */
    if (this.canReleaseMedia || this.shouldExitMediaPathEntirely) {
      setTimeout(this._releaseMedia.bind(this, cs, sd, this.shouldExitMediaPathEntirely), 200);
--- a/lib/tasks/gather.js
+++ b/lib/tasks/gather.js
@@ -881,7 +881,7 @@ class TaskGather extends SttTask {
        this._fillerNoiseOn = false;  // in a race, if we just started audio it may sneak through here
        this.ep.api('uuid_break', this.ep.uuid)
          .catch((err) => this.logger.info(err, 'Error killing audio'));
-        cs.clearTtsStream();
+        if (cs.isTtsStreamEnabled) cs.clearTtsStream();
      }
      return;
    }
--- a/lib/tasks/say.js
+++ b/lib/tasks/say.js
@@ -1,6 +1,7 @@
 const assert = require('assert');
 const TtsTask = require('./tts-task');
 const {TaskName, TaskPreconditions} = require('../utils/constants');
+const {JAMBONES_SAY_CHUNK_SIZE} = require('../config');
 const pollySSMLSplit = require('polly-ssml-split');
 const { SpeechCredentialError, NonFatalTaskError } = require('../utils/error');
 const { sleepFor } = require('../utils/helpers');
@@ -31,7 +32,7 @@ const isMatchingEvent = (logger, filename, playbackId, evt) => {
 const breakLengthyTextIfNeeded = (logger, text) => {
  // As The text can be used for tts streaming, we need to break lengthy text into smaller chunks
 // HIGH_WATER_BUFFER_SIZE defined in tts-streaming-buffer.js
-  const chunkSize = 900;
+  const chunkSize = JAMBONES_SAY_CHUNK_SIZE;
  const isSSML = text.startsWith('<speak>');
  const options = {
    softLimit: 100,
--- a/lib/utils/stt-latency-calculator.js
+++ b/lib/utils/stt-latency-calculator.js
@@ -127,7 +127,6 @@ class SttLatencyCalculator extends Emitter {

  calculateLatency() {
    if (!this.isRunning) {
-      this.logger.debug('Latency calculator is not running, cannot calculate latency, returning default values');
      return null;
    }

--- a/lib/utils/tts-streaming-buffer.js
+++ b/lib/utils/tts-streaming-buffer.js
@@ -163,7 +163,6 @@ class TtsStreamingBuffer extends Emitter {
  }

  clear() {
-    this.logger.debug('TtsStreamingBuffer:clear');
    if (this._connectionStatus !== TtsStreamingConnectionStatus.Connected) return;
    clearTimeout(this.timer);
    this._api(this.ep, [this.ep.uuid, 'clear']).catch((err) =>
@@ -437,7 +436,15 @@ class TtsStreamingBuffer extends Emitter {

 const findSentenceBoundary = (text, limit) => {
  // Look for punctuation or double newline that signals sentence end.
-  const sentenceEndRegex = /[.!?](?=\s|$)|\n\n/g;
+  // Includes:
+  //   - ASCII: . ! ?
+  //   - Arabic: ؟ (question mark), ۔ (full stop)
+  //   - Japanese: 。 (full stop), ！, ？ (full-width exclamation/question)
+  //
+  // For languages that use spaces between sentences, we still require
+  // whitespace or end-of-string after the mark. For Japanese (no spaces),
+  // we treat the punctuation itself as a boundary regardless of following char.
+  const sentenceEndRegex = /[.!?؟۔](?=\s|$)|[。！？]|\n\n/g;
  let lastSentenceBoundary = -1;
  let match;
  while ((match = sentenceEndRegex.exec(text)) && match.index < limit) {
Author	SHA1	Message	Date
Dave Horton	e396b6aa98	fix #1466 : (#1467 ) * fix #1466: * do not send tts streaming events when we are not doing tts streaming	2025-12-09 09:43:53 -05:00
Vinod Dharashive	9104ebb603	Add configurable say chunk size (#1461 )	2025-12-08 10:54:27 -05:00
Vinod Dharashive	1ad0261336	Enhance TTS sentence boundary detection for Arabic and Japanese (#1464 ) Update sentenceEndRegex to treat the following as sentence boundaries: ASCII .!? followed by whitespace or end-of-text; Arabic question mark (؟) and full stop (۔) with the same rule; Japanese 。, ！, ？ treated as boundaries regardless of following character; and double newlines (\n\n). This improves streaming chunking for mixed-language content.	2025-12-08 10:44:20 -05:00
Hoan Luu Huu	7802822773	fixed dial verb cannot bridge 2 leg endpoints due to transcoding (#1457 ) * fixed dial verb cannot bridge 2 leg endpoints due to transcoding * wip	2025-12-03 07:16:25 -05:00