Feat/ambient sounds (#678)

* initial support for coaching mode in conference * wip * wip * add support for answer verb * wip * wip * wip * wip * wip * updates to rename option to dub * wip * wip * wip * update verb-specs * wip * wip * wip * wip * wip * wip * wip * wip * add option to boost audio signal in main channel * wip * wip * wip * wip * wip * wip * for now, bypass use of streaming apis when generating tts audio for dub tracks * add nested dub to dial * wip * add support for filler noise * kill filler noise when gather killed * wip * wip * while using sayOnTrack, we have to enclose the say command in double quotes * disableTtsStreaming = false * allow transcribe of b leg only on dial verb * dub.say can either be text or object like say verb with text and synthesizer * remove loop for sayOnTrack * update speech-utils * fixes for testing transcribe verb and support for dub and boostAudioSignal in lcc commands * add dial.boostAudioSignal * fix bug where session-level recognizer settings incorrectly overwrite verb-level settings * update verb specs * update dial to support array of dub verbs * fix bug setting gain * lint * wip * update speech-utils * use new endpoint methods for mod_dub --------- Co-authored-by: Dave Horton <daveh@beachdognet.com>
2025-12-20 16:50:39 +00:00 · 2024-03-24 03:23:57 +07:00
parent ec58232b61
commit 5b1d8a8ff3
18 changed files with 915 additions and 278 deletions
--- a/lib/tasks/gather.js
+++ b/lib/tasks/gather.js
@@ -27,7 +27,7 @@ class TaskGather extends SttTask {
    [
      'finishOnKey', 'input', 'numDigits', 'minDigits', 'maxDigits',
      'interDigitTimeout', 'partialResultHook', 'bargein', 'dtmfBargein',
-      'speechTimeout', 'timeout', 'say', 'play', 'actionHookDelayAction'
+      'speechTimeout', 'timeout', 'say', 'play', 'actionHookDelayAction', 'fillerNoise'
    ].forEach((k) => this[k] = this.data[k]);

    // gather default input is digits
@@ -91,6 +91,18 @@ class TaskGather extends SttTask {
      (this.playTask && this.playTask.earlyMedia);
  }

+  get hasFillerNoise() {
+    return Object.keys(this.fillerNoise).length > 0 && this.fillerNoise.enabled !== false;
+  }
+
+  get fillerNoiseUrl() { // url handed to ep.play() when filler noise starts
+    return this.fillerNoise.url;
+  }
+
+  get fillerNoiseStartDelaySecs() { // seconds to wait before starting filler noise; > 0 selects the timer path
+    return this.fillerNoise.startDelaySecs;
+  }
+
  get summary() {
    let s = `${this.name}{`;
    if (this.input.length === 2) s += 'inputs=[speech,digits],';
@@ -111,6 +123,11 @@ class TaskGather extends SttTask {
    await super.exec(cs, {ep});
    const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);

+    this.fillerNoise = {
+      ...(cs.fillerNoise || {}),
+      ...(this.fillerNoise || {})
+    };
+
    if (cs.hasGlobalSttHints && !this.maskGlobalSttHints) {
      const {hints, hintsBoost} = cs.globalSttHints;
      const setOfHints = new Set((this.data.recognizer.hints || [])
@@ -255,6 +272,7 @@ class TaskGather extends SttTask {
    super.kill(cs);
    this._killAudio(cs);
    this._killActionHookDelayAction();
+    this._clearFillerNoiseTimer();
    this.ep.removeAllListeners('dtmf');
    clearTimeout(this.interDigitTimer);
    this._clearAsrTimer();
@@ -674,9 +692,29 @@ class TaskGather extends SttTask {
    this._finalAsrTimer = null;
  }

+  _startFillerNoiseTimer() {
+    this._clearFillerNoiseTimer();
+    this._fillerNoiseTimer = setTimeout(() => {
+      this.logger.debug('Gather:_startFillerNoiseTimer - playing filler noise');
+      this.ep?.play(this.fillerNoise.url);
+    }, this.fillerNoise.startDelaySecs * 1000);
+  }
+
+  _clearFillerNoiseTimer() { // cancel a pending filler-noise start, if any, and reset the handle
+    if (this._fillerNoiseTimer) clearTimeout(this._fillerNoiseTimer);
+    this._fillerNoiseTimer = null;
+  }
+
+  _killFillerNoise() {
+    if (this._fillerNoiseTimer) {
+      this.logger.debug('Gather:_killFillerNoise');
+      this.ep?.api('uuid_break', this.ep.uuid);
+    }
+  }
+
  _killAudio(cs) {
-    if (!this.sayTask && !this.playTask && this.bargein) {
-      if (this.ep?.connected && !this.playComplete) {
+    if (this.hasFillerNoise || (!this.sayTask && !this.playTask && this.bargein)) {
+      if (this.ep?.connected && (!this.playComplete || this.hasFillerNoise)) {
        this.logger.debug('Gather:_killAudio: killing playback of any audio');
        this.playComplete = true;
        this.ep.api('uuid_break', this.ep.uuid)
@@ -1004,6 +1042,16 @@ class TaskGather extends SttTask {
      this._startActionHookNoResponseGiveUpTimer();
    }

+    if (this.hasFillerNoise && (reason.startsWith('dtmf') || reason.startsWith('speech'))) {
+      if (this.fillerNoiseStartDelaySecs > 0) {
+        this._startFillerNoiseTimer();
+      }
+      else {
+        this.logger.debug(`TaskGather:_resolve - playing filler noise: ${this.fillerNoiseUrl}`);
+        this.ep.play(this.fillerNoiseUrl);
+      }
+    }
+
    try {
      if (reason.startsWith('dtmf')) {
        if (this.parentTask) this.parentTask.emit('dtmf', evt);
@@ -1038,6 +1086,7 @@ class TaskGather extends SttTask {
    // Gather got response from hook, cancel all delay timers if there is any
    this._clearActionHookNoResponseTimer();
    this._clearActionHookNoResponseGiveUpTimer();
+    this._clearFillerNoiseTimer();

    this.notifyTaskDone();
  }