Fixes/ws testing dh (#704)

* fixes from testing with translator app
* more updates
* linting
* update gh actions to node 20
* add support for google v2 preconfigured recognizer
* add support for google voice activity events
* update to speech-utils@0.0.45
* update speech-utils to support caching azure tts
* transcribe must buffer transcripts for channel 1 and 2 separately
* further fix for accumulating transcripts
* linting
* deepgram sends transcripts with empty alternatives array
* fix deepgram returning an empty array
2026-02-13 09:49:30 +00:00 · 2024-04-03 14:30:49 -04:00
parent 72147a8110
commit 8999c85a71
11 changed files with 107 additions and 48 deletions
--- a/lib/tasks/dub.js
+++ b/lib/tasks/dub.js
@@ -125,10 +125,12 @@ class TaskDub extends TtsTask {
    const path = filepath[0];
    if (!path.startsWith('say:{')) {
      /* we have a local file of mp3 or r8 of synthesized speech audio to play */
+      this.logger.info(`playing synthesized speech from file on track ${this.track}: ${path}`);
      this.play = path;
      await this._playOnTrack(cs, ep);
    }
    else {
+      this.logger.info(`doing actual text to speech file on track ${this.track}: ${path}`);
      await ep.dub({
        action: 'sayOnTrack',
        track: this.track,
--- a/lib/tasks/enqueue.js
+++ b/lib/tasks/enqueue.js
@@ -338,6 +338,7 @@ class TaskEnqueue extends Task {
      this.logger.error({err}, `TaskEnqueue:_playHook error retrieving list info for queue ${this.queueName}`);
    }
    const json = await cs.application.requestor.request('verb:hook', hook, params, httpHeaders);
+    this.logger.debug({json}, 'TaskEnqueue:_playHook: received response from waitHook');
    const tasks = normalizeJambones(this.logger, json).map((tdata) => makeTask(this.logger, tdata));

    const allowedTasks = tasks.filter((t) => allowed.includes(t.name));
--- a/lib/tasks/transcribe.js
+++ b/lib/tasks/transcribe.js
@@ -58,7 +58,7 @@ class TaskTranscribe extends SttTask {
      this.isContinuousAsr = true;
    }
    /* buffer speech for continuous asr */
-    this._bufferedTranscripts = [];
+    this._bufferedTranscripts = [ [], [] ];  // for channel 1 and 2
    this.bugname_prefix = 'transcribe_';
    this.paused = false;
  }
@@ -326,6 +326,7 @@ class TaskTranscribe extends SttTask {
    // make sure this is not a transcript from answering machine detection
    const bugname = fsEvent.getHeader('media-bugname');
    const finished = fsEvent.getHeader('transcription-session-finished');
+    const bufferedTranscripts = this._bufferedTranscripts[channel - 1];
    if (bugname && this.bugname !== bugname) return;
    if (this.paused) {
      this.logger.debug({evt}, 'TaskTranscribe:_onTranscription - paused, ignoring transcript');
@@ -335,14 +336,14 @@ class TaskTranscribe extends SttTask {

    if (this.vendor === 'deepgram' && evt.type === 'UtteranceEnd') {
      /* we will only get this when we have set utterance_end_ms */
-      if (this._bufferedTranscripts.length === 0) {
+      if (bufferedTranscripts.length === 0) {
        this.logger.debug('Gather:_onTranscription - got UtteranceEnd event from deepgram but no buffered transcripts');
      }
      else {
        this.logger.debug('Gather:_onTranscription - got UtteranceEnd event from deepgram, return buffered transcript');
-        evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language, this.vendor);
+        evt = this.consolidateTranscripts(bufferedTranscripts, channel, this.language, this.vendor);
        evt.is_final = true;
-        this._bufferedTranscripts = [];
+        this._bufferedTranscripts[channel - 1] = [];
        this._resolve(channel, evt);
      }
      return;
@@ -359,11 +360,11 @@ class TaskTranscribe extends SttTask {

    let emptyTranscript = false;
    if (evt.is_final) {
-      if (evt.alternatives[0].transcript === '' && !cs.callGone && !this.killed) {
+      if (evt.alternatives.length === 0 || evt.alternatives[0].transcript === '' && !cs.callGone && !this.killed) {
        emptyTranscript = true;
        if (finished === 'true' &&
          ['microsoft', 'deepgram'].includes(this.vendor) &&
-          this._bufferedTranscripts.length === 0) {
+          bufferedTranscripts.length === 0) {
          this.logger.debug({evt}, 'TaskGather:_onTranscription - got empty transcript from old gather, disregarding');
          return;
        }
@@ -376,7 +377,7 @@ class TaskTranscribe extends SttTask {
            'TaskGather:_onTranscription - got empty deepgram transcript during continous asr, continue listening');
          return;
        }
-        else if (this.vendor === 'deepgram' && this._bufferedTranscripts.length > 0) {
+        else if (this.vendor === 'deepgram' && bufferedTranscripts.length > 0) {
          this.logger.info({evt},
            'TaskGather:_onTranscription - got empty transcript from deepgram, return the buffered transcripts');
        }
@@ -392,11 +393,12 @@ class TaskTranscribe extends SttTask {
          }
        }
        this.logger.info({evt}, 'TaskGather:_onTranscription - got transcript during continous asr');
-        this._bufferedTranscripts.push(evt);
+        bufferedTranscripts.push(evt);
        this._startAsrTimer(channel);

        /* some STT engines will keep listening after a final response, so no need to restart */
-        if (!['soniox', 'aws', 'microsoft', 'deepgram'].includes(this.vendor)) this._startTranscribing(cs, ep, channel);
+        if (!['soniox', 'aws', 'microsoft', 'deepgram', 'google']
+          .includes(this.vendor)) this._startTranscribing(cs, ep, channel);
      }
      else {
        if (this.vendor === 'soniox') {
@@ -407,19 +409,20 @@ class TaskTranscribe extends SttTask {
        }
        else if (this.vendor === 'deepgram') {
          /* compile transcripts into one */
-          if (!emptyTranscript) this._bufferedTranscripts.push(evt);
+          if (!emptyTranscript) bufferedTranscripts.push(evt);

          /* deepgram can send an empty and final transcript; only if we have any buffered should we resolve */
-          if (this._bufferedTranscripts.length === 0) return;
-          evt = this.consolidateTranscripts(this._bufferedTranscripts, channel, this.language);
-          this._bufferedTranscripts = [];
+          if (bufferedTranscripts.length === 0) return;
+          evt = this.consolidateTranscripts(bufferedTranscripts, channel, this.language);
+          this._bufferedTranscripts[channel - 1] = [];
        }

        /* here is where we return a final transcript */
        this.logger.debug({evt}, 'TaskTranscribe:_onTranscription - sending final transcript');
        this._resolve(channel, evt);
        /* some STT engines will keep listening after a final response, so no need to restart */
-        if (!['soniox', 'aws', 'microsoft', 'deepgram'].includes(this.vendor)) this._startTranscribing(cs, ep, channel);
+        if (!['soniox', 'aws', 'microsoft', 'deepgram', 'google']
+          .includes(this.vendor)) this._startTranscribing(cs, ep, channel);
      }
    }
    else {
@@ -430,7 +433,7 @@ class TaskTranscribe extends SttTask {
        const originalEvent = evt.vendor.evt;
        if (originalEvent.is_final && evt.alternatives[0].transcript !== '') {
          this.logger.debug({evt}, 'Gather:_onTranscription - buffering a completed (partial) deepgram transcript');
-          this._bufferedTranscripts.push(evt);
+          bufferedTranscripts.push(evt);
        }
      }

@@ -591,8 +594,9 @@ class TaskTranscribe extends SttTask {
    this._clearAsrTimer(channel);
    this._asrTimer = setTimeout(() => {
      this.logger.debug(`TaskTranscribe:_startAsrTimer - asr timer went off for channel: ${channel}`);
-      const evt = this.consolidateTranscripts(this._bufferedTranscripts, channel, this.language, this.vendor);
-      this._bufferedTranscripts = [];
+      const evt = this.consolidateTranscripts(
+        this._bufferedTranscripts[channel - 1], channel, this.language, this.vendor);
+      this._bufferedTranscripts[channel - 1] = [];
      this._resolve(channel, evt);
    }, this.asrTimeout);
    this.logger.debug(`TaskTranscribe:_startAsrTimer: set for ${this.asrTimeout}ms for channel ${channel}`);