bugfix: transcription of a dialed call can now occur on both legs

2025-12-20 08:40:38 +00:00 · 2022-05-15 13:45:55 -04:00
parent 0ee13fb794
commit c3e5ffa52d
2 changed files with 33 additions and 21 deletions
--- a/lib/tasks/dial.js
+++ b/lib/tasks/dial.js
@@ -606,7 +606,7 @@ class TaskDial extends Task {
    if (this.parentDtmfCollector) this._installDtmfDetection(cs, cs.dlg);
    if (this.childDtmfCollector) this._installDtmfDetection(cs, this.dlg);

-    if (this.transcribeTask) this.transcribeTask.exec(cs, this.epOther);
+    if (this.transcribeTask) this.transcribeTask.exec(cs, this.epOther, this.ep);
    if (this.listenTask) this.listenTask.exec(cs, this.epOther);

    /* if we can release the media back to the SBC, do so now */
--- a/lib/tasks/transcribe.js
+++ b/lib/tasks/transcribe.js
@@ -58,11 +58,12 @@ class TaskTranscribe extends Task {

  get name() { return TaskName.Transcribe; }

-  async exec(cs, ep, parentTask) {
+  async exec(cs, ep, ep2) {
    super.exec(cs);
    const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);

    this.ep = ep;
+    this.ep2 = ep2;
    if ('default' === this.vendor || !this.vendor) this.vendor = cs.speechRecognizerVendor;
    if ('default' === this.language || !this.language) this.language = cs.speechRecognizerLanguage;
    this.sttCredentials = cs.getSpeechCredentials(this.vendor, 'stt');
@@ -78,7 +79,9 @@ class TaskTranscribe extends Task {
        }).catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
        throw new Error('no provisioned speech credentials for TTS');
      }
-      await this._startTranscribing(cs, ep);
+      await this._startTranscribing(cs, ep, 1);
+      if (this.separateRecognitionPerChannel && ep2) await this._startTranscribing(cs, ep2, 2);
+
      updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid)
        .catch(() => {/*already logged error */});

@@ -106,11 +109,15 @@ class TaskTranscribe extends Task {
      // hangup after 1 sec if we don't get a final transcription
      this._timer = setTimeout(() => this.notifyTaskDone(), 1000);
    }
+    if (this.separateRecognitionPerChannel && this.ep2 && this.ep2.connected) {
+      this.ep2.stopTranscription({vendor: this.vendor})
+        .catch((err) => this.logger.info(err, 'Error TaskTranscribe:kill'));
+    }
    else this.notifyTaskDone();
    await this.awaitTaskDone();
  }

-  async _startTranscribing(cs, ep) {
+  async _startTranscribing(cs, ep, channel) {
    const opts = {};

    if (this.vad.enable) {
@@ -119,22 +126,24 @@ class TaskTranscribe extends Task {
      if (this.vad.mode >= 0 && this.vad.mode <= 3) opts.RECOGNIZER_VAD_MODE = this.vad.mode;
    }

-    ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
-    ep.addCustomEventListener(GoogleTranscriptionEvents.NoAudioDetected, this._onNoAudio.bind(this, cs, ep));
+    ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription,
+      this._onTranscription.bind(this, cs, ep, channel));
+    ep.addCustomEventListener(GoogleTranscriptionEvents.NoAudioDetected, this._onNoAudio.bind(this, cs, ep, channel));
    ep.addCustomEventListener(GoogleTranscriptionEvents.MaxDurationExceeded,
-      this._onMaxDurationExceeded.bind(this, cs, ep));
-    ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
-    ep.addCustomEventListener(AwsTranscriptionEvents.NoAudioDetected, this._onNoAudio.bind(this, cs, ep));
+      this._onMaxDurationExceeded.bind(this, cs, ep, channel));
+    ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep, channel));
+    ep.addCustomEventListener(AwsTranscriptionEvents.NoAudioDetected, this._onNoAudio.bind(this, cs, ep, channel));
    ep.addCustomEventListener(AwsTranscriptionEvents.MaxDurationExceeded,
-      this._onMaxDurationExceeded.bind(this, cs, ep));
-    ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
-    ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, this._onNoAudio.bind(this, cs, ep));
+      this._onMaxDurationExceeded.bind(this, cs, ep, channel));
+    ep.addCustomEventListener(AzureTranscriptionEvents.Transcription,
+      this._onTranscription.bind(this, cs, ep, channel));
+    ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, this._onNoAudio.bind(this, cs, ep, channel));

    if (this.vendor === 'google') {
      if (this.sttCredentials) opts.GOOGLE_APPLICATION_CREDENTIALS = JSON.stringify(this.sttCredentials.credentials);
      [
        ['enhancedModel', 'GOOGLE_SPEECH_USE_ENHANCED'],
-        ['separateRecognitionPerChannel', 'GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL'],
+        //['separateRecognitionPerChannel', 'GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL'],
        ['profanityFilter', 'GOOGLE_SPEECH_PROFANITY_FILTER'],
        ['punctuation', 'GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION'],
        ['words', 'GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS'],
@@ -222,12 +231,12 @@ class TaskTranscribe extends Task {
      vendor: this.vendor,
      interim: this.interim ? true : false,
      locale: this.language,
-      channels: this.separateRecognitionPerChannel ? 2 : 1
+      channels: /*this.separateRecognitionPerChannel ? 2 : */ 1
    });
  }

-  _onTranscription(cs, ep, evt) {
-    this.logger.debug({evt}, 'TaskTranscribe:_onTranscription');
+  _onTranscription(cs, ep, channel, evt) {
+    this.logger.debug({evt, channel}, 'TaskTranscribe:_onTranscription');
    if ('aws' === this.vendor && Array.isArray(evt) && evt.length > 0) evt = evt[0];
    if ('microsoft' === this.vendor) {
      const nbest = evt.NBest;
@@ -240,12 +249,13 @@ class TaskTranscribe extends Task {
      }) :
        [
          {
-            transcript: evt.Text
+            transcript: evt.DisplayText
          }
        ];

      const newEvent = {
        is_final: evt.RecognitionStatus === 'Success',
+        channel,
        language_code,
        alternatives
      };
@@ -257,6 +267,8 @@ class TaskTranscribe extends Task {
      return this._transcribe(ep);
    }

+    evt.channel_tag = channel;
+
    if (this.transcriptionHook) {
      const b3 = this.getTracingPropagation();
      const httpHeaders = b3 && {b3};
@@ -274,13 +286,13 @@ class TaskTranscribe extends Task {
    }
  }

-  _onNoAudio(cs, ep) {
-    this.logger.debug('TaskTranscribe:_onNoAudio restarting transcription');
+  _onNoAudio(cs, ep, channel) {
+    this.logger.debug(`TaskTranscribe:_onNoAudio restarting transcription on channel ${channel}`);
    this._transcribe(ep);
  }

-  _onMaxDurationExceeded(cs, ep) {
-    this.logger.debug('TaskTranscribe:_onMaxDurationExceeded restarting transcription');
+  _onMaxDurationExceeded(cs, ep, channel) {
+    this.logger.debug(`TaskTranscribe:_onMaxDurationExceeded restarting transcription on channel ${channel}`);
    this._transcribe(ep);
  }