diff --git a/lib/tasks/dial.js b/lib/tasks/dial.js index 08baacb0..511eeca1 100644 --- a/lib/tasks/dial.js +++ b/lib/tasks/dial.js @@ -606,7 +606,7 @@ class TaskDial extends Task { if (this.parentDtmfCollector) this._installDtmfDetection(cs, cs.dlg); if (this.childDtmfCollector) this._installDtmfDetection(cs, this.dlg); - if (this.transcribeTask) this.transcribeTask.exec(cs, this.epOther); + if (this.transcribeTask) this.transcribeTask.exec(cs, this.epOther, this.ep); if (this.listenTask) this.listenTask.exec(cs, this.epOther); /* if we can release the media back to the SBC, do so now */ diff --git a/lib/tasks/transcribe.js b/lib/tasks/transcribe.js index 59808946..20e75cdf 100644 --- a/lib/tasks/transcribe.js +++ b/lib/tasks/transcribe.js @@ -58,11 +58,12 @@ class TaskTranscribe extends Task { get name() { return TaskName.Transcribe; } - async exec(cs, ep, parentTask) { + async exec(cs, ep, ep2) { super.exec(cs); const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf); this.ep = ep; + this.ep2 = ep2; if ('default' === this.vendor || !this.vendor) this.vendor = cs.speechRecognizerVendor; if ('default' === this.language || !this.language) this.language = cs.speechRecognizerLanguage; this.sttCredentials = cs.getSpeechCredentials(this.vendor, 'stt'); @@ -78,7 +79,9 @@ class TaskTranscribe extends Task { }).catch((err) => this.logger.info({err}, 'Error generating alert for no stt')); throw new Error('no provisioned speech credentials for TTS'); } - await this._startTranscribing(cs, ep); + await this._startTranscribing(cs, ep, 1); + if (this.separateRecognitionPerChannel && ep2) await this._startTranscribing(cs, ep2, 2); + updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid) .catch(() => {/*already logged error */}); @@ -106,11 +109,15 @@ class TaskTranscribe extends Task { // hangup after 1 sec if we don't get a final transcription this._timer = setTimeout(() => this.notifyTaskDone(), 1000); } + if (this.separateRecognitionPerChannel && this.ep2 && this.ep2.connected) { + this.ep2.stopTranscription({vendor: this.vendor}) + .catch((err) => this.logger.info(err, 'Error TaskTranscribe:kill')); + } else this.notifyTaskDone(); await this.awaitTaskDone(); } - async _startTranscribing(cs, ep) { + async _startTranscribing(cs, ep, channel) { const opts = {}; if (this.vad.enable) { @@ -119,22 +126,24 @@ class TaskTranscribe extends Task { if (this.vad.mode >= 0 && this.vad.mode <= 3) opts.RECOGNIZER_VAD_MODE = this.vad.mode; } - ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep)); - ep.addCustomEventListener(GoogleTranscriptionEvents.NoAudioDetected, this._onNoAudio.bind(this, cs, ep)); + ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, + this._onTranscription.bind(this, cs, ep, channel)); + ep.addCustomEventListener(GoogleTranscriptionEvents.NoAudioDetected, this._onNoAudio.bind(this, cs, ep, channel)); ep.addCustomEventListener(GoogleTranscriptionEvents.MaxDurationExceeded, - this._onMaxDurationExceeded.bind(this, cs, ep)); - ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep)); - ep.addCustomEventListener(AwsTranscriptionEvents.NoAudioDetected, this._onNoAudio.bind(this, cs, ep)); + this._onMaxDurationExceeded.bind(this, cs, ep, channel)); + ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep, channel)); + ep.addCustomEventListener(AwsTranscriptionEvents.NoAudioDetected, this._onNoAudio.bind(this, cs, ep, channel)); ep.addCustomEventListener(AwsTranscriptionEvents.MaxDurationExceeded, - this._onMaxDurationExceeded.bind(this, cs, ep)); - ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep)); - ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, this._onNoAudio.bind(this, cs, ep)); + this._onMaxDurationExceeded.bind(this, cs, ep, channel)); + ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, + this._onTranscription.bind(this, cs, ep, channel)); + ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, this._onNoAudio.bind(this, cs, ep, channel)); if (this.vendor === 'google') { if (this.sttCredentials) opts.GOOGLE_APPLICATION_CREDENTIALS = JSON.stringify(this.sttCredentials.credentials); [ ['enhancedModel', 'GOOGLE_SPEECH_USE_ENHANCED'], - ['separateRecognitionPerChannel', 'GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL'], + //['separateRecognitionPerChannel', 'GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL'], ['profanityFilter', 'GOOGLE_SPEECH_PROFANITY_FILTER'], ['punctuation', 'GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION'], ['words', 'GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS'], @@ -222,12 +231,12 @@ class TaskTranscribe extends Task { vendor: this.vendor, interim: this.interim ? true : false, locale: this.language, - channels: this.separateRecognitionPerChannel ? 2 : 1 + channels: /*this.separateRecognitionPerChannel ? 2 : */ 1 }); } - _onTranscription(cs, ep, evt) { - this.logger.debug({evt}, 'TaskTranscribe:_onTranscription'); + _onTranscription(cs, ep, channel, evt) { + this.logger.debug({evt, channel}, 'TaskTranscribe:_onTranscription'); if ('aws' === this.vendor && Array.isArray(evt) && evt.length > 0) evt = evt[0]; if ('microsoft' === this.vendor) { const nbest = evt.NBest; @@ -240,12 +249,13 @@ class TaskTranscribe extends Task { }) : [ { - transcript: evt.Text + transcript: evt.DisplayText } ]; const newEvent = { is_final: evt.RecognitionStatus === 'Success', + channel, language_code, alternatives }; @@ -257,6 +267,8 @@ class TaskTranscribe extends Task { return this._transcribe(ep); } + evt.channel_tag = channel; + if (this.transcriptionHook) { const b3 = this.getTracingPropagation(); const httpHeaders = b3 && {b3}; @@ -274,13 +286,13 @@ class TaskTranscribe extends Task { } } - _onNoAudio(cs, ep) { - this.logger.debug('TaskTranscribe:_onNoAudio restarting transcription'); + _onNoAudio(cs, ep, channel) { + this.logger.debug(`TaskTranscribe:_onNoAudio restarting transcription on channel ${channel}`); this._transcribe(ep); } - _onMaxDurationExceeded(cs, ep) { - this.logger.debug('TaskTranscribe:_onMaxDurationExceeded restarting transcription'); + _onMaxDurationExceeded(cs, ep, channel) { + this.logger.debug(`TaskTranscribe:_onMaxDurationExceeded restarting transcription on channel ${channel}`); this._transcribe(ep); }