|
|
|
|
@@ -58,11 +58,12 @@ class TaskTranscribe extends Task {
|
|
|
|
|
|
|
|
|
|
get name() { return TaskName.Transcribe; }
|
|
|
|
|
|
|
|
|
|
async exec(cs, ep, parentTask) {
|
|
|
|
|
async exec(cs, ep, ep2) {
|
|
|
|
|
super.exec(cs);
|
|
|
|
|
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
|
|
|
|
|
|
|
|
|
|
this.ep = ep;
|
|
|
|
|
this.ep2 = ep2;
|
|
|
|
|
if ('default' === this.vendor || !this.vendor) this.vendor = cs.speechRecognizerVendor;
|
|
|
|
|
if ('default' === this.language || !this.language) this.language = cs.speechRecognizerLanguage;
|
|
|
|
|
this.sttCredentials = cs.getSpeechCredentials(this.vendor, 'stt');
|
|
|
|
|
@@ -78,7 +79,9 @@ class TaskTranscribe extends Task {
|
|
|
|
|
}).catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
|
|
|
|
|
throw new Error('no provisioned speech credentials for TTS');
|
|
|
|
|
}
|
|
|
|
|
await this._startTranscribing(cs, ep);
|
|
|
|
|
await this._startTranscribing(cs, ep, 1);
|
|
|
|
|
if (this.separateRecognitionPerChannel && ep2) await this._startTranscribing(cs, ep2, 2);
|
|
|
|
|
|
|
|
|
|
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid)
|
|
|
|
|
.catch(() => {/*already logged error */});
|
|
|
|
|
|
|
|
|
|
@@ -106,11 +109,15 @@ class TaskTranscribe extends Task {
|
|
|
|
|
// hangup after 1 sec if we don't get a final transcription
|
|
|
|
|
this._timer = setTimeout(() => this.notifyTaskDone(), 1000);
|
|
|
|
|
}
|
|
|
|
|
if (this.separateRecognitionPerChannel && this.ep2 && this.ep2.connected) {
|
|
|
|
|
this.ep2.stopTranscription({vendor: this.vendor})
|
|
|
|
|
.catch((err) => this.logger.info(err, 'Error TaskTranscribe:kill'));
|
|
|
|
|
}
|
|
|
|
|
else this.notifyTaskDone();
|
|
|
|
|
await this.awaitTaskDone();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
async _startTranscribing(cs, ep) {
|
|
|
|
|
async _startTranscribing(cs, ep, channel) {
|
|
|
|
|
const opts = {};
|
|
|
|
|
|
|
|
|
|
if (this.vad.enable) {
|
|
|
|
|
@@ -119,22 +126,24 @@ class TaskTranscribe extends Task {
|
|
|
|
|
if (this.vad.mode >= 0 && this.vad.mode <= 3) opts.RECOGNIZER_VAD_MODE = this.vad.mode;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
|
|
|
|
ep.addCustomEventListener(GoogleTranscriptionEvents.NoAudioDetected, this._onNoAudio.bind(this, cs, ep));
|
|
|
|
|
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription,
|
|
|
|
|
this._onTranscription.bind(this, cs, ep, channel));
|
|
|
|
|
ep.addCustomEventListener(GoogleTranscriptionEvents.NoAudioDetected, this._onNoAudio.bind(this, cs, ep, channel));
|
|
|
|
|
ep.addCustomEventListener(GoogleTranscriptionEvents.MaxDurationExceeded,
|
|
|
|
|
this._onMaxDurationExceeded.bind(this, cs, ep));
|
|
|
|
|
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
|
|
|
|
ep.addCustomEventListener(AwsTranscriptionEvents.NoAudioDetected, this._onNoAudio.bind(this, cs, ep));
|
|
|
|
|
this._onMaxDurationExceeded.bind(this, cs, ep, channel));
|
|
|
|
|
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep, channel));
|
|
|
|
|
ep.addCustomEventListener(AwsTranscriptionEvents.NoAudioDetected, this._onNoAudio.bind(this, cs, ep, channel));
|
|
|
|
|
ep.addCustomEventListener(AwsTranscriptionEvents.MaxDurationExceeded,
|
|
|
|
|
this._onMaxDurationExceeded.bind(this, cs, ep));
|
|
|
|
|
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
|
|
|
|
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, this._onNoAudio.bind(this, cs, ep));
|
|
|
|
|
this._onMaxDurationExceeded.bind(this, cs, ep, channel));
|
|
|
|
|
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription,
|
|
|
|
|
this._onTranscription.bind(this, cs, ep, channel));
|
|
|
|
|
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, this._onNoAudio.bind(this, cs, ep, channel));
|
|
|
|
|
|
|
|
|
|
if (this.vendor === 'google') {
|
|
|
|
|
if (this.sttCredentials) opts.GOOGLE_APPLICATION_CREDENTIALS = JSON.stringify(this.sttCredentials.credentials);
|
|
|
|
|
[
|
|
|
|
|
['enhancedModel', 'GOOGLE_SPEECH_USE_ENHANCED'],
|
|
|
|
|
['separateRecognitionPerChannel', 'GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL'],
|
|
|
|
|
//['separateRecognitionPerChannel', 'GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL'],
|
|
|
|
|
['profanityFilter', 'GOOGLE_SPEECH_PROFANITY_FILTER'],
|
|
|
|
|
['punctuation', 'GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION'],
|
|
|
|
|
['words', 'GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS'],
|
|
|
|
|
@@ -222,12 +231,12 @@ class TaskTranscribe extends Task {
|
|
|
|
|
vendor: this.vendor,
|
|
|
|
|
interim: this.interim ? true : false,
|
|
|
|
|
locale: this.language,
|
|
|
|
|
channels: this.separateRecognitionPerChannel ? 2 : 1
|
|
|
|
|
channels: /*this.separateRecognitionPerChannel ? 2 : */ 1
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
_onTranscription(cs, ep, evt) {
|
|
|
|
|
this.logger.debug({evt}, 'TaskTranscribe:_onTranscription');
|
|
|
|
|
_onTranscription(cs, ep, channel, evt) {
|
|
|
|
|
this.logger.debug({evt, channel}, 'TaskTranscribe:_onTranscription');
|
|
|
|
|
if ('aws' === this.vendor && Array.isArray(evt) && evt.length > 0) evt = evt[0];
|
|
|
|
|
if ('microsoft' === this.vendor) {
|
|
|
|
|
const nbest = evt.NBest;
|
|
|
|
|
@@ -240,12 +249,13 @@ class TaskTranscribe extends Task {
|
|
|
|
|
}) :
|
|
|
|
|
[
|
|
|
|
|
{
|
|
|
|
|
transcript: evt.Text
|
|
|
|
|
transcript: evt.DisplayText
|
|
|
|
|
}
|
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
const newEvent = {
|
|
|
|
|
is_final: evt.RecognitionStatus === 'Success',
|
|
|
|
|
channel,
|
|
|
|
|
language_code,
|
|
|
|
|
alternatives
|
|
|
|
|
};
|
|
|
|
|
@@ -257,6 +267,8 @@ class TaskTranscribe extends Task {
|
|
|
|
|
return this._transcribe(ep);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
evt.channel_tag = channel;
|
|
|
|
|
|
|
|
|
|
if (this.transcriptionHook) {
|
|
|
|
|
const b3 = this.getTracingPropagation();
|
|
|
|
|
const httpHeaders = b3 && {b3};
|
|
|
|
|
@@ -274,13 +286,13 @@ class TaskTranscribe extends Task {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
_onNoAudio(cs, ep) {
|
|
|
|
|
this.logger.debug('TaskTranscribe:_onNoAudio restarting transcription');
|
|
|
|
|
_onNoAudio(cs, ep, channel) {
|
|
|
|
|
this.logger.debug(`TaskTranscribe:_onNoAudio restarting transcription on channel ${channel}`);
|
|
|
|
|
this._transcribe(ep);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
_onMaxDurationExceeded(cs, ep) {
|
|
|
|
|
this.logger.debug('TaskTranscribe:_onMaxDurationExceeded restarting transcription');
|
|
|
|
|
_onMaxDurationExceeded(cs, ep, channel) {
|
|
|
|
|
this.logger.debug(`TaskTranscribe:_onMaxDurationExceeded restarting transcription on channel ${channel}`);
|
|
|
|
|
this._transcribe(ep);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|