mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-20 16:50:39 +00:00
bugfix: transcription of a dialed call can now occur on both legs
This commit is contained in:
@@ -606,7 +606,7 @@ class TaskDial extends Task {
|
|||||||
if (this.parentDtmfCollector) this._installDtmfDetection(cs, cs.dlg);
|
if (this.parentDtmfCollector) this._installDtmfDetection(cs, cs.dlg);
|
||||||
if (this.childDtmfCollector) this._installDtmfDetection(cs, this.dlg);
|
if (this.childDtmfCollector) this._installDtmfDetection(cs, this.dlg);
|
||||||
|
|
||||||
if (this.transcribeTask) this.transcribeTask.exec(cs, this.epOther);
|
if (this.transcribeTask) this.transcribeTask.exec(cs, this.epOther, this.ep);
|
||||||
if (this.listenTask) this.listenTask.exec(cs, this.epOther);
|
if (this.listenTask) this.listenTask.exec(cs, this.epOther);
|
||||||
|
|
||||||
/* if we can release the media back to the SBC, do so now */
|
/* if we can release the media back to the SBC, do so now */
|
||||||
|
|||||||
@@ -58,11 +58,12 @@ class TaskTranscribe extends Task {
|
|||||||
|
|
||||||
get name() { return TaskName.Transcribe; }
|
get name() { return TaskName.Transcribe; }
|
||||||
|
|
||||||
async exec(cs, ep, parentTask) {
|
async exec(cs, ep, ep2) {
|
||||||
super.exec(cs);
|
super.exec(cs);
|
||||||
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
|
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
|
||||||
|
|
||||||
this.ep = ep;
|
this.ep = ep;
|
||||||
|
this.ep2 = ep2;
|
||||||
if ('default' === this.vendor || !this.vendor) this.vendor = cs.speechRecognizerVendor;
|
if ('default' === this.vendor || !this.vendor) this.vendor = cs.speechRecognizerVendor;
|
||||||
if ('default' === this.language || !this.language) this.language = cs.speechRecognizerLanguage;
|
if ('default' === this.language || !this.language) this.language = cs.speechRecognizerLanguage;
|
||||||
this.sttCredentials = cs.getSpeechCredentials(this.vendor, 'stt');
|
this.sttCredentials = cs.getSpeechCredentials(this.vendor, 'stt');
|
||||||
@@ -78,7 +79,9 @@ class TaskTranscribe extends Task {
|
|||||||
}).catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
|
}).catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
|
||||||
throw new Error('no provisioned speech credentials for TTS');
|
throw new Error('no provisioned speech credentials for TTS');
|
||||||
}
|
}
|
||||||
await this._startTranscribing(cs, ep);
|
await this._startTranscribing(cs, ep, 1);
|
||||||
|
if (this.separateRecognitionPerChannel && ep2) await this._startTranscribing(cs, ep2, 2);
|
||||||
|
|
||||||
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid)
|
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid)
|
||||||
.catch(() => {/*already logged error */});
|
.catch(() => {/*already logged error */});
|
||||||
|
|
||||||
@@ -106,11 +109,15 @@ class TaskTranscribe extends Task {
|
|||||||
// hangup after 1 sec if we don't get a final transcription
|
// hangup after 1 sec if we don't get a final transcription
|
||||||
this._timer = setTimeout(() => this.notifyTaskDone(), 1000);
|
this._timer = setTimeout(() => this.notifyTaskDone(), 1000);
|
||||||
}
|
}
|
||||||
|
if (this.separateRecognitionPerChannel && this.ep2 && this.ep2.connected) {
|
||||||
|
this.ep2.stopTranscription({vendor: this.vendor})
|
||||||
|
.catch((err) => this.logger.info(err, 'Error TaskTranscribe:kill'));
|
||||||
|
}
|
||||||
else this.notifyTaskDone();
|
else this.notifyTaskDone();
|
||||||
await this.awaitTaskDone();
|
await this.awaitTaskDone();
|
||||||
}
|
}
|
||||||
|
|
||||||
async _startTranscribing(cs, ep) {
|
async _startTranscribing(cs, ep, channel) {
|
||||||
const opts = {};
|
const opts = {};
|
||||||
|
|
||||||
if (this.vad.enable) {
|
if (this.vad.enable) {
|
||||||
@@ -119,22 +126,24 @@ class TaskTranscribe extends Task {
|
|||||||
if (this.vad.mode >= 0 && this.vad.mode <= 3) opts.RECOGNIZER_VAD_MODE = this.vad.mode;
|
if (this.vad.mode >= 0 && this.vad.mode <= 3) opts.RECOGNIZER_VAD_MODE = this.vad.mode;
|
||||||
}
|
}
|
||||||
|
|
||||||
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription,
|
||||||
ep.addCustomEventListener(GoogleTranscriptionEvents.NoAudioDetected, this._onNoAudio.bind(this, cs, ep));
|
this._onTranscription.bind(this, cs, ep, channel));
|
||||||
|
ep.addCustomEventListener(GoogleTranscriptionEvents.NoAudioDetected, this._onNoAudio.bind(this, cs, ep, channel));
|
||||||
ep.addCustomEventListener(GoogleTranscriptionEvents.MaxDurationExceeded,
|
ep.addCustomEventListener(GoogleTranscriptionEvents.MaxDurationExceeded,
|
||||||
this._onMaxDurationExceeded.bind(this, cs, ep));
|
this._onMaxDurationExceeded.bind(this, cs, ep, channel));
|
||||||
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep, channel));
|
||||||
ep.addCustomEventListener(AwsTranscriptionEvents.NoAudioDetected, this._onNoAudio.bind(this, cs, ep));
|
ep.addCustomEventListener(AwsTranscriptionEvents.NoAudioDetected, this._onNoAudio.bind(this, cs, ep, channel));
|
||||||
ep.addCustomEventListener(AwsTranscriptionEvents.MaxDurationExceeded,
|
ep.addCustomEventListener(AwsTranscriptionEvents.MaxDurationExceeded,
|
||||||
this._onMaxDurationExceeded.bind(this, cs, ep));
|
this._onMaxDurationExceeded.bind(this, cs, ep, channel));
|
||||||
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription,
|
||||||
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, this._onNoAudio.bind(this, cs, ep));
|
this._onTranscription.bind(this, cs, ep, channel));
|
||||||
|
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, this._onNoAudio.bind(this, cs, ep, channel));
|
||||||
|
|
||||||
if (this.vendor === 'google') {
|
if (this.vendor === 'google') {
|
||||||
if (this.sttCredentials) opts.GOOGLE_APPLICATION_CREDENTIALS = JSON.stringify(this.sttCredentials.credentials);
|
if (this.sttCredentials) opts.GOOGLE_APPLICATION_CREDENTIALS = JSON.stringify(this.sttCredentials.credentials);
|
||||||
[
|
[
|
||||||
['enhancedModel', 'GOOGLE_SPEECH_USE_ENHANCED'],
|
['enhancedModel', 'GOOGLE_SPEECH_USE_ENHANCED'],
|
||||||
['separateRecognitionPerChannel', 'GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL'],
|
//['separateRecognitionPerChannel', 'GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL'],
|
||||||
['profanityFilter', 'GOOGLE_SPEECH_PROFANITY_FILTER'],
|
['profanityFilter', 'GOOGLE_SPEECH_PROFANITY_FILTER'],
|
||||||
['punctuation', 'GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION'],
|
['punctuation', 'GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION'],
|
||||||
['words', 'GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS'],
|
['words', 'GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS'],
|
||||||
@@ -222,12 +231,12 @@ class TaskTranscribe extends Task {
|
|||||||
vendor: this.vendor,
|
vendor: this.vendor,
|
||||||
interim: this.interim ? true : false,
|
interim: this.interim ? true : false,
|
||||||
locale: this.language,
|
locale: this.language,
|
||||||
channels: this.separateRecognitionPerChannel ? 2 : 1
|
channels: /*this.separateRecognitionPerChannel ? 2 : */ 1
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
_onTranscription(cs, ep, evt) {
|
_onTranscription(cs, ep, channel, evt) {
|
||||||
this.logger.debug({evt}, 'TaskTranscribe:_onTranscription');
|
this.logger.debug({evt, channel}, 'TaskTranscribe:_onTranscription');
|
||||||
if ('aws' === this.vendor && Array.isArray(evt) && evt.length > 0) evt = evt[0];
|
if ('aws' === this.vendor && Array.isArray(evt) && evt.length > 0) evt = evt[0];
|
||||||
if ('microsoft' === this.vendor) {
|
if ('microsoft' === this.vendor) {
|
||||||
const nbest = evt.NBest;
|
const nbest = evt.NBest;
|
||||||
@@ -240,12 +249,13 @@ class TaskTranscribe extends Task {
|
|||||||
}) :
|
}) :
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
transcript: evt.Text
|
transcript: evt.DisplayText
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
|
|
||||||
const newEvent = {
|
const newEvent = {
|
||||||
is_final: evt.RecognitionStatus === 'Success',
|
is_final: evt.RecognitionStatus === 'Success',
|
||||||
|
channel,
|
||||||
language_code,
|
language_code,
|
||||||
alternatives
|
alternatives
|
||||||
};
|
};
|
||||||
@@ -257,6 +267,8 @@ class TaskTranscribe extends Task {
|
|||||||
return this._transcribe(ep);
|
return this._transcribe(ep);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
evt.channel_tag = channel;
|
||||||
|
|
||||||
if (this.transcriptionHook) {
|
if (this.transcriptionHook) {
|
||||||
const b3 = this.getTracingPropagation();
|
const b3 = this.getTracingPropagation();
|
||||||
const httpHeaders = b3 && {b3};
|
const httpHeaders = b3 && {b3};
|
||||||
@@ -274,13 +286,13 @@ class TaskTranscribe extends Task {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
_onNoAudio(cs, ep) {
|
_onNoAudio(cs, ep, channel) {
|
||||||
this.logger.debug('TaskTranscribe:_onNoAudio restarting transcription');
|
this.logger.debug(`TaskTranscribe:_onNoAudio restarting transcription on channel ${channel}`);
|
||||||
this._transcribe(ep);
|
this._transcribe(ep);
|
||||||
}
|
}
|
||||||
|
|
||||||
_onMaxDurationExceeded(cs, ep) {
|
_onMaxDurationExceeded(cs, ep, channel) {
|
||||||
this.logger.debug('TaskTranscribe:_onMaxDurationExceeded restarting transcription');
|
this.logger.debug(`TaskTranscribe:_onMaxDurationExceeded restarting transcription on channel ${channel}`);
|
||||||
this._transcribe(ep);
|
this._transcribe(ep);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user