Files
jambonz-feature-server/lib/tasks/transcribe.js
2021-01-31 15:49:19 -05:00

188 lines
7.4 KiB
JavaScript

const Task = require('./task');
const {
TaskName,
TaskPreconditions,
GoogleTranscriptionEvents,
AwsTranscriptionEvents
} = require('../utils/constants');
class TaskTranscribe extends Task {
constructor(logger, opts, parentTask) {
super(logger, opts);
this.preconditions = TaskPreconditions.Endpoint;
this.transcriptionHook = this.data.transcriptionHook;
this.earlyMedia = this.data.earlyMedia === true || (parentTask && parentTask.earlyMedia);
const recognizer = this.data.recognizer;
this.vendor = recognizer.vendor;
if ('default' === this.vendor || !this.vendor) this.vendor = this.callSession.speechRecognizerVendor
this.language = recognizer.language;
if ('default' === this.language || !this.language) this.language = this.callSession.speechRecognizerLanguage;
this.interim = !!recognizer.interim;
this.separateRecognitionPerChannel = recognizer.separateRecognitionPerChannel;
/* google-specific options */
this.hints = recognizer.hints || [];
this.profanityFilter = recognizer.profanityFilter;
this.punctuation = !!recognizer.punctuation;
this.enhancedModel = !!recognizer.enhancedModel;
this.words = !!recognizer.words;
this.diarization = !!recognizer.diarization;
this.diarizationMinSpeakers = recognizer.diarizationMinSpeakers || 0;
this.diarizationMaxSpeakers = recognizer.diarizationMaxSpeakers || 0;
this.interactionType = recognizer.interactionType || 'unspecified';
this.naicsCode = recognizer.naicsCode || 0;
this.altLanguages = recognizer.altLanguages || [];
/* aws-specific options */
this.identifyChannels = !!recognizer.identifyChannels;
this.vocabularyName = recognizer.vocabularyName;
this.vocabularyFilterName = recognizer.vocabularyFilterName;
this.filterMethod = recognizer.filterMethod;
}
get name() { return TaskName.Transcribe; }
async exec(cs, ep, parentTask) {
super.exec(cs);
this.ep = ep;
try {
await this._startTranscribing(ep);
await this.awaitTaskDone();
} catch (err) {
this.logger.info(err, 'TaskTranscribe:exec - error');
}
ep.removeCustomEventListener(GoogleTranscriptionEvents.Transcription);
ep.removeCustomEventListener(GoogleTranscriptionEvents.NoAudioDetected);
ep.removeCustomEventListener(GoogleTranscriptionEvents.MaxDurationExceeded);
ep.removeCustomEventListener(AwsTranscriptionEvents.Transcription);
ep.removeCustomEventListener(AwsTranscriptionEvents.NoAudioDetected);
ep.removeCustomEventListener(AwsTranscriptionEvents.MaxDurationExceeded);
}
async kill(cs) {
super.kill(cs);
if (this.ep.connected) {
this.ep.stopTranscription({vendor: this.vendor})
.catch((err) => this.logger.info(err, 'Error TaskTranscribe:kill'));
// hangup after 1 sec if we don't get a final transcription
this._timer = setTimeout(() => this.notifyTaskDone(), 1000);
}
else this.notifyTaskDone();
await this.awaitTaskDone();
}
async _startTranscribing(ep) {
const opts = {};
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, ep));
ep.addCustomEventListener(GoogleTranscriptionEvents.NoAudioDetected, this._onNoAudio.bind(this, ep));
ep.addCustomEventListener(GoogleTranscriptionEvents.MaxDurationExceeded,
this._onMaxDurationExceeded.bind(this, ep));
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, ep));
ep.addCustomEventListener(AwsTranscriptionEvents.NoAudioDetected, this._onNoAudio.bind(this, ep));
ep.addCustomEventListener(AwsTranscriptionEvents.MaxDurationExceeded,
this._onMaxDurationExceeded.bind(this, ep));
if (this.vendor === 'google') {
[
['enhancedModel', 'GOOGLE_SPEECH_USE_ENHANCED'],
['separateRecognitionPerChannel', 'GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL'],
['profanityFilter', 'GOOGLE_SPEECH_PROFANITY_FILTER'],
['punctuation', 'GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION'],
['words', 'GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS'],
['diarization', 'GOOGLE_SPEECH_PROFANITY_FILTER']
].forEach((arr) => {
if (this[arr[0]]) opts[arr[1]] = true;
});
if (this.hints.length > 1) opts.GOOGLE_SPEECH_HINTS = this.hints.join(',');
if (this.altLanguages.length > 1) opts.GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = this.altLanguages.join(',');
if ('unspecified' !== this.interactionType) {
opts.GOOGLE_SPEECH_METADATA_INTERACTION_TYPE = this.interactionType;
// additionally set model if appropriate
if ('phone_call' === this.interactionType) opts.GOOGLE_SPEECH_MODEL = 'phone_call';
else if (['voice_search', 'voice_command'].includes(this.interactionType)) {
opts.GOOGLE_SPEECH_MODEL = 'command_and_search';
}
else opts.GOOGLE_SPEECH_MODEL = 'phone_call';
}
else opts.GOOGLE_SPEECH_MODEL = 'phone_call';
if (this.diarization && this.diarizationMinSpeakers > 0) {
opts.GOOGLE_SPEECH_SPEAKER_DIARIZATION_MIN_SPEAKER_COUNT = this.diarizationMinSpeakers;
}
if (this.diarization && this.diarizationMaxSpeakers > 0) {
opts.GOOGLE_SPEECH_SPEAKER_DIARIZATION_MAX_SPEAKER_COUNT = this.diarizationMaxSpeakers;
}
if (this.naicsCode > 0) opts.GOOGLE_SPEECH_METADATA_INDUSTRY_NAICS_CODE = this.naicsCode;
await ep.set(opts)
.catch((err) => this.logger.info(err, 'TaskTranscribe:_startTranscribing with google'));
}
else if (this.vendor === 'aws') {
[
['diarization', 'AWS_SHOW_SPEAKER_LABEL'],
['identifyChannels', 'AWS_ENABLE_CHANNEL_IDENTIFICATION']
].forEach((arr) => {
if (this[arr[0]]) opts[arr[1]] = true;
});
if (this.vocabularyName) opts.AWS_VOCABULARY_NAME = this.vocabularyName;
if (this.vocabularyFilterName) {
opts.AWS_VOCABULARY_NAME = this.vocabularyFilterName;
opts.AWS_VOCABULARY_FILTER_METHOD = this.filterMethod || 'mask';
}
Object.assign(opts, {
AWS_ACCESS_KEY_ID: process.env.AWS_ACCESS_KEY_ID,
AWS_SECRET_ACCESS_KEY: process.env.AWS_SECRET_ACCESS_KEY,
AWS_REGION: process.env.AWS_REGION
});
await ep.set(opts)
.catch((err) => this.logger.info(err, 'TaskTranscribe:_startTranscribing with aws'));
}
await this._transcribe(ep);
}
async _transcribe(ep) {
await ep.startTranscription({
vendor: this.vendor,
interim: this.interim ? true : false,
language: this.language,
channels: this.separateRecognitionPerChannel ? 2 : 1
});
}
_onTranscription(ep, evt) {
this.logger.debug(evt, 'TaskTranscribe:_onTranscription');
this.cs.requestor.request(this.transcriptionHook, Object.assign({speech: evt}, this.cs.callInfo))
.catch((err) => this.logger.info(err, 'TranscribeTask:_onTranscription error'));
if (this.killed) {
this.logger.debug('TaskTranscribe:_onTranscription exiting after receiving final transcription');
this._clearTimer();
this.notifyTaskDone();
}
}
_onNoAudio(ep) {
this.logger.debug('TaskTranscribe:_onNoAudio restarting transcription');
this._transcribe(ep);
}
_onMaxDurationExceeded(ep) {
this.logger.debug('TaskTranscribe:_onMaxDurationExceeded restarting transcription');
this._transcribe(ep);
}
_clearTimer() {
if (this._timer) {
clearTimeout(this._timer);
this._timer = null;
}
}
}
module.exports = TaskTranscribe;