Transcribe background task (#576)

* first draft

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* update verb-specification

* fix comment reviews

* provide bugname when stopping transcription, otherwise it will continue

---------

Co-authored-by: Dave Horton <daveh@beachdognet.com>
This commit is contained in:
Hoan Luu Huu
2023-12-27 09:50:51 +07:00
committed by GitHub
parent 3e8474867f
commit 9d8291f892
9 changed files with 342 additions and 176 deletions

View File

@@ -9,7 +9,8 @@ class TaskConfig extends Task {
'recognizer',
'bargeIn',
'record',
'listen'
'listen',
'transcribe'
].forEach((k) => this[k] = this.data[k] || {});
if ('notifyEvents' in this.data) {
@@ -30,6 +31,13 @@ class TaskConfig extends Task {
if (this.bargeIn[k]) this.gatherOpts[k] = this.bargeIn[k];
});
}
if (this.transcribe?.enable) {
this.transcribeOpts = {
verb: 'transcribe',
...this.transcribe
};
delete this.transcribeOpts.enable;
}
if (this.data.reset) {
if (typeof this.data.reset === 'string') this.data.reset = [this.data.reset];
@@ -37,7 +45,11 @@ class TaskConfig extends Task {
else this.data.reset = [];
if (this.bargeIn.sticky) this.autoEnable = true;
this.preconditions = (this.bargeIn.enable || this.record?.action || this.listen?.url || this.data.amd) ?
this.preconditions = (this.bargeIn.enable ||
this.record?.action ||
this.listen?.url ||
this.data.amd ||
this.transcribe?.enable) ?
TaskPreconditions.Endpoint :
TaskPreconditions.None;
@@ -50,6 +62,7 @@ class TaskConfig extends Task {
get hasRecognizer() { return Object.keys(this.recognizer).length; }
get hasRecording() { return Object.keys(this.record).length; }
get hasListen() { return Object.keys(this.listen).length; }
get hasTranscribe() { return Object.keys(this.transcribe).length; }
get summary() {
const phrase = [];
@@ -72,6 +85,9 @@ class TaskConfig extends Task {
if (this.hasListen) {
phrase.push(this.listen.enable ? `listen ${this.listen.url}` : 'stop listen');
}
if (this.hasTranscribe) {
phrase.push(this.transcribe.enable ? `transcribe ${this.transcribe.transcriptionHook}` : 'stop transcribe');
}
if (this.data.amd) phrase.push('enable amd');
if (this.notifyEvents) phrase.push(`event notification ${this.notifyEvents ? 'on' : 'off'}`);
if (this.onHoldMusic) phrase.push(`onHoldMusic: ${this.onHoldMusic}`);
@@ -212,10 +228,25 @@ class TaskConfig extends Task {
const {enable, ...opts} = this.listen;
if (enable) {
this.logger.debug({opts}, 'Config: enabling listen');
cs.startBackgroundListen({verb: 'listen', ...opts});
cs.startBackgroundTask('listen', {verb: 'listen', ...opts});
} else {
this.logger.info('Config: disabling listen');
cs.stopBackgroundListen();
cs.stopBackgroundTask('listen');
}
}
if (this.hasTranscribe) {
if (this.transcribe.enable) {
this.transcribeOpts.recognizer = this.hasRecognizer ?
this.recognizer :
{
vendor: cs.speechRecognizerVendor,
language: cs.speechRecognizerLanguage
};
this.logger.debug(this.transcribeOpts, 'Config: enabling transcribe');
cs.startBackgroundTask('transcribe', this.transcribeOpts);
} else {
this.logger.info('Config: disabling transcribe');
cs.stopBackgroundTask('transcribe');
}
}
if (this.data.sipRequestWithinDialogHook) {

View File

@@ -71,6 +71,7 @@ class TaskGather extends SttTask {
/* buffer speech for continuous asr */
this._bufferedTranscripts = [];
this.partialTranscriptsCount = 0;
this.bugname_prefix = 'gather_';
}
get name() { return TaskName.Gather; }
@@ -261,7 +262,10 @@ class TaskGather extends SttTask {
if (this.digitBuffer.length === 0 && this.needsStt) {
// DTMF is higher priority than STT.
this.removeSpeechListeners(ep);
ep.stopTranscription({vendor: this.vendor})
ep.stopTranscription({
vendor: this.vendor,
bugname: this.bugname,
})
.catch((err) => this.logger.error({err},
` Received DTMF, Error stopping transcription for vendor ${this.vendor}`));
}
@@ -300,7 +304,7 @@ class TaskGather extends SttTask {
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
switch (this.vendor) {
case 'google':
this.bugname = 'google_transcribe';
this.bugname = `${this.bugname_prefix}google_transcribe`;
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance, this._onEndOfUtterance.bind(this, cs, ep));
ep.addCustomEventListener(GoogleTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
@@ -308,19 +312,19 @@ class TaskGather extends SttTask {
case 'aws':
case 'polly':
this.bugname = 'aws_transcribe';
this.bugname = `${this.bugname_prefix}aws_transcribe`;
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(AwsTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
break;
case 'microsoft':
this.bugname = 'azure_transcribe';
this.bugname = `${this.bugname_prefix}azure_transcribe`;
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected,
this._onNoSpeechDetected.bind(this, cs, ep));
ep.addCustomEventListener(AzureTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
break;
case 'nuance':
this.bugname = 'nuance_transcribe';
this.bugname = `${this.bugname_prefix}nuance_transcribe`;
ep.addCustomEventListener(NuanceTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(NuanceTranscriptionEvents.StartOfSpeech,
@@ -337,7 +341,7 @@ class TaskGather extends SttTask {
break;
case 'deepgram':
this.bugname = 'deepgram_transcribe';
this.bugname = `${this.bugname_prefix}deepgram_transcribe`;
ep.addCustomEventListener(DeepgramTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(DeepgramTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
ep.addCustomEventListener(DeepgramTranscriptionEvents.ConnectFailure,
@@ -348,12 +352,12 @@ class TaskGather extends SttTask {
break;
case 'soniox':
this.bugname = 'soniox_transcribe';
this.bugname = `${this.bugname_prefix}soniox_transcribe`;
ep.addCustomEventListener(SonioxTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
break;
case 'cobalt':
this.bugname = 'cobalt_transcribe';
this.bugname = `${this.bugname_prefix}cobalt_transcribe`;
ep.addCustomEventListener(CobaltTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
/* cobalt doesnt have language, it has model, which is required */
@@ -383,7 +387,7 @@ class TaskGather extends SttTask {
break;
case 'ibm':
this.bugname = 'ibm_transcribe';
this.bugname = `${this.bugname_prefix}ibm_transcribe`;
ep.addCustomEventListener(IbmTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(IbmTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
ep.addCustomEventListener(IbmTranscriptionEvents.ConnectFailure,
@@ -391,7 +395,7 @@ class TaskGather extends SttTask {
break;
case 'nvidia':
this.bugname = 'nvidia_transcribe';
this.bugname = `${this.bugname_prefix}nvidia_transcribe`;
ep.addCustomEventListener(NvidiaTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(NvidiaTranscriptionEvents.StartOfSpeech,
@@ -408,7 +412,7 @@ class TaskGather extends SttTask {
break;
case 'assemblyai':
this.bugname = 'assemblyai_transcribe';
this.bugname = `${this.bugname_prefix}assemblyai_transcribe`;
ep.addCustomEventListener(AssemblyAiTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(AssemblyAiTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
@@ -418,7 +422,7 @@ class TaskGather extends SttTask {
break;
default:
if (this.vendor.startsWith('custom:')) {
this.bugname = `${this.vendor}_transcribe`;
this.bugname = `${this.bugname_prefix}${this.vendor}_transcribe`;
ep.addCustomEventListener(JambonzTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(JambonzTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
ep.addCustomEventListener(JambonzTranscriptionEvents.ConnectFailure,
@@ -752,7 +756,10 @@ class TaskGather extends SttTask {
async _onJambonzError(cs, ep, evt) {
this.logger.info({evt}, 'TaskGather:_onJambonzError');
if (this.isHandledByPrimaryProvider && this.fallbackVendor) {
ep.stopTranscription({vendor: this.vendor})
ep.stopTranscription({
vendor: this.vendor,
bugname: this.bugname
})
.catch((err) => this.logger.error({err}, `Error stopping transcription for primary vendor ${this.vendor}`));
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
try {
@@ -833,7 +840,10 @@ class TaskGather extends SttTask {
'stt.result': JSON.stringify(evt)
});
if (this.needsStt && this.ep && this.ep.connected) {
this.ep.stopTranscription({vendor: this.vendor})
this.ep.stopTranscription({
vendor: this.vendor,
bugname: this.bugname
})
.catch((err) => this.logger.error({err}, 'Error stopping transcription'));
}

View File

@@ -49,6 +49,8 @@ class SttTask extends Task {
/* buffer for soniox transcripts */
this._sonioxTranscripts = [];
/*bug name prefix */
this.bugname_prefix = '';
}

View File

@@ -33,13 +33,14 @@ class TaskTranscribe extends SttTask {
this.childSpan = [null, null];
// Continuos asr timeout
// Continuous asr timeout
this.asrTimeout = typeof this.data.recognizer.asrTimeout === 'number' ? this.data.recognizer.asrTimeout * 1000 : 0;
if (this.asrTimeout > 0) {
this.isContinuousAsr = true;
}
/* buffer speech for continuous asr */
this._bufferedTranscripts = [];
this.bugname_prefix = 'transcribe_';
}
get name() { return TaskName.Transcribe; }
@@ -86,7 +87,10 @@ class TaskTranscribe extends SttTask {
let stopTranscription = false;
if (this.ep?.connected) {
stopTranscription = true;
this.ep.stopTranscription({vendor: this.vendor})
this.ep.stopTranscription({
vendor: this.vendor,
bugname: this.bugname
})
.catch((err) => this.logger.info(err, 'Error TaskTranscribe:kill'));
}
if (this.separateRecognitionPerChannel && this.ep2 && this.ep2.connected) {
@@ -137,7 +141,7 @@ class TaskTranscribe extends SttTask {
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
switch (this.vendor) {
case 'google':
this.bugname = 'google_transcribe';
this.bugname = `${this.bugname_prefix}google_transcribe`;
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
ep.addCustomEventListener(GoogleTranscriptionEvents.NoAudioDetected,
@@ -148,7 +152,7 @@ class TaskTranscribe extends SttTask {
case 'aws':
case 'polly':
this.bugname = 'aws_transcribe';
this.bugname = `${this.bugname_prefix}aws_transcribe`;
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
ep.addCustomEventListener(AwsTranscriptionEvents.NoAudioDetected,
@@ -157,19 +161,19 @@ class TaskTranscribe extends SttTask {
this._onMaxDurationExceeded.bind(this, cs, ep, channel));
break;
case 'microsoft':
this.bugname = 'azure_transcribe';
this.bugname = `${this.bugname_prefix}azure_transcribe`;
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected,
this._onNoAudio.bind(this, cs, ep, channel));
break;
case 'nuance':
this.bugname = 'nuance_transcribe';
this.bugname = `${this.bugname_prefix}nuance_transcribe`;
ep.addCustomEventListener(NuanceTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
break;
case 'deepgram':
this.bugname = 'deepgram_transcribe';
this.bugname = `${this.bugname_prefix}deepgram_transcribe`;
ep.addCustomEventListener(DeepgramTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
ep.addCustomEventListener(DeepgramTranscriptionEvents.Connect,
@@ -182,12 +186,12 @@ class TaskTranscribe extends SttTask {
break;
case 'soniox':
this.bugname = 'soniox_transcribe';
this.bugname = `${this.bugname_prefix}soniox_transcribe`;
ep.addCustomEventListener(SonioxTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
break;
case 'cobalt':
this.bugname = 'cobalt_transcribe';
this.bugname = `${this.bugname_prefix}cobalt_transcribe`;
ep.addCustomEventListener(CobaltTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
@@ -217,7 +221,7 @@ class TaskTranscribe extends SttTask {
break;
case 'ibm':
this.bugname = 'ibm_transcribe';
this.bugname = `${this.bugname_prefix}ibm_transcribe`;
ep.addCustomEventListener(IbmTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
ep.addCustomEventListener(IbmTranscriptionEvents.Connect,
@@ -227,13 +231,13 @@ class TaskTranscribe extends SttTask {
break;
case 'nvidia':
this.bugname = 'nvidia_transcribe';
this.bugname = `${this.bugname_prefix}nvidia_transcribe`;
ep.addCustomEventListener(NvidiaTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
break;
case 'assemblyai':
this.bugname = 'assemblyai_transcribe';
this.bugname = `${this.bugname_prefix}assemblyai_transcribe`;
ep.addCustomEventListener(AssemblyAiTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
ep.addCustomEventListener(AssemblyAiTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
@@ -244,7 +248,7 @@ class TaskTranscribe extends SttTask {
default:
if (this.vendor.startsWith('custom:')) {
this.bugname = `${this.vendor}_transcribe`;
this.bugname = `${this.bugname_prefix}${this.vendor}_transcribe`;
ep.addCustomEventListener(JambonzTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
ep.addCustomEventListener(JambonzTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
@@ -275,6 +279,8 @@ class TaskTranscribe extends SttTask {
}
async _transcribe(ep) {
this.logger.debug(
`TaskTranscribe:_transcribe - starting transcription vendor ${this.vendor} bugname ${this.bugname}`);
await ep.startTranscription({
vendor: this.vendor,
interim: this.interim ? true : false,
@@ -434,7 +440,10 @@ class TaskTranscribe extends SttTask {
async _onJambonzError(cs, _ep, evt) {
this.logger.info({evt}, 'TaskTranscribe:_onJambonzError');
if (this.isHandledByPrimaryProvider && this.fallbackVendor) {
_ep.stopTranscription({vendor: this.vendor})
_ep.stopTranscription({
vendor: this.vendor,
bugname: this.bugname
})
.catch((err) => this.logger.error({err}, `Error stopping transcription for primary vendor ${this.vendor}`));
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
try {