mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-20 16:50:39 +00:00
Transcribe background task (#576)
* first draft * wip * wip * wip * wip * wip * wip * wip * wip * wip * wip * wip * wip * wip * update verb-specification * fix comment reviews * provide bugname when stopping transcription, otherwise it will continue --------- Co-authored-by: Dave Horton <daveh@beachdognet.com>
This commit is contained in:
@@ -9,7 +9,8 @@ class TaskConfig extends Task {
|
||||
'recognizer',
|
||||
'bargeIn',
|
||||
'record',
|
||||
'listen'
|
||||
'listen',
|
||||
'transcribe'
|
||||
].forEach((k) => this[k] = this.data[k] || {});
|
||||
|
||||
if ('notifyEvents' in this.data) {
|
||||
@@ -30,6 +31,13 @@ class TaskConfig extends Task {
|
||||
if (this.bargeIn[k]) this.gatherOpts[k] = this.bargeIn[k];
|
||||
});
|
||||
}
|
||||
if (this.transcribe?.enable) {
|
||||
this.transcribeOpts = {
|
||||
verb: 'transcribe',
|
||||
...this.transcribe
|
||||
};
|
||||
delete this.transcribeOpts.enable;
|
||||
}
|
||||
|
||||
if (this.data.reset) {
|
||||
if (typeof this.data.reset === 'string') this.data.reset = [this.data.reset];
|
||||
@@ -37,7 +45,11 @@ class TaskConfig extends Task {
|
||||
else this.data.reset = [];
|
||||
|
||||
if (this.bargeIn.sticky) this.autoEnable = true;
|
||||
this.preconditions = (this.bargeIn.enable || this.record?.action || this.listen?.url || this.data.amd) ?
|
||||
this.preconditions = (this.bargeIn.enable ||
|
||||
this.record?.action ||
|
||||
this.listen?.url ||
|
||||
this.data.amd ||
|
||||
this.transcribe?.enable) ?
|
||||
TaskPreconditions.Endpoint :
|
||||
TaskPreconditions.None;
|
||||
|
||||
@@ -50,6 +62,7 @@ class TaskConfig extends Task {
|
||||
get hasRecognizer() { return Object.keys(this.recognizer).length; }
|
||||
get hasRecording() { return Object.keys(this.record).length; }
|
||||
get hasListen() { return Object.keys(this.listen).length; }
|
||||
get hasTranscribe() { return Object.keys(this.transcribe).length; }
|
||||
|
||||
get summary() {
|
||||
const phrase = [];
|
||||
@@ -72,6 +85,9 @@ class TaskConfig extends Task {
|
||||
if (this.hasListen) {
|
||||
phrase.push(this.listen.enable ? `listen ${this.listen.url}` : 'stop listen');
|
||||
}
|
||||
if (this.hasTranscribe) {
|
||||
phrase.push(this.transcribe.enable ? `transcribe ${this.transcribe.transcriptionHook}` : 'stop transcribe');
|
||||
}
|
||||
if (this.data.amd) phrase.push('enable amd');
|
||||
if (this.notifyEvents) phrase.push(`event notification ${this.notifyEvents ? 'on' : 'off'}`);
|
||||
if (this.onHoldMusic) phrase.push(`onHoldMusic: ${this.onHoldMusic}`);
|
||||
@@ -212,10 +228,25 @@ class TaskConfig extends Task {
|
||||
const {enable, ...opts} = this.listen;
|
||||
if (enable) {
|
||||
this.logger.debug({opts}, 'Config: enabling listen');
|
||||
cs.startBackgroundListen({verb: 'listen', ...opts});
|
||||
cs.startBackgroundTask('listen', {verb: 'listen', ...opts});
|
||||
} else {
|
||||
this.logger.info('Config: disabling listen');
|
||||
cs.stopBackgroundListen();
|
||||
cs.stopBackgroundTask('listen');
|
||||
}
|
||||
}
|
||||
if (this.hasTranscribe) {
|
||||
if (this.transcribe.enable) {
|
||||
this.transcribeOpts.recognizer = this.hasRecognizer ?
|
||||
this.recognizer :
|
||||
{
|
||||
vendor: cs.speechRecognizerVendor,
|
||||
language: cs.speechRecognizerLanguage
|
||||
};
|
||||
this.logger.debug(this.transcribeOpts, 'Config: enabling transcribe');
|
||||
cs.startBackgroundTask('transcribe', this.transcribeOpts);
|
||||
} else {
|
||||
this.logger.info('Config: disabling transcribe');
|
||||
cs.stopBackgroundTask('transcribe');
|
||||
}
|
||||
}
|
||||
if (this.data.sipRequestWithinDialogHook) {
|
||||
|
||||
@@ -71,6 +71,7 @@ class TaskGather extends SttTask {
|
||||
/* buffer speech for continuous asr */
|
||||
this._bufferedTranscripts = [];
|
||||
this.partialTranscriptsCount = 0;
|
||||
this.bugname_prefix = 'gather_';
|
||||
}
|
||||
|
||||
get name() { return TaskName.Gather; }
|
||||
@@ -261,7 +262,10 @@ class TaskGather extends SttTask {
|
||||
if (this.digitBuffer.length === 0 && this.needsStt) {
|
||||
// DTMF is higher priority than STT.
|
||||
this.removeSpeechListeners(ep);
|
||||
ep.stopTranscription({vendor: this.vendor})
|
||||
ep.stopTranscription({
|
||||
vendor: this.vendor,
|
||||
bugname: this.bugname,
|
||||
})
|
||||
.catch((err) => this.logger.error({err},
|
||||
` Received DTMF, Error stopping transcription for vendor ${this.vendor}`));
|
||||
}
|
||||
@@ -300,7 +304,7 @@ class TaskGather extends SttTask {
|
||||
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
|
||||
switch (this.vendor) {
|
||||
case 'google':
|
||||
this.bugname = 'google_transcribe';
|
||||
this.bugname = `${this.bugname_prefix}google_transcribe`;
|
||||
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance, this._onEndOfUtterance.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(GoogleTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
|
||||
@@ -308,19 +312,19 @@ class TaskGather extends SttTask {
|
||||
|
||||
case 'aws':
|
||||
case 'polly':
|
||||
this.bugname = 'aws_transcribe';
|
||||
this.bugname = `${this.bugname_prefix}aws_transcribe`;
|
||||
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(AwsTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
|
||||
break;
|
||||
case 'microsoft':
|
||||
this.bugname = 'azure_transcribe';
|
||||
this.bugname = `${this.bugname_prefix}azure_transcribe`;
|
||||
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected,
|
||||
this._onNoSpeechDetected.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(AzureTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
|
||||
break;
|
||||
case 'nuance':
|
||||
this.bugname = 'nuance_transcribe';
|
||||
this.bugname = `${this.bugname_prefix}nuance_transcribe`;
|
||||
ep.addCustomEventListener(NuanceTranscriptionEvents.Transcription,
|
||||
this._onTranscription.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(NuanceTranscriptionEvents.StartOfSpeech,
|
||||
@@ -337,7 +341,7 @@ class TaskGather extends SttTask {
|
||||
break;
|
||||
|
||||
case 'deepgram':
|
||||
this.bugname = 'deepgram_transcribe';
|
||||
this.bugname = `${this.bugname_prefix}deepgram_transcribe`;
|
||||
ep.addCustomEventListener(DeepgramTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(DeepgramTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(DeepgramTranscriptionEvents.ConnectFailure,
|
||||
@@ -348,12 +352,12 @@ class TaskGather extends SttTask {
|
||||
break;
|
||||
|
||||
case 'soniox':
|
||||
this.bugname = 'soniox_transcribe';
|
||||
this.bugname = `${this.bugname_prefix}soniox_transcribe`;
|
||||
ep.addCustomEventListener(SonioxTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
break;
|
||||
|
||||
case 'cobalt':
|
||||
this.bugname = 'cobalt_transcribe';
|
||||
this.bugname = `${this.bugname_prefix}cobalt_transcribe`;
|
||||
ep.addCustomEventListener(CobaltTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
|
||||
/* cobalt doesnt have language, it has model, which is required */
|
||||
@@ -383,7 +387,7 @@ class TaskGather extends SttTask {
|
||||
break;
|
||||
|
||||
case 'ibm':
|
||||
this.bugname = 'ibm_transcribe';
|
||||
this.bugname = `${this.bugname_prefix}ibm_transcribe`;
|
||||
ep.addCustomEventListener(IbmTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(IbmTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(IbmTranscriptionEvents.ConnectFailure,
|
||||
@@ -391,7 +395,7 @@ class TaskGather extends SttTask {
|
||||
break;
|
||||
|
||||
case 'nvidia':
|
||||
this.bugname = 'nvidia_transcribe';
|
||||
this.bugname = `${this.bugname_prefix}nvidia_transcribe`;
|
||||
ep.addCustomEventListener(NvidiaTranscriptionEvents.Transcription,
|
||||
this._onTranscription.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(NvidiaTranscriptionEvents.StartOfSpeech,
|
||||
@@ -408,7 +412,7 @@ class TaskGather extends SttTask {
|
||||
break;
|
||||
|
||||
case 'assemblyai':
|
||||
this.bugname = 'assemblyai_transcribe';
|
||||
this.bugname = `${this.bugname_prefix}assemblyai_transcribe`;
|
||||
ep.addCustomEventListener(AssemblyAiTranscriptionEvents.Transcription,
|
||||
this._onTranscription.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(AssemblyAiTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
|
||||
@@ -418,7 +422,7 @@ class TaskGather extends SttTask {
|
||||
break;
|
||||
default:
|
||||
if (this.vendor.startsWith('custom:')) {
|
||||
this.bugname = `${this.vendor}_transcribe`;
|
||||
this.bugname = `${this.bugname_prefix}${this.vendor}_transcribe`;
|
||||
ep.addCustomEventListener(JambonzTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(JambonzTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(JambonzTranscriptionEvents.ConnectFailure,
|
||||
@@ -752,7 +756,10 @@ class TaskGather extends SttTask {
|
||||
async _onJambonzError(cs, ep, evt) {
|
||||
this.logger.info({evt}, 'TaskGather:_onJambonzError');
|
||||
if (this.isHandledByPrimaryProvider && this.fallbackVendor) {
|
||||
ep.stopTranscription({vendor: this.vendor})
|
||||
ep.stopTranscription({
|
||||
vendor: this.vendor,
|
||||
bugname: this.bugname
|
||||
})
|
||||
.catch((err) => this.logger.error({err}, `Error stopping transcription for primary vendor ${this.vendor}`));
|
||||
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
|
||||
try {
|
||||
@@ -833,7 +840,10 @@ class TaskGather extends SttTask {
|
||||
'stt.result': JSON.stringify(evt)
|
||||
});
|
||||
if (this.needsStt && this.ep && this.ep.connected) {
|
||||
this.ep.stopTranscription({vendor: this.vendor})
|
||||
this.ep.stopTranscription({
|
||||
vendor: this.vendor,
|
||||
bugname: this.bugname
|
||||
})
|
||||
.catch((err) => this.logger.error({err}, 'Error stopping transcription'));
|
||||
}
|
||||
|
||||
|
||||
@@ -49,6 +49,8 @@ class SttTask extends Task {
|
||||
|
||||
/* buffer for soniox transcripts */
|
||||
this._sonioxTranscripts = [];
|
||||
/* prefix prepended to the vendor bug name (subclasses set e.g. 'gather_' or 'transcribe_'); empty by default */
|
||||
this.bugname_prefix = '';
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -33,13 +33,14 @@ class TaskTranscribe extends SttTask {
|
||||
|
||||
this.childSpan = [null, null];
|
||||
|
||||
// Continuos asr timeout
|
||||
// Continuous asr timeout
|
||||
this.asrTimeout = typeof this.data.recognizer.asrTimeout === 'number' ? this.data.recognizer.asrTimeout * 1000 : 0;
|
||||
if (this.asrTimeout > 0) {
|
||||
this.isContinuousAsr = true;
|
||||
}
|
||||
/* buffer speech for continuous asr */
|
||||
this._bufferedTranscripts = [];
|
||||
this.bugname_prefix = 'transcribe_';
|
||||
}
|
||||
|
||||
get name() { return TaskName.Transcribe; }
|
||||
@@ -86,7 +87,10 @@ class TaskTranscribe extends SttTask {
|
||||
let stopTranscription = false;
|
||||
if (this.ep?.connected) {
|
||||
stopTranscription = true;
|
||||
this.ep.stopTranscription({vendor: this.vendor})
|
||||
this.ep.stopTranscription({
|
||||
vendor: this.vendor,
|
||||
bugname: this.bugname
|
||||
})
|
||||
.catch((err) => this.logger.info(err, 'Error TaskTranscribe:kill'));
|
||||
}
|
||||
if (this.separateRecognitionPerChannel && this.ep2 && this.ep2.connected) {
|
||||
@@ -137,7 +141,7 @@ class TaskTranscribe extends SttTask {
|
||||
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
|
||||
switch (this.vendor) {
|
||||
case 'google':
|
||||
this.bugname = 'google_transcribe';
|
||||
this.bugname = `${this.bugname_prefix}google_transcribe`;
|
||||
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription,
|
||||
this._onTranscription.bind(this, cs, ep, channel));
|
||||
ep.addCustomEventListener(GoogleTranscriptionEvents.NoAudioDetected,
|
||||
@@ -148,7 +152,7 @@ class TaskTranscribe extends SttTask {
|
||||
|
||||
case 'aws':
|
||||
case 'polly':
|
||||
this.bugname = 'aws_transcribe';
|
||||
this.bugname = `${this.bugname_prefix}aws_transcribe`;
|
||||
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription,
|
||||
this._onTranscription.bind(this, cs, ep, channel));
|
||||
ep.addCustomEventListener(AwsTranscriptionEvents.NoAudioDetected,
|
||||
@@ -157,19 +161,19 @@ class TaskTranscribe extends SttTask {
|
||||
this._onMaxDurationExceeded.bind(this, cs, ep, channel));
|
||||
break;
|
||||
case 'microsoft':
|
||||
this.bugname = 'azure_transcribe';
|
||||
this.bugname = `${this.bugname_prefix}azure_transcribe`;
|
||||
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription,
|
||||
this._onTranscription.bind(this, cs, ep, channel));
|
||||
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected,
|
||||
this._onNoAudio.bind(this, cs, ep, channel));
|
||||
break;
|
||||
case 'nuance':
|
||||
this.bugname = 'nuance_transcribe';
|
||||
this.bugname = `${this.bugname_prefix}nuance_transcribe`;
|
||||
ep.addCustomEventListener(NuanceTranscriptionEvents.Transcription,
|
||||
this._onTranscription.bind(this, cs, ep, channel));
|
||||
break;
|
||||
case 'deepgram':
|
||||
this.bugname = 'deepgram_transcribe';
|
||||
this.bugname = `${this.bugname_prefix}deepgram_transcribe`;
|
||||
ep.addCustomEventListener(DeepgramTranscriptionEvents.Transcription,
|
||||
this._onTranscription.bind(this, cs, ep, channel));
|
||||
ep.addCustomEventListener(DeepgramTranscriptionEvents.Connect,
|
||||
@@ -182,12 +186,12 @@ class TaskTranscribe extends SttTask {
|
||||
|
||||
break;
|
||||
case 'soniox':
|
||||
this.bugname = 'soniox_transcribe';
|
||||
this.bugname = `${this.bugname_prefix}soniox_transcribe`;
|
||||
ep.addCustomEventListener(SonioxTranscriptionEvents.Transcription,
|
||||
this._onTranscription.bind(this, cs, ep, channel));
|
||||
break;
|
||||
case 'cobalt':
|
||||
this.bugname = 'cobalt_transcribe';
|
||||
this.bugname = `${this.bugname_prefix}cobalt_transcribe`;
|
||||
ep.addCustomEventListener(CobaltTranscriptionEvents.Transcription,
|
||||
this._onTranscription.bind(this, cs, ep, channel));
|
||||
|
||||
@@ -217,7 +221,7 @@ class TaskTranscribe extends SttTask {
|
||||
break;
|
||||
|
||||
case 'ibm':
|
||||
this.bugname = 'ibm_transcribe';
|
||||
this.bugname = `${this.bugname_prefix}ibm_transcribe`;
|
||||
ep.addCustomEventListener(IbmTranscriptionEvents.Transcription,
|
||||
this._onTranscription.bind(this, cs, ep, channel));
|
||||
ep.addCustomEventListener(IbmTranscriptionEvents.Connect,
|
||||
@@ -227,13 +231,13 @@ class TaskTranscribe extends SttTask {
|
||||
break;
|
||||
|
||||
case 'nvidia':
|
||||
this.bugname = 'nvidia_transcribe';
|
||||
this.bugname = `${this.bugname_prefix}nvidia_transcribe`;
|
||||
ep.addCustomEventListener(NvidiaTranscriptionEvents.Transcription,
|
||||
this._onTranscription.bind(this, cs, ep, channel));
|
||||
break;
|
||||
|
||||
case 'assemblyai':
|
||||
this.bugname = 'assemblyai_transcribe';
|
||||
this.bugname = `${this.bugname_prefix}assemblyai_transcribe`;
|
||||
ep.addCustomEventListener(AssemblyAiTranscriptionEvents.Transcription,
|
||||
this._onTranscription.bind(this, cs, ep, channel));
|
||||
ep.addCustomEventListener(AssemblyAiTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
|
||||
@@ -244,7 +248,7 @@ class TaskTranscribe extends SttTask {
|
||||
|
||||
default:
|
||||
if (this.vendor.startsWith('custom:')) {
|
||||
this.bugname = `${this.vendor}_transcribe`;
|
||||
this.bugname = `${this.bugname_prefix}${this.vendor}_transcribe`;
|
||||
ep.addCustomEventListener(JambonzTranscriptionEvents.Transcription,
|
||||
this._onTranscription.bind(this, cs, ep, channel));
|
||||
ep.addCustomEventListener(JambonzTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
|
||||
@@ -275,6 +279,8 @@ class TaskTranscribe extends SttTask {
|
||||
}
|
||||
|
||||
async _transcribe(ep) {
|
||||
this.logger.debug(
|
||||
`TaskTranscribe:_transcribe - starting transcription vendor ${this.vendor} bugname ${this.bugname}`);
|
||||
await ep.startTranscription({
|
||||
vendor: this.vendor,
|
||||
interim: this.interim ? true : false,
|
||||
@@ -434,7 +440,10 @@ class TaskTranscribe extends SttTask {
|
||||
async _onJambonzError(cs, _ep, evt) {
|
||||
this.logger.info({evt}, 'TaskTranscribe:_onJambonzError');
|
||||
if (this.isHandledByPrimaryProvider && this.fallbackVendor) {
|
||||
_ep.stopTranscription({vendor: this.vendor})
|
||||
_ep.stopTranscription({
|
||||
vendor: this.vendor,
|
||||
bugname: this.bugname
|
||||
})
|
||||
.catch((err) => this.logger.error({err}, `Error stopping transcription for primary vendor ${this.vendor}`));
|
||||
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
|
||||
try {
|
||||
|
||||
Reference in New Issue
Block a user