mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-20 16:50:39 +00:00
feat: update speech-ultil version 1.0.1 (#275)
* feat: update speech-ultil version 1.0.1 * feat: update speech-ultil version 1.0.1 * more fixes for custom stt * more fixes * fixes * update drachtio-fsmrf * pass url to mod_jambonz_transcribe * transcription utils: handle custom results * handle custom speech vendor errors * add support for hints to custom speech * change to custom speech options * send hints as an array for custom speech * update latest speech-utils * transcribe: changes to support soniox * bugfix: soniox transcribe --------- Co-authored-by: Quan HL <quanluuhoang8@gmail.com> Co-authored-by: Dave Horton <daveh@beachdognet.com>
This commit is contained in:
@@ -9,7 +9,8 @@ const {
|
||||
DeepgramTranscriptionEvents,
|
||||
SonioxTranscriptionEvents,
|
||||
IbmTranscriptionEvents,
|
||||
NvidiaTranscriptionEvents
|
||||
NvidiaTranscriptionEvents,
|
||||
JambonzTranscriptionEvents
|
||||
} = require('../utils/constants');
|
||||
|
||||
const makeTask = require('./make_task');
|
||||
@@ -379,8 +380,6 @@ class TaskGather extends Task {
|
||||
this._onTranscriptionComplete.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(NuanceTranscriptionEvents.VadDetected,
|
||||
this._onVadDetected.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(NuanceTranscriptionEvents.Error,
|
||||
this._onNuanceError.bind(this, cs, ep));
|
||||
|
||||
/* stall timers until prompt finishes playing */
|
||||
if ((this.sayTask || this.playTask) && this.listenDuringPrompt) {
|
||||
@@ -399,8 +398,6 @@ class TaskGather extends Task {
|
||||
case 'soniox':
|
||||
this.bugname = 'soniox_transcribe';
|
||||
ep.addCustomEventListener(SonioxTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(SonioxTranscriptionEvents.Error,
|
||||
this._onSonioxError.bind(this, cs, ep));
|
||||
break;
|
||||
|
||||
case 'ibm':
|
||||
@@ -409,8 +406,6 @@ class TaskGather extends Task {
|
||||
ep.addCustomEventListener(IbmTranscriptionEvents.Connect, this._onIbmConnect.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(IbmTranscriptionEvents.ConnectFailure,
|
||||
this._onIbmConnectFailure.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(IbmTranscriptionEvents.Error,
|
||||
this._onIbmError.bind(this, cs, ep));
|
||||
break;
|
||||
|
||||
case 'nvidia':
|
||||
@@ -423,8 +418,6 @@ class TaskGather extends Task {
|
||||
this._onTranscriptionComplete.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(NvidiaTranscriptionEvents.VadDetected,
|
||||
this._onVadDetected.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(NvidiaTranscriptionEvents.Error,
|
||||
this._onNvidiaError.bind(this, cs, ep));
|
||||
|
||||
/* I think nvidia has this (??) - stall timers until prompt finishes playing */
|
||||
if ((this.sayTask || this.playTask) && this.listenDuringPrompt) {
|
||||
@@ -433,11 +426,23 @@ class TaskGather extends Task {
|
||||
break;
|
||||
|
||||
default:
|
||||
this.notifyError({ msg: 'ASR error', details:`Invalid vendor ${this.vendor}`});
|
||||
this.notifyTaskDone();
|
||||
throw new Error(`Invalid vendor ${this.vendor}`);
|
||||
if (this.vendor.startsWith('custom:')) {
|
||||
this.bugname = `${this.vendor}_transcribe`;
|
||||
ep.addCustomEventListener(JambonzTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(JambonzTranscriptionEvents.Connect, this._onJambonzConnect.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(JambonzTranscriptionEvents.ConnectFailure,
|
||||
this._onJambonzConnectFailure.bind(this, cs, ep));
|
||||
break;
|
||||
}
|
||||
else {
|
||||
this.notifyError({ msg: 'ASR error', details:`Invalid vendor ${this.vendor}`});
|
||||
this.notifyTaskDone();
|
||||
throw new Error(`Invalid vendor ${this.vendor}`);
|
||||
}
|
||||
}
|
||||
|
||||
/* common handler for all stt engine errors */
|
||||
ep.addCustomEventListener(JambonzTranscriptionEvents.Error, this._onJambonzError.bind(this, cs, ep));
|
||||
await ep.set(opts)
|
||||
.catch((err) => this.logger.info(err, 'Error setting channel variables'));
|
||||
}
|
||||
@@ -662,26 +667,30 @@ class TaskGather extends Task {
|
||||
_onTranscriptionComplete(cs, ep) {
|
||||
this.logger.debug('TaskGather:_onTranscriptionComplete');
|
||||
}
|
||||
_onNuanceError(cs, ep, evt) {
|
||||
const {code, error, details} = evt;
|
||||
if (code === 404 && error === 'No speech') {
|
||||
this.logger.debug({code, error, details}, 'TaskGather:_onNuanceError');
|
||||
return this._resolve('timeout');
|
||||
}
|
||||
this.logger.info({code, error, details}, 'TaskGather:_onNuanceError');
|
||||
if (code === 413 && error === 'Too much speech') {
|
||||
return this._resolve('timeout');
|
||||
}
|
||||
}
|
||||
_onSonioxError(cs, ep, evt) {
|
||||
this.logger.info({evt}, 'TaskGather:_onSonioxError');
|
||||
}
|
||||
_onNvidiaError(cs, ep, evt) {
|
||||
this.logger.info({evt}, 'TaskGather:_onNvidiaError');
|
||||
}
|
||||
_onDeepgramConnect(_cs, _ep) {
|
||||
this.logger.debug('TaskGather:_onDeepgramConnect');
|
||||
}
|
||||
_onJambonzConnect(_cs, _ep) {
|
||||
this.logger.debug('TaskGather:_onJambonzConnect');
|
||||
}
|
||||
_onJambonzError(cs, _ep, evt) {
|
||||
this.logger.info({evt}, 'TaskGather:_onJambonzError');
|
||||
const {writeAlerts, AlertType} = cs.srf.locals;
|
||||
|
||||
if (this.vendor === 'nuance') {
|
||||
const {code, error} = evt;
|
||||
if (code === 404 && error === 'No speech') return this._resolve('timeout');
|
||||
if (code === 413 && error === 'Too much speech') return this._resolve('timeout');
|
||||
}
|
||||
this.logger.info({evt}, 'TaskGather:_onJambonzError');
|
||||
writeAlerts({
|
||||
account_sid: cs.accountSid,
|
||||
alert_type: AlertType.STT_FAILURE,
|
||||
message: `Custom speech vendor ${this.vendor} error: ${evt.error}`,
|
||||
vendor: this.vendor,
|
||||
}).catch((err) => this.logger.info({err}, 'Error generating alert for jambonz custom connection failure'));
|
||||
this.notifyError({msg: 'ASR error', details:`Custom speech vendor ${this.vendor} error: ${evt.error}`});
|
||||
}
|
||||
|
||||
_onDeepGramConnectFailure(cs, _ep, evt) {
|
||||
const {reason} = evt;
|
||||
@@ -696,6 +705,19 @@ class TaskGather extends Task {
|
||||
this.notifyError({msg: 'ASR error', details:`Failed connecting to speech vendor deepgram: ${reason}`});
|
||||
this.notifyTaskDone();
|
||||
}
|
||||
_onJambonzConnectFailure(cs, _ep, evt) {
|
||||
const {reason} = evt;
|
||||
const {writeAlerts, AlertType} = cs.srf.locals;
|
||||
this.logger.info({evt}, 'TaskGather:_onJambonzConnectFailure');
|
||||
writeAlerts({
|
||||
account_sid: cs.accountSid,
|
||||
alert_type: AlertType.STT_FAILURE,
|
||||
message: `Failed connecting to ${this.vendor} speech recognizer: ${reason}`,
|
||||
vendor: this.vendor,
|
||||
}).catch((err) => this.logger.info({err}, 'Error generating alert for jambonz custom connection failure'));
|
||||
this.notifyError({msg: 'ASR error', details:`Failed connecting to speech vendor ${this.vendor}: ${reason}`});
|
||||
this.notifyTaskDone();
|
||||
}
|
||||
|
||||
_onIbmConnect(_cs, _ep) {
|
||||
this.logger.debug('TaskGather:_onIbmConnect');
|
||||
|
||||
@@ -9,7 +9,8 @@ const {
|
||||
DeepgramTranscriptionEvents,
|
||||
SonioxTranscriptionEvents,
|
||||
IbmTranscriptionEvents,
|
||||
NvidiaTranscriptionEvents
|
||||
NvidiaTranscriptionEvents,
|
||||
JambonzTranscriptionEvents
|
||||
} = require('../utils/constants');
|
||||
const { normalizeJambones } = require('@jambonz/verb-specifications');
|
||||
|
||||
@@ -23,11 +24,13 @@ class TaskTranscribe extends Task {
|
||||
setChannelVarsForStt,
|
||||
normalizeTranscription,
|
||||
removeSpeechListeners,
|
||||
setSpeechCredentialsAtRuntime
|
||||
setSpeechCredentialsAtRuntime,
|
||||
compileSonioxTranscripts
|
||||
} = require('../utils/transcription-utils')(logger);
|
||||
this.setChannelVarsForStt = setChannelVarsForStt;
|
||||
this.normalizeTranscription = normalizeTranscription;
|
||||
this.removeSpeechListeners = removeSpeechListeners;
|
||||
this.compileSonioxTranscripts = compileSonioxTranscripts;
|
||||
|
||||
this.transcriptionHook = this.data.transcriptionHook;
|
||||
this.earlyMedia = this.data.earlyMedia === true || (parentTask && parentTask.earlyMedia);
|
||||
@@ -41,6 +44,9 @@ class TaskTranscribe extends Task {
|
||||
/* let credentials be supplied in the recognizer object at runtime */
|
||||
this.sttCredentials = setSpeechCredentialsAtRuntime(recognizer);
|
||||
|
||||
/* buffer for soniox transcripts */
|
||||
this._sonioxTranscripts = [];
|
||||
|
||||
recognizer.hints = recognizer.hints || [];
|
||||
recognizer.altLanguages = recognizer.altLanguages || [];
|
||||
}
|
||||
@@ -184,8 +190,6 @@ class TaskTranscribe extends Task {
|
||||
this._onStartOfSpeech.bind(this, cs, ep, channel));
|
||||
ep.addCustomEventListener(NuanceTranscriptionEvents.TranscriptionComplete,
|
||||
this._onTranscriptionComplete.bind(this, cs, ep, channel));
|
||||
ep.addCustomEventListener(AzureTranscriptionEvents.Error,
|
||||
this._onNuanceError.bind(this, cs, ep, channel));
|
||||
break;
|
||||
case 'deepgram':
|
||||
this.bugname = 'deepgram_transcribe';
|
||||
@@ -198,9 +202,8 @@ class TaskTranscribe extends Task {
|
||||
break;
|
||||
case 'soniox':
|
||||
this.bugname = 'soniox_transcribe';
|
||||
ep.addCustomEventListener(SonioxTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(SonioxTranscriptionEvents.Error,
|
||||
this._onSonioxError.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(SonioxTranscriptionEvents.Transcription,
|
||||
this._onTranscription.bind(this, cs, ep, channel));
|
||||
break;
|
||||
case 'ibm':
|
||||
this.bugname = 'ibm_transcribe';
|
||||
@@ -210,8 +213,6 @@ class TaskTranscribe extends Task {
|
||||
this._onIbmConnect.bind(this, cs, ep, channel));
|
||||
ep.addCustomEventListener(IbmTranscriptionEvents.ConnectFailure,
|
||||
this._onIbmConnectFailure.bind(this, cs, ep, channel));
|
||||
ep.addCustomEventListener(IbmTranscriptionEvents.Error,
|
||||
this._onIbmError.bind(this, cs, ep, channel));
|
||||
break;
|
||||
|
||||
case 'nvidia':
|
||||
@@ -224,14 +225,13 @@ class TaskTranscribe extends Task {
|
||||
this._onTranscriptionComplete.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(NvidiaTranscriptionEvents.VadDetected,
|
||||
this._onVadDetected.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(NvidiaTranscriptionEvents.Error,
|
||||
this._onNvidiaError.bind(this, cs, ep));
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new Error(`Invalid vendor ${this.vendor}`);
|
||||
}
|
||||
|
||||
/* common handler for all stt engine errors */
|
||||
ep.addCustomEventListener(JambonzTranscriptionEvents.Error, this._onJambonzError.bind(this, cs, ep));
|
||||
await ep.set(opts)
|
||||
.catch((err) => this.logger.info(err, 'Error setting channel variables'));
|
||||
|
||||
@@ -273,6 +273,15 @@ class TaskTranscribe extends Task {
|
||||
return;
|
||||
}
|
||||
|
||||
if (this.vendor === 'soniox') {
|
||||
/* compile transcripts into one */
|
||||
this._sonioxTranscripts.push(evt.vendor.finalWords);
|
||||
if (evt.is_final) {
|
||||
evt = this.compileSonioxTranscripts(this._sonioxTranscripts, 1, this.language);
|
||||
this._sonioxTranscripts = [];
|
||||
}
|
||||
}
|
||||
|
||||
if (this.transcriptionHook) {
|
||||
const b3 = this.getTracingPropagation();
|
||||
const httpHeaders = b3 && {b3};
|
||||
@@ -321,23 +330,6 @@ class TaskTranscribe extends Task {
|
||||
this._timer = null;
|
||||
}
|
||||
}
|
||||
_onNuanceError(_cs, _ep, _channel, evt) {
|
||||
const {code, error, details} = evt;
|
||||
if (code === 404 && error === 'No speech') {
|
||||
this.logger.debug({code, error, details}, 'TaskTranscribe:_onNuanceError');
|
||||
return this._resolve('timeout');
|
||||
}
|
||||
this.logger.info({code, error, details}, 'TaskTranscribe:_onNuanceError');
|
||||
if (code === 413 && error === 'Too much speech') {
|
||||
return this._resolve('timeout');
|
||||
}
|
||||
}
|
||||
_onSonioxError(cs, ep, evt) {
|
||||
this.logger.info({evt}, 'TaskTranscribe:_onSonioxError');
|
||||
}
|
||||
_onNvidiaError(cs, ep, evt) {
|
||||
this.logger.info({evt}, 'TaskTranscribe:_onNvidiaError');
|
||||
}
|
||||
_onDeepgramConnect(_cs, _ep) {
|
||||
this.logger.debug('TaskTranscribe:_onDeepgramConnect');
|
||||
}
|
||||
@@ -376,6 +368,24 @@ class TaskTranscribe extends Task {
|
||||
_onIbmError(cs, _ep, _channel, evt) {
|
||||
this.logger.info({evt}, 'TaskTranscribe:_onIbmError');
|
||||
}
|
||||
_onJambonzError(cs, _ep, evt) {
|
||||
this.logger.info({evt}, 'TaskTranscribe:_onJambonzError');
|
||||
const {writeAlerts, AlertType} = cs.srf.locals;
|
||||
|
||||
if (this.vendor === 'nuance') {
|
||||
const {code, error} = evt;
|
||||
if (code === 404 && error === 'No speech') return this._resolve('timeout');
|
||||
if (code === 413 && error === 'Too much speech') return this._resolve('timeout');
|
||||
}
|
||||
this.logger.info({evt}, 'TaskTranscribe:_onJambonzError');
|
||||
writeAlerts({
|
||||
account_sid: cs.accountSid,
|
||||
alert_type: AlertType.STT_FAILURE,
|
||||
message: `Custom speech vendor ${this.vendor} error: ${evt.error}`,
|
||||
vendor: this.vendor,
|
||||
}).catch((err) => this.logger.info({err}, 'Error generating alert for jambonz custom connection failure'));
|
||||
this.notifyError({msg: 'ASR error', details:`Custom speech vendor ${this.vendor} error: ${evt.error}`});
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user